三种实现逻辑回归算法的代码

编程入门行业动态更新时间:2024-10-05 21:23:36

三种实现逻辑回归算法的代码

了解逻辑回归是解决二分类问题

需要了解的数学知识，如何用逻辑回归算法来解决分类问题。视频：=1

数据集：iris.data

5.1,3.5,1.4,0.2,Iris-setosa
4.9,3.0,1.4,0.2,Iris-setosa
4.7,3.2,1.3,0.2,Iris-setosa
4.6,3.1,1.5,0.2,Iris-setosa
5.0,3.6,1.4,0.2,Iris-setosa
5.4,3.9,1.7,0.4,Iris-setosa
4.6,3.4,1.4,0.3,Iris-setosa
5.0,3.4,1.5,0.2,Iris-setosa
4.4,2.9,1.4,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
5.4,3.7,1.5,0.2,Iris-setosa
4.8,3.4,1.6,0.2,Iris-setosa
4.8,3.0,1.4,0.1,Iris-setosa
4.3,3.0,1.1,0.1,Iris-setosa
5.8,4.0,1.2,0.2,Iris-setosa
5.7,4.4,1.5,0.4,Iris-setosa
5.4,3.9,1.3,0.4,Iris-setosa
5.1,3.5,1.4,0.3,Iris-setosa
5.7,3.8,1.7,0.3,Iris-setosa
5.1,3.8,1.5,0.3,Iris-setosa
5.4,3.4,1.7,0.2,Iris-setosa
5.1,3.7,1.5,0.4,Iris-setosa
4.6,3.6,1.0,0.2,Iris-setosa
5.1,3.3,1.7,0.5,Iris-setosa
4.8,3.4,1.9,0.2,Iris-setosa
5.0,3.0,1.6,0.2,Iris-setosa
5.0,3.4,1.6,0.4,Iris-setosa
5.2,3.5,1.5,0.2,Iris-setosa
5.2,3.4,1.4,0.2,Iris-setosa
4.7,3.2,1.6,0.2,Iris-setosa
4.8,3.1,1.6,0.2,Iris-setosa
5.4,3.4,1.5,0.4,Iris-setosa
5.2,4.1,1.5,0.1,Iris-setosa
5.5,4.2,1.4,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
5.0,3.2,1.2,0.2,Iris-setosa
5.5,3.5,1.3,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
4.4,3.0,1.3,0.2,Iris-setosa
5.1,3.4,1.5,0.2,Iris-setosa
5.0,3.5,1.3,0.3,Iris-setosa
4.5,2.3,1.3,0.3,Iris-setosa
4.4,3.2,1.3,0.2,Iris-setosa
5.0,3.5,1.6,0.6,Iris-setosa
5.1,3.8,1.9,0.4,Iris-setosa
4.8,3.0,1.4,0.3,Iris-setosa
5.1,3.8,1.6,0.2,Iris-setosa
4.6,3.2,1.4,0.2,Iris-setosa
5.3,3.7,1.5,0.2,Iris-setosa
5.0,3.3,1.4,0.2,Iris-setosa
7.0,3.2,4.7,1.4,Iris-versicolor
6.4,3.2,4.5,1.5,Iris-versicolor
6.9,3.1,4.9,1.5,Iris-versicolor
5.5,2.3,4.0,1.3,Iris-versicolor
6.5,2.8,4.6,1.5,Iris-versicolor
5.7,2.8,4.5,1.3,Iris-versicolor
6.3,3.3,4.7,1.6,Iris-versicolor
4.9,2.4,3.3,1.0,Iris-versicolor
6.6,2.9,4.6,1.3,Iris-versicolor
5.2,2.7,3.9,1.4,Iris-versicolor
5.0,2.0,3.5,1.0,Iris-versicolor
5.9,3.0,4.2,1.5,Iris-versicolor
6.0,2.2,4.0,1.0,Iris-versicolor
6.1,2.9,4.7,1.4,Iris-versicolor
5.6,2.9,3.6,1.3,Iris-versicolor
6.7,3.1,4.4,1.4,Iris-versicolor
5.6,3.0,4.5,1.5,Iris-versicolor
5.8,2.7,4.1,1.0,Iris-versicolor
6.2,2.2,4.5,1.5,Iris-versicolor
5.6,2.5,3.9,1.1,Iris-versicolor
5.9,3.2,4.8,1.8,Iris-versicolor
6.1,2.8,4.0,1.3,Iris-versicolor
6.3,2.5,4.9,1.5,Iris-versicolor
6.1,2.8,4.7,1.2,Iris-versicolor
6.4,2.9,4.3,1.3,Iris-versicolor
6.6,3.0,4.4,1.4,Iris-versicolor
6.8,2.8,4.8,1.4,Iris-versicolor
6.7,3.0,5.0,1.7,Iris-versicolor
6.0,2.9,4.5,1.5,Iris-versicolor
5.7,2.6,3.5,1.0,Iris-versicolor
5.5,2.4,3.8,1.1,Iris-versicolor
5.5,2.4,3.7,1.0,Iris-versicolor
5.8,2.7,3.9,1.2,Iris-versicolor
6.0,2.7,5.1,1.6,Iris-versicolor
5.4,3.0,4.5,1.5,Iris-versicolor
6.0,3.4,4.5,1.6,Iris-versicolor
6.7,3.1,4.7,1.5,Iris-versicolor
6.3,2.3,4.4,1.3,Iris-versicolor
5.6,3.0,4.1,1.3,Iris-versicolor
5.5,2.5,4.0,1.3,Iris-versicolor
5.5,2.6,4.4,1.2,Iris-versicolor
6.1,3.0,4.6,1.4,Iris-versicolor
5.8,2.6,4.0,1.2,Iris-versicolor
5.0,2.3,3.3,1.0,Iris-versicolor
5.6,2.7,4.2,1.3,Iris-versicolor
5.7,3.0,4.2,1.2,Iris-versicolor
5.7,2.9,4.2,1.3,Iris-versicolor
6.2,2.9,4.3,1.3,Iris-versicolor
5.1,2.5,3.0,1.1,Iris-versicolor
5.7,2.8,4.1,1.3,Iris-versicolor
6.3,3.3,6.0,2.5,Iris-virginica
5.8,2.7,5.1,1.9,Iris-virginica
7.1,3.0,5.9,2.1,Iris-virginica
6.3,2.9,5.6,1.8,Iris-virginica
6.5,3.0,5.8,2.2,Iris-virginica
7.6,3.0,6.6,2.1,Iris-virginica
4.9,2.5,4.5,1.7,Iris-virginica
7.3,2.9,6.3,1.8,Iris-virginica
6.7,2.5,5.8,1.8,Iris-virginica
7.2,3.6,6.1,2.5,Iris-virginica
6.5,3.2,5.1,2.0,Iris-virginica
6.4,2.7,5.3,1.9,Iris-virginica
6.8,3.0,5.5,2.1,Iris-virginica
5.7,2.5,5.0,2.0,Iris-virginica
5.8,2.8,5.1,2.4,Iris-virginica
6.4,3.2,5.3,2.3,Iris-virginica
6.5,3.0,5.5,1.8,Iris-virginica
7.7,3.8,6.7,2.2,Iris-virginica
7.7,2.6,6.9,2.3,Iris-virginica
6.0,2.2,5.0,1.5,Iris-virginica
6.9,3.2,5.7,2.3,Iris-virginica
5.6,2.8,4.9,2.0,Iris-virginica
7.7,2.8,6.7,2.0,Iris-virginica
6.3,2.7,4.9,1.8,Iris-virginica
6.7,3.3,5.7,2.1,Iris-virginica
7.2,3.2,6.0,1.8,Iris-virginica
6.2,2.8,4.8,1.8,Iris-virginica
6.1,3.0,4.9,1.8,Iris-virginica
6.4,2.8,5.6,2.1,Iris-virginica
7.2,3.0,5.8,1.6,Iris-virginica
7.4,2.8,6.1,1.9,Iris-virginica
7.9,3.8,6.4,2.0,Iris-virginica
6.4,2.8,5.6,2.2,Iris-virginica
6.3,2.8,5.1,1.5,Iris-virginica
6.1,2.6,5.6,1.4,Iris-virginica
7.7,3.0,6.1,2.3,Iris-virginica
6.3,3.4,5.6,2.4,Iris-virginica
6.4,3.1,5.5,1.8,Iris-virginica
6.0,3.0,4.8,1.8,Iris-virginica
6.9,3.1,5.4,2.1,Iris-virginica
6.7,3.1,5.6,2.4,Iris-virginica
6.9,3.1,5.1,2.3,Iris-virginica
5.8,2.7,5.1,1.9,Iris-virginica
6.8,3.2,5.9,2.3,Iris-virginica
6.7,3.3,5.7,2.5,Iris-virginica
6.7,3.0,5.2,2.3,Iris-virginica
6.3,2.5,5.0,1.9,Iris-virginica
6.5,3.0,5.2,2.0,Iris-virginica
6.2,3.4,5.4,2.3,Iris-virginica
5.9,3.0,5.1,1.8,Iris-virginica

方式一：

#!/usr/bin/python
# -*- coding:utf-8 -*-
#鸢尾花分类
#1.首先导入包：
import numpy as np #机器学习基础包
from sklearn.linear_model import LogisticRegression #逻辑回归算法库
import matplotlib.pyplot as plt #绘图工具包
import matplotlib as mpl #绘图地图包
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn import datasets #sklearn官方提供的数据集def iris_type(s):it = {b'Iris-setosa': 0, b'Iris-versicolor': 1, b'Iris-virginica': 2}return it[s]#2.数据预处理，得到x,y
if __name__ == "__main__":path = u'iris.data'  # 数据文件路径# # 路径，浮点型数据，逗号分隔，第4列使用函数iris_type单独处理#data = np.loadtxt(path, dtype=float, delimiter=',', converters={4: iris_type})# print (data)# 将数据的0到3列组成x，第4列得到y#x, y = np.split(data, (4,), axis=1) #axis=1，默认为0，横向切分；为1时，纵向切分。# 为了可视化，仅使用前两列特征#x = x[:, :2]# 用sklearn的数据集iris = datasets.load_iris()x = iris.data[:, :2]  # we only take the first two features.y = iris.targetfrom sklearn.model_selection import train_test_splitX_train, X_test, y_train, y_test = train_test_split(x, y, random_state=666)
#3.用pipline建立模型#StandardScaler()作用：去均值和方差归一化。且是针对每一个特征维度来做的，而不是针对样本。 StandardScaler对每列分别标准化。#PolynomialFeatures(degree=1)：进行特征的构造。它是使用多项式的方法来进行的，如果有a，b两个特征，那么它的2次多项式为（1,a,b,a^2,ab, b^2）。PolynomialFeatures有三个参数：#1.degree：控制多项式的度#2.interaction_only： 默认为False，如果指定为True，那么就不会有特征自己和自己结合的项，上面的二次项中没有a2和b2。#3.include_bias：默认为True。如果为True的话，那么就会有上面的 1那一项。#LogisticRegression（）建立逻辑回归模型lr = Pipeline([('sc', StandardScaler()),('clf', LogisticRegression(multi_class="multinomial",solver="newton-cg")) ])lr.fit(X_train,y_train) #ravel将多维数组降位一维,y轴是标签只有一维#4.画图准备N, M = 500, 500  # 横纵各采样多少个值x1_min, x1_max = x[:, 0].min(), x[:, 0].max()  # 第0列的范围x2_min, x2_max = x[:, 1].min(), x[:, 1].max()  # 第1列的范围t1 = np.linspace(x1_min, x1_max, N)t2 = np.linspace(x2_min, x2_max, M)x1, x2 = np.meshgrid(t1, t2)  # 生成网格采样点x_test = np.stack((x1.flat, x2.flat), axis=1)  # 测试点#5.开始画图cm_light = mpl.colors.ListedColormap(['#77E0A0', '#FF8080', '#A0A0FF'])cm_dark = mpl.colors.ListedColormap(['g', 'r', 'b'])y_hat = lr.predict(x_test)  # 预测值y_hat = y_hat.reshape(x1.shape)  # 使之与输入的形状相同# print(y_hat)plt.pcolormesh(x1, x2, y_hat,shading='auto', cmap=cm_light)  # 预测值的显示 其实就是背景plt.scatter(x[:, 0], x[:, 1], c=y.ravel(), edgecolors='k', s=50, cmap=cm_dark)  # 样本的显示plt.xlabel('petal length')plt.ylabel('petal width')plt.xlim(x1_min, x1_max)plt.ylim(x2_min, x2_max)plt.grid()plt.savefig('2.png')plt.show()
#6.训练集上的预测结果y_hat = lr.predict(x) #回归的yy =y.ravel() #变一维print(y)#y = y.reshape(-1) #变一维#print(y)result = y_hat == y #回归的y和真实值y比较print(y_hat)print(result)acc = np.mean(result) #求平均数print('准确率: %.2f%%' % (100 * acc))

方式二：官方例子.html#sphx-glr-auto-examples-linear-model-plot-iris-logistic-py

print(__doc__)# Code source: Gaël Varoquaux
# Modified for documentation by Jaques Grobler
# License: BSD 3 clauseimport numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn import datasets# import some data to play with
iris = datasets.load_iris()
X = iris.data[:, :2]  # we only take the first two features.
Y = iris.target# Create an instance of Logistic Regression Classifier and fit the data.
logreg = LogisticRegression(C=1e5)
logreg.fit(X, Y)# Plot the decision boundary. For that, we will assign a color to each
# point in the mesh [x_min, x_max]x[y_min, y_max].
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
h = .02  # step size in the mesh
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
Z = logreg.predict(np.c_[xx.ravel(), yy.ravel()])# Put the result into a color plot
Z = Z.reshape(xx.shape)
plt.figure(1, figsize=(4, 3))
plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired)# Plot also the training points
plt.scatter(X[:, 0], X[:, 1], c=Y, edgecolors='k', cmap=plt.cm.Paired)
plt.xlabel('Sepal length')
plt.ylabel('Sepal width')plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.xticks(())
plt.yticks(())plt.show()

方式三：

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegressiondef plot_decision_boundary(model, axis):x0, x1 = np.meshgrid(np.linspace(axis[0], axis[1], int((axis[1] - axis[0]) * 100)).reshape(-1, 1),np.linspace(axis[2], axis[3], int((axis[3] - axis[2]) * 100)).reshape(-1, 1),)X_new = np.c_[x0.ravel(), x1.ravel()]y_predict = model.predict(X_new)zz = y_predict.reshape(x0.shape)from matplotlib.colors import ListedColormapcustom_cmap = ListedColormap(['#EF9A9A', '#FFF59D', '#90CAF9'])plt.contourf(x0, x1, zz, linewidth=5, cmap=custom_cmap)iris = datasets.load_iris()
X = iris.data[:,:2] #取二维数据做分析
print(X)
y = iris.target
# random_state是数据随机化
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666) #把所有数据随机拆成俩个部分，（X_train,y_train)训练集;(X_test,y_testce)测试集log_reg2 = LogisticRegression() #调用LR算法函数
log_reg2.fit(X_train, y_train) #填充数据到LR函数
print('二维数据的准确率：',log_reg2.score(X_test, y_test)) #对比训练数据和测试数据计算准确率plot_decision_boundary(log_reg2, axis=[4, 8.5, 1.5, 4.5])
plt.scatter(X[y==0,0], X[y==0,1])
plt.scatter(X[y==1,0], X[y==1,1])
plt.scatter(X[y==2,0], X[y==2,1])
plt.show()X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)
print('全部数据的准确率：',log_reg.score(X_test, y_test))

方式四:手写的逻辑回归算法

logistic_regression_binary.csv

-0.017612   14.053064   0
-1.395634   4.662541    1
-0.752157   6.538620    0
-1.322371   7.152853    0
0.423363    11.054677   0
0.406704    7.067335    1
0.667394    12.741452   0
-2.460150   6.866805    1
0.569411    9.548755    0
-0.026632   10.427743   0
0.850433    6.920334    1
1.347183    13.175500   0
1.176813    3.167020    1
-1.781871   9.097953    0
-0.566606   5.749003    1
0.931635    1.589505    1
-0.024205   6.151823    1
-0.036453   2.690988    1
-0.196949   0.444165    1
1.014459    5.754399    1
1.985298    3.230619    1
-1.693453   -0.557540   1
-0.576525   11.778922   0
-0.346811   -1.678730   1
-2.124484   2.672471    1
1.217916    9.597015    0
-0.733928   9.098687    0
-3.642001   -1.618087   1
0.315985    3.523953    1
1.416614    9.619232    0
-0.386323   3.989286    1
0.556921    8.294984    1
1.224863    11.587360   0
-1.347803   -2.406051   1
1.196604    4.951851    1
0.275221    9.543647    0
0.470575    9.332488    0
-1.889567   9.542662    0
-1.527893   12.150579   0
-1.185247   11.309318   0
-0.445678   3.297303    1
1.042222    6.105155    1
-0.618787   10.320986   0
1.152083    0.548467    1
0.828534    2.676045    1
-1.237728   10.549033   0
-0.683565   -2.166125   1
0.229456    5.921938    1
-0.959885   11.555336   0
0.492911    10.993324   0
0.184992    8.721488    0
-0.355715   10.325976   0
-0.397822   8.058397    0
0.824839    13.730343   0
1.507278    5.027866    1
0.099671    6.835839    1
-0.344008   10.717485   0
1.785928    7.718645    1
-0.918801   11.560217   0
-0.364009   4.747300    1
-0.841722   4.119083    1
0.490426    1.960539    1
-0.007194   9.075792    0
0.356107    12.447863   0
0.342578    12.281162   0
-0.810823   -1.466018   1
2.530777    6.476801    1
1.296683    11.607559   0
0.475487    12.040035   0
-0.783277   11.009725   0
0.074798    11.023650   0
-1.337472   0.468339    1
-0.102781   13.763651   0
-0.147324   2.874846    1
0.518389    9.887035    0
1.015399    7.571882    0
-1.658086   -0.027255   1
1.319944    2.171228    1
2.056216    5.019981    1
-0.851633   4.375691    1
-1.510047   6.061992    0
-1.076637   -3.181888   1
1.821096    10.283990   0
3.010150    8.401766    1
-1.099458   1.688274    1
-0.834872   -1.733869   1
-0.846637   3.849075    1
1.400102    12.628781   0
1.752842    5.468166    1
0.078557    0.059736    1
0.089392    -0.715300   1
1.825662    12.693808   0
0.197445    9.744638    0
0.126117    0.922311    1
-0.679797   1.220530    1
0.677983    2.556666    1
0.761349    10.693862   0
-2.168791   0.143632    1
1.388610    9.341997    0
0.317029    14.739025   0

# 这是一个手写的逻辑回归算法，解决分类问题
# 网上很多傻逼拿来卖分，还有一些傻逼复制粘贴的看不明白，本人最讨厌这种忽悠
# 分享能运行的代码和注释import numpy as np #机器算法库
import matplotlib.pyplot as plt #图形绘制库# sigmod函数，即得分函数,逻辑回归的计算公式；
# 值域在0，1之间；
# 来计算数据的概率是0还是1；得到y大于等于0.5是1，y小于等于0.5为0
def sigmod(x):return 1/(1+np.exp(-x))# 损失函数，损失函数的计算公式；
# hx是概率估计值，是sigmod(x)得来的值
# y是样本真值
def cost(hx, y):return -y * np.log(hx) - (1-y) * np.log(1-hx)# 梯度下降函数
def gradient(current_para, x, y, learning_rate):m = len(y)matrix_gradient = np.zeros(len(x[0]))for i in range(m):current_x = x[i]current_y = y[i]current_x = np.asarray(current_x)matrix_gradient += (sigmod(np.dot(current_para, current_x)) - current_y) * current_xnew_para = current_para - learning_rate * matrix_gradientreturn new_para# 误差计算函数
def error(para, x, y):total = len(y)error_num = 0for i in range(total):current_x = x[i]current_y = y[i]hx = sigmod(np.dot(para, current_x)) #LR算法if cost(hx, current_y) > 0.5: #进一步计算损失error_num += 1return error_num/total# 训练过程
def train(initial_para, x, y, learning_rate, num_iter):dataMat = np.asarray(x)labelMat = np.asarray(y)para = initial_parafor i in range(num_iter+1):para = gradient(para, dataMat, labelMat, learning_rate) #梯度下降法来if i % 100 == 0:err = error(para, dataMat, labelMat)print("iter:" + str(i) + " ; error:" + str(err))return para# 加载数据集
def load_dataset():dataMat = []labelMat = []with open("logistic_regression_binary.csv", "r+") as file_object:lines = file_object.readlines()for line in lines:line_array = line.strip().split()# 数据矩阵dataMat.append([1.0, float(line_array[0]), float(line_array[1])])# 标签矩阵labelMat.append(int(line_array[2]))return dataMat, labelMat# 绘制图形
def plotBestFit(wei, data, label):if type(wei).__name__ == 'ndarray':weights = weielse:weights = wei.getA()fig = plt.figure(0)ax = fig.add_subplot(111)xxx = np.arange(-3,3,0.1)yyy = - weights[0]/weights[2] - weights[1]/weights[2]*xxxax.plot(xxx,yyy)cord1 = []cord0 = []for i in range(len(label)):if label[i] == 1:cord1.append(data[i][1:3])else:cord0.append(data[i][1:3])cord1 = np.array(cord1)cord0 = np.array(cord0)ax.scatter(cord1[:, 0], cord1[:, 1], c='red')ax.scatter(cord0[:, 0], cord0[:, 1], c='green')plt.show()def logistic_regression():x, y = load_dataset()n = len(x[0])initial_para = np.ones(n)learning_rate = 0.001num_iter = 1000print("起始参数：")print(initial_para)para = train(initial_para, x, y, learning_rate, num_iter)print("最后训练得到的参数：")print(para)plotBestFit(para, x, y)logistic_regression()