三种实现逻辑回归算法的代码
三种实现逻辑回归算法的代码
逻辑回归主要用于解决二分类问题(也可以推广到多分类)。
本文介绍需要了解的数学知识,以及如何用逻辑回归算法来解决分类问题。视频:(原链接已丢失)
数据集:iris.data
5.1,3.5,1.4,0.2,Iris-setosa
4.9,3.0,1.4,0.2,Iris-setosa
4.7,3.2,1.3,0.2,Iris-setosa
4.6,3.1,1.5,0.2,Iris-setosa
5.0,3.6,1.4,0.2,Iris-setosa
5.4,3.9,1.7,0.4,Iris-setosa
4.6,3.4,1.4,0.3,Iris-setosa
5.0,3.4,1.5,0.2,Iris-setosa
4.4,2.9,1.4,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
5.4,3.7,1.5,0.2,Iris-setosa
4.8,3.4,1.6,0.2,Iris-setosa
4.8,3.0,1.4,0.1,Iris-setosa
4.3,3.0,1.1,0.1,Iris-setosa
5.8,4.0,1.2,0.2,Iris-setosa
5.7,4.4,1.5,0.4,Iris-setosa
5.4,3.9,1.3,0.4,Iris-setosa
5.1,3.5,1.4,0.3,Iris-setosa
5.7,3.8,1.7,0.3,Iris-setosa
5.1,3.8,1.5,0.3,Iris-setosa
5.4,3.4,1.7,0.2,Iris-setosa
5.1,3.7,1.5,0.4,Iris-setosa
4.6,3.6,1.0,0.2,Iris-setosa
5.1,3.3,1.7,0.5,Iris-setosa
4.8,3.4,1.9,0.2,Iris-setosa
5.0,3.0,1.6,0.2,Iris-setosa
5.0,3.4,1.6,0.4,Iris-setosa
5.2,3.5,1.5,0.2,Iris-setosa
5.2,3.4,1.4,0.2,Iris-setosa
4.7,3.2,1.6,0.2,Iris-setosa
4.8,3.1,1.6,0.2,Iris-setosa
5.4,3.4,1.5,0.4,Iris-setosa
5.2,4.1,1.5,0.1,Iris-setosa
5.5,4.2,1.4,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
5.0,3.2,1.2,0.2,Iris-setosa
5.5,3.5,1.3,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
4.4,3.0,1.3,0.2,Iris-setosa
5.1,3.4,1.5,0.2,Iris-setosa
5.0,3.5,1.3,0.3,Iris-setosa
4.5,2.3,1.3,0.3,Iris-setosa
4.4,3.2,1.3,0.2,Iris-setosa
5.0,3.5,1.6,0.6,Iris-setosa
5.1,3.8,1.9,0.4,Iris-setosa
4.8,3.0,1.4,0.3,Iris-setosa
5.1,3.8,1.6,0.2,Iris-setosa
4.6,3.2,1.4,0.2,Iris-setosa
5.3,3.7,1.5,0.2,Iris-setosa
5.0,3.3,1.4,0.2,Iris-setosa
7.0,3.2,4.7,1.4,Iris-versicolor
6.4,3.2,4.5,1.5,Iris-versicolor
6.9,3.1,4.9,1.5,Iris-versicolor
5.5,2.3,4.0,1.3,Iris-versicolor
6.5,2.8,4.6,1.5,Iris-versicolor
5.7,2.8,4.5,1.3,Iris-versicolor
6.3,3.3,4.7,1.6,Iris-versicolor
4.9,2.4,3.3,1.0,Iris-versicolor
6.6,2.9,4.6,1.3,Iris-versicolor
5.2,2.7,3.9,1.4,Iris-versicolor
5.0,2.0,3.5,1.0,Iris-versicolor
5.9,3.0,4.2,1.5,Iris-versicolor
6.0,2.2,4.0,1.0,Iris-versicolor
6.1,2.9,4.7,1.4,Iris-versicolor
5.6,2.9,3.6,1.3,Iris-versicolor
6.7,3.1,4.4,1.4,Iris-versicolor
5.6,3.0,4.5,1.5,Iris-versicolor
5.8,2.7,4.1,1.0,Iris-versicolor
6.2,2.2,4.5,1.5,Iris-versicolor
5.6,2.5,3.9,1.1,Iris-versicolor
5.9,3.2,4.8,1.8,Iris-versicolor
6.1,2.8,4.0,1.3,Iris-versicolor
6.3,2.5,4.9,1.5,Iris-versicolor
6.1,2.8,4.7,1.2,Iris-versicolor
6.4,2.9,4.3,1.3,Iris-versicolor
6.6,3.0,4.4,1.4,Iris-versicolor
6.8,2.8,4.8,1.4,Iris-versicolor
6.7,3.0,5.0,1.7,Iris-versicolor
6.0,2.9,4.5,1.5,Iris-versicolor
5.7,2.6,3.5,1.0,Iris-versicolor
5.5,2.4,3.8,1.1,Iris-versicolor
5.5,2.4,3.7,1.0,Iris-versicolor
5.8,2.7,3.9,1.2,Iris-versicolor
6.0,2.7,5.1,1.6,Iris-versicolor
5.4,3.0,4.5,1.5,Iris-versicolor
6.0,3.4,4.5,1.6,Iris-versicolor
6.7,3.1,4.7,1.5,Iris-versicolor
6.3,2.3,4.4,1.3,Iris-versicolor
5.6,3.0,4.1,1.3,Iris-versicolor
5.5,2.5,4.0,1.3,Iris-versicolor
5.5,2.6,4.4,1.2,Iris-versicolor
6.1,3.0,4.6,1.4,Iris-versicolor
5.8,2.6,4.0,1.2,Iris-versicolor
5.0,2.3,3.3,1.0,Iris-versicolor
5.6,2.7,4.2,1.3,Iris-versicolor
5.7,3.0,4.2,1.2,Iris-versicolor
5.7,2.9,4.2,1.3,Iris-versicolor
6.2,2.9,4.3,1.3,Iris-versicolor
5.1,2.5,3.0,1.1,Iris-versicolor
5.7,2.8,4.1,1.3,Iris-versicolor
6.3,3.3,6.0,2.5,Iris-virginica
5.8,2.7,5.1,1.9,Iris-virginica
7.1,3.0,5.9,2.1,Iris-virginica
6.3,2.9,5.6,1.8,Iris-virginica
6.5,3.0,5.8,2.2,Iris-virginica
7.6,3.0,6.6,2.1,Iris-virginica
4.9,2.5,4.5,1.7,Iris-virginica
7.3,2.9,6.3,1.8,Iris-virginica
6.7,2.5,5.8,1.8,Iris-virginica
7.2,3.6,6.1,2.5,Iris-virginica
6.5,3.2,5.1,2.0,Iris-virginica
6.4,2.7,5.3,1.9,Iris-virginica
6.8,3.0,5.5,2.1,Iris-virginica
5.7,2.5,5.0,2.0,Iris-virginica
5.8,2.8,5.1,2.4,Iris-virginica
6.4,3.2,5.3,2.3,Iris-virginica
6.5,3.0,5.5,1.8,Iris-virginica
7.7,3.8,6.7,2.2,Iris-virginica
7.7,2.6,6.9,2.3,Iris-virginica
6.0,2.2,5.0,1.5,Iris-virginica
6.9,3.2,5.7,2.3,Iris-virginica
5.6,2.8,4.9,2.0,Iris-virginica
7.7,2.8,6.7,2.0,Iris-virginica
6.3,2.7,4.9,1.8,Iris-virginica
6.7,3.3,5.7,2.1,Iris-virginica
7.2,3.2,6.0,1.8,Iris-virginica
6.2,2.8,4.8,1.8,Iris-virginica
6.1,3.0,4.9,1.8,Iris-virginica
6.4,2.8,5.6,2.1,Iris-virginica
7.2,3.0,5.8,1.6,Iris-virginica
7.4,2.8,6.1,1.9,Iris-virginica
7.9,3.8,6.4,2.0,Iris-virginica
6.4,2.8,5.6,2.2,Iris-virginica
6.3,2.8,5.1,1.5,Iris-virginica
6.1,2.6,5.6,1.4,Iris-virginica
7.7,3.0,6.1,2.3,Iris-virginica
6.3,3.4,5.6,2.4,Iris-virginica
6.4,3.1,5.5,1.8,Iris-virginica
6.0,3.0,4.8,1.8,Iris-virginica
6.9,3.1,5.4,2.1,Iris-virginica
6.7,3.1,5.6,2.4,Iris-virginica
6.9,3.1,5.1,2.3,Iris-virginica
5.8,2.7,5.1,1.9,Iris-virginica
6.8,3.2,5.9,2.3,Iris-virginica
6.7,3.3,5.7,2.5,Iris-virginica
6.7,3.0,5.2,2.3,Iris-virginica
6.3,2.5,5.0,1.9,Iris-virginica
6.5,3.0,5.2,2.0,Iris-virginica
6.2,3.4,5.4,2.3,Iris-virginica
5.9,3.0,5.1,1.8,Iris-virginica
方式一:
#!/usr/bin/python
# -*- coding:utf-8 -*-
#鸢尾花分类
#1.首先导入包:
import numpy as np #机器学习基础包
from sklearn.linear_model import LogisticRegression #逻辑回归算法库
import matplotlib.pyplot as plt #绘图工具包
import matplotlib as mpl #绘图地图包
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn import datasets  # data sets bundled with scikit-learn


def iris_type(s):
    """Map an iris species name to its integer class label.

    Written as a ``converters`` callback for ``np.loadtxt`` when reading the
    label column of ``iris.data``. Depending on the NumPy version and the
    ``encoding`` argument, a converter may receive either ``bytes`` or
    ``str``, so both are accepted.

    Args:
        s: Species name, e.g. ``b'Iris-setosa'`` or ``'Iris-setosa'``.

    Returns:
        0 for Iris-setosa, 1 for Iris-versicolor, 2 for Iris-virginica.

    Raises:
        KeyError: If ``s`` is not one of the three known species names.
    """
    if isinstance(s, bytes):
        s = s.decode('utf-8')
    it = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}
    # strip() tolerates trailing whitespace / line endings left on the field.
    return it[s.strip()]


# 2. Data preprocessing: obtain x and y
if __name__ == "__main__":
    path = u'iris.data'  # data file path (only used by the commented-out loader below)

    # Alternative loader: read the file as comma-separated floats and convert
    # column 4 with iris_type.
    # data = np.loadtxt(path, dtype=float, delimiter=',', converters={4: iris_type})
    # print(data)
    # Columns 0-3 form x, column 4 becomes y.
    # x, y = np.split(data, (4,), axis=1)  # axis=1 splits column-wise; the default axis=0 splits row-wise
    # Only the first two features are kept so the result can be drawn in 2-D.
    # x = x[:, :2]

    # Use the copy of the data set that ships with scikit-learn.
    iris = datasets.load_iris()
    x = iris.data[:, :2]  # we only take the first two features.
    y = iris.target
    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(x, y, random_state=666)

    # 3. Build the model with a Pipeline.
    # StandardScaler(): removes the mean and scales to unit variance. It works
    # per feature column, not per sample.
    # PolynomialFeatures(degree=1): constructs polynomial features; for two
    # features a, b the degree-2 expansion is (1, a, b, a^2, ab, b^2). It has
    # three parameters:
    #   1. degree: the degree of the polynomial
    #   2. interaction_only: default False; if True, no feature is combined
    #      with itself, i.e. the a^2 and b^2 terms are dropped
    #   3. include_bias: default True; if True the constant term 1 is included
    # (PolynomialFeatures is described here but is not part of the pipeline below.)
    # LogisticRegression(): builds the logistic regression model.
    # The deprecated ``multi_class="multinomial"`` argument is not passed:
    # with a non-liblinear solver such as newton-cg, scikit-learn already fits
    # a multinomial model for a multi-class target.
    lr = Pipeline([
        ('sc', StandardScaler()),
        ('clf', LogisticRegression(solver="newton-cg")),
    ])
    lr.fit(X_train, y_train)

    # 4. Prepare the plot.
    N, M = 500, 500  # number of samples along the horizontal / vertical axis
    x1_min, x1_max = x[:, 0].min(), x[:, 0].max()  # range of column 0
    x2_min, x2_max = x[:, 1].min(), x[:, 1].max()  # range of column 1
    t1 = np.linspace(x1_min, x1_max, N)
    t2 = np.linspace(x2_min, x2_max, M)
    x1, x2 = np.meshgrid(t1, t2)  # grid of sampling points
    grid_points = np.stack((x1.flat, x2.flat), axis=1)  # points to classify

    # 5. Draw the plot.
    cm_light = mpl.colors.ListedColormap(['#77E0A0', '#FF8080', '#A0A0FF'])
    cm_dark = mpl.colors.ListedColormap(['g', 'r', 'b'])
    y_hat = lr.predict(grid_points)  # predicted class of every grid point
    y_hat = y_hat.reshape(x1.shape)  # back to the shape of the grid
    # print(y_hat)
    plt.pcolormesh(x1, x2, y_hat, shading='auto', cmap=cm_light)  # predictions, i.e. the background
    plt.scatter(x[:, 0], x[:, 1], c=y.ravel(), edgecolors='k', s=50, cmap=cm_dark)  # the samples
    # Columns 0 and 1 of the iris data are the sepal measurements, so the
    # axes are labelled accordingly.
    plt.xlabel('sepal length')
    plt.ylabel('sepal width')
    plt.xlim(x1_min, x1_max)
    plt.ylim(x2_min, x2_max)
    plt.grid()
    plt.savefig('2.png')
    plt.show()

    # 6. Predictions on the whole data set (training and held-out samples
    # together), compared with the true labels.
    y_hat = lr.predict(x)  # predicted labels
    y = y.ravel()  # flatten to one dimension
    print(y)
    # y = y.reshape(-1)  # flatten to one dimension
    # print(y)
    result = y_hat == y  # element-wise comparison of prediction and truth
    print(y_hat)
    print(result)
    acc = np.mean(result)  # fraction of correct predictions
    print('准确率: %.2f%%' % (100 * acc))
方式二:官方例子 https://scikit-learn.org/stable/auto_examples/linear_model/plot_iris_logistic.html#sphx-glr-auto-examples-linear-model-plot-iris-logistic-py
# Prints the module docstring; this is None when the script has no docstring.
print(__doc__)

# Code source: Gaël Varoquaux
# Modified for documentation by Jaques Grobler
# License: BSD 3 clause

import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn import datasets

# import some data to play with
iris = datasets.load_iris()
X = iris.data[:, :2]  # we only take the first two features.
Y = iris.target

# Create an instance of Logistic Regression Classifier and fit the data.
logreg = LogisticRegression(C=1e5)
logreg.fit(X, Y)

# Plot the decision boundary. For that, we will assign a color to each
# point in the mesh [x_min, x_max]x[y_min, y_max].
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
h = .02  # step size in the mesh
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
Z = logreg.predict(np.c_[xx.ravel(), yy.ravel()])

# Put the result into a color plot
Z = Z.reshape(xx.shape)
plt.figure(1, figsize=(4, 3))
# xx, yy and Z all have the same shape, so let matplotlib choose the shading.
plt.pcolormesh(xx, yy, Z, shading='auto', cmap=plt.cm.Paired)

# Plot also the training points
plt.scatter(X[:, 0], X[:, 1], c=Y, edgecolors='k', cmap=plt.cm.Paired)
plt.xlabel('Sepal length')
plt.ylabel('Sepal width')

plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.xticks(())
plt.yticks(())

plt.show()
方式三:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression


def plot_decision_boundary(model, axis):
    """Fill the current matplotlib axes with the class regions of ``model``.

    A grid with 100 points per unit is laid over the rectangle described by
    ``axis``, every grid point is classified with ``model.predict`` and the
    predicted labels are drawn as filled contours. The function neither
    creates a figure nor calls ``plt.show()``.

    Args:
        model: Object with a ``predict`` method taking an (n_points, 2) array.
        axis: Sequence ``[x_min, x_max, y_min, y_max]`` bounding the grid.
    """
    # np.meshgrid expects 1-D coordinate vectors, so the linspace output is
    # passed directly.
    x0, x1 = np.meshgrid(
        np.linspace(axis[0], axis[1], int((axis[1] - axis[0]) * 100)),
        np.linspace(axis[2], axis[3], int((axis[3] - axis[2]) * 100)),
    )
    X_new = np.c_[x0.ravel(), x1.ravel()]
    y_predict = model.predict(X_new)
    zz = y_predict.reshape(x0.shape)

    from matplotlib.colors import ListedColormap
    custom_cmap = ListedColormap(['#EF9A9A', '#FFF59D', '#90CAF9'])

    # contourf has no ``linewidth`` keyword, so none is passed.
    plt.contourf(x0, x1, zz, cmap=custom_cmap)


iris = datasets.load_iris()
X = iris.data[:, :2]  # keep two features so the result can be drawn in 2-D
print(X)
y = iris.target
# random_state seeds the shuffle so the split is the same on every run.
# The data is split at random into two parts: (X_train, y_train) is the
# training set and (X_test, y_test) is the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)

log_reg2 = LogisticRegression()  # create the logistic regression estimator
log_reg2.fit(X_train, y_train)  # fit it on the training set
# score() reports the accuracy of the fitted model on the test set.
print('二维数据的准确率:', log_reg2.score(X_test, y_test))

plot_decision_boundary(log_reg2, axis=[4, 8.5, 1.5, 4.5])
plt.scatter(X[y == 0, 0], X[y == 0, 1])
plt.scatter(X[y == 1, 0], X[y == 1, 1])
plt.scatter(X[y == 2, 0], X[y == 2, 1])
plt.show()

# Repeat the experiment with all features of the data set.
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)
print('全部数据的准确率:', log_reg.score(X_test, y_test))
方式四:手写的逻辑回归算法
logistic_regression_binary.csv
-0.017612 14.053064 0
-1.395634 4.662541 1
-0.752157 6.538620 0
-1.322371 7.152853 0
0.423363 11.054677 0
0.406704 7.067335 1
0.667394 12.741452 0
-2.460150 6.866805 1
0.569411 9.548755 0
-0.026632 10.427743 0
0.850433 6.920334 1
1.347183 13.175500 0
1.176813 3.167020 1
-1.781871 9.097953 0
-0.566606 5.749003 1
0.931635 1.589505 1
-0.024205 6.151823 1
-0.036453 2.690988 1
-0.196949 0.444165 1
1.014459 5.754399 1
1.985298 3.230619 1
-1.693453 -0.557540 1
-0.576525 11.778922 0
-0.346811 -1.678730 1
-2.124484 2.672471 1
1.217916 9.597015 0
-0.733928 9.098687 0
-3.642001 -1.618087 1
0.315985 3.523953 1
1.416614 9.619232 0
-0.386323 3.989286 1
0.556921 8.294984 1
1.224863 11.587360 0
-1.347803 -2.406051 1
1.196604 4.951851 1
0.275221 9.543647 0
0.470575 9.332488 0
-1.889567 9.542662 0
-1.527893 12.150579 0
-1.185247 11.309318 0
-0.445678 3.297303 1
1.042222 6.105155 1
-0.618787 10.320986 0
1.152083 0.548467 1
0.828534 2.676045 1
-1.237728 10.549033 0
-0.683565 -2.166125 1
0.229456 5.921938 1
-0.959885 11.555336 0
0.492911 10.993324 0
0.184992 8.721488 0
-0.355715 10.325976 0
-0.397822 8.058397 0
0.824839 13.730343 0
1.507278 5.027866 1
0.099671 6.835839 1
-0.344008 10.717485 0
1.785928 7.718645 1
-0.918801 11.560217 0
-0.364009 4.747300 1
-0.841722 4.119083 1
0.490426 1.960539 1
-0.007194 9.075792 0
0.356107 12.447863 0
0.342578 12.281162 0
-0.810823 -1.466018 1
2.530777 6.476801 1
1.296683 11.607559 0
0.475487 12.040035 0
-0.783277 11.009725 0
0.074798 11.023650 0
-1.337472 0.468339 1
-0.102781 13.763651 0
-0.147324 2.874846 1
0.518389 9.887035 0
1.015399 7.571882 0
-1.658086 -0.027255 1
1.319944 2.171228 1
2.056216 5.019981 1
-0.851633 4.375691 1
-1.510047 6.061992 0
-1.076637 -3.181888 1
1.821096 10.283990 0
3.010150 8.401766 1
-1.099458 1.688274 1
-0.834872 -1.733869 1
-0.846637 3.849075 1
1.400102 12.628781 0
1.752842 5.468166 1
0.078557 0.059736 1
0.089392 -0.715300 1
1.825662 12.693808 0
0.197445 9.744638 0
0.126117 0.922311 1
-0.679797 1.220530 1
0.677983 2.556666 1
0.761349 10.693862 0
-2.168791 0.143632 1
1.388610 9.341997 0
0.317029 14.739025 0
# A hand-written logistic regression implementation that solves a
# classification problem.
# Many copies of this code found online are sold for download credits or are
# copy-pasted without any explanation; this version is shared as runnable code
# together with comments.
import matplotlib.pyplot as plt  # plotting library
import numpy as np  # numerical computing library

# sigmod function, i.e. the score function: the logistic regression formula;
# Its range lies between 0 and 1;
# it gives the probability that a sample belongs to class 1: y >= 0.5 is read
# as 1 and y below 0.5 as 0
def sigmod(x):
    """Evaluate the logistic (sigmoid) function ``1 / (1 + exp(-x))``.

    Args:
        x: A number or NumPy array; arrays are processed element-wise.

    Returns:
        The sigmoid of ``x``, with the same shape as ``x``.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator


# Loss function: the formula used to compute the loss;
# hx is the estimated probability, i.e. the value produced by sigmod(x)
# y is the true label of the sample
def cost(hx, y):
    """Cross-entropy loss of a predicted probability against a 0/1 label.

    Computes ``-y * log(hx) - (1 - y) * log(1 - hx)``.

    Args:
        hx: Estimated probability of class 1 (number or NumPy array).
        y: True label, 0 or 1 (number or NumPy array).

    Returns:
        The loss, always finite.
    """
    # When the sigmoid saturates, hx is exactly 0.0 or 1.0 and the unclipped
    # formula evaluates 0 * log(0), which is NaN. Clipping keeps the result
    # finite and leaves every other value of hx untouched.
    eps = 1e-15
    hx = np.clip(hx, eps, 1 - eps)
    return -y * np.log(hx) - (1 - y) * np.log(1 - hx)


# Gradient descent function
def gradient(current_para, x, y, learning_rate):
    """Apply one batch gradient-descent step to the parameter vector.

    The gradient is the sum over all samples of
    ``(sigmod(w . x_i) - y_i) * x_i``. It is summed, not averaged, so the
    effective step grows with the number of samples.

    Args:
        current_para: Current parameter vector, one entry per feature column.
        x: Samples, one row per sample (2-D array or list of rows).
        y: Labels, one 0/1 value per sample.
        learning_rate: Step size multiplied with the summed gradient.

    Returns:
        The updated parameter vector. ``current_para`` is returned unchanged
        when there are no samples.
    """
    features = np.asarray(x, dtype=float)
    labels = np.asarray(y, dtype=float)
    if labels.size == 0:
        # No samples means a zero gradient.
        return current_para

    # Vectorised form of the per-sample loop: residual_i = h(x_i) - y_i and
    # gradient = X^T . residuals.
    residuals = sigmod(np.dot(features, current_para)) - labels
    matrix_gradient = np.dot(features.T, residuals)
    return current_para - learning_rate * matrix_gradient


# Error computation function
def error(para, x, y, loss_threshold=0.5):
    """Return the fraction of samples whose loss exceeds ``loss_threshold``.

    A sample counts as an error when its cross-entropy loss is above the
    threshold. With the default of 0.5 this means the predicted probability
    of the true class is below exp(-0.5) (about 0.607), which is stricter
    than thresholding the probability at 0.5.

    Args:
        para: Parameter vector.
        x: Samples, one row per sample.
        y: Labels, one 0/1 value per sample.
        loss_threshold: Loss above which a sample counts as an error.

    Returns:
        Error count divided by the number of samples, or 0.0 when there are
        no samples.
    """
    total = len(y)
    if total == 0:
        # Avoid dividing by zero on an empty data set.
        return 0.0

    error_num = 0
    for current_x, current_y in zip(x, y):
        hx = sigmod(np.dot(para, current_x))  # predicted probability of class 1
        if cost(hx, current_y) > loss_threshold:  # judge the sample by its loss
            error_num += 1
    return error_num / total


# Training procedure
def train(initial_para, x, y, learning_rate, num_iter):
    """Fit the parameters by repeated gradient-descent steps.

    The loop index runs from 0 to ``num_iter`` inclusive, so
    ``num_iter + 1`` update steps are performed. The error rate is printed
    at every index that is a multiple of 100.

    Args:
        initial_para: Starting parameter vector.
        x: Samples, one row per sample.
        y: Labels, one 0/1 value per sample.
        learning_rate: Step size passed on to ``gradient``.
        num_iter: Highest loop index (see above).

    Returns:
        The parameter vector after the last update step.
    """
    dataMat = np.asarray(x)
    labelMat = np.asarray(y)
    para = initial_para
    for i in range(num_iter + 1):
        para = gradient(para, dataMat, labelMat, learning_rate)  # one gradient-descent step
        if i % 100 == 0:
            err = error(para, dataMat, labelMat)
            print("iter:" + str(i) + " ; error:" + str(err))
    return para


# Load the data set
def load_dataset(path="logistic_regression_binary.csv"):
    """Read the whitespace-separated training file into Python lists.

    Each non-blank line holds two feature values followed by an integer
    label. Despite the ``.csv`` name of the default file, fields are split on
    whitespace, not commas.

    Args:
        path: File to read; defaults to the data file used by this script.

    Returns:
        A tuple ``(dataMat, labelMat)``. ``dataMat`` is a list of rows
        ``[1.0, x1, x2]`` where the leading 1.0 is the bias term, and
        ``labelMat`` is the list of integer labels.
    """
    dataMat = []
    labelMat = []
    # Plain read mode: the file is never written, so write permission
    # ("r+") should not be required.
    with open(path, "r") as file_object:
        for line in file_object:
            line_array = line.split()
            if not line_array:
                # Skip blank lines such as a trailing newline at end of file.
                continue
            # Data matrix row
            dataMat.append([1.0, float(line_array[0]), float(line_array[1])])
            # Label
            labelMat.append(int(line_array[2]))
    return dataMat, labelMat


# Draw the figure
def plotBestFit(wei, data, label):
    """Plot the samples and the fitted decision boundary.

    The boundary is the line ``w0 + w1 * x1 + w2 * x2 = 0``, drawn for x1 in
    [-3, 3). Samples labelled 1 are drawn in red, all others in green.

    Args:
        wei: Weights ``[w0, w1, w2]`` as an ndarray, ``np.matrix`` or sequence.
        data: Rows of the form ``[bias, x1, x2]``; columns 1 and 2 are plotted.
        label: One label per row of ``data``.
    """
    # np.asarray covers ndarray and np.matrix alike; ravel() gives a flat
    # vector whatever the orientation of the input.
    weights = np.asarray(wei, dtype=float).ravel()

    fig = plt.figure(0)
    ax = fig.add_subplot(111)
    xxx = np.arange(-3, 3, 0.1)
    yyy = - weights[0] / weights[2] - weights[1] / weights[2] * xxx
    ax.plot(xxx, yyy)

    cord1 = []
    cord0 = []
    for i in range(len(label)):
        if label[i] == 1:
            cord1.append(data[i][1:3])
        else:
            cord0.append(data[i][1:3])
    # reshape(-1, 2) keeps the arrays two-dimensional even when a class has
    # no samples, so the column indexing below cannot fail.
    cord1 = np.array(cord1, dtype=float).reshape(-1, 2)
    cord0 = np.array(cord0, dtype=float).reshape(-1, 2)
    ax.scatter(cord1[:, 0], cord1[:, 1], c='red')
    ax.scatter(cord0[:, 0], cord0[:, 1], c='green')
    plt.show()


def logistic_regression():
    """Load the data, train the model, print the parameters and plot the fit."""
    x, y = load_dataset()
    n = len(x[0])  # number of parameters = number of columns, bias included
    initial_para = np.ones(n)
    learning_rate = 0.001
    num_iter = 1000
    print("起始参数:")
    print(initial_para)
    para = train(initial_para, x, y, learning_rate, num_iter)
    print("最后训练得到的参数:")
    print(para)
    plotBestFit(para, x, y)


logistic_regression()
一个商品分类的例子:
逻辑回归做商品分类比朴素贝叶斯要优秀。文章链接:
1、朴素贝叶斯算法实现商品分类:
2、对爬取京东商品按照标题为其进行自动分类---基于逻辑回归的文本分类
.html
3、Otto商品分类(二)----Logistic回归预测&超参数调优
更多推荐
三种实现逻辑回归算法的代码
发布评论