决策树、贝叶斯编程
决策树、贝叶斯编程
数据准备--训练集测试集
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import precision_score, recall_score,auc
from sklearn.metrics import roc_curve,roc_auc_score, plot_roc_curve#数据准备
# Location of the iris spreadsheet, relative to the working directory;
# change it to match your setup.  A forward slash is used because "\i" is not
# a valid escape sequence (newer Python versions warn about it) and a
# backslash separator only resolves on Windows.
path = "data/irisData.xlsx"
# Reading .xlsx files needs an Excel engine: pip3 install openpyxl
rawdata = pd.read_excel(path)

# Summary statistics and the dimensions of the table.
print("data summary")
print(rawdata.describe())
nrow, ncol = rawdata.shape
print(nrow, ncol)

# Pairwise correlation between columns.
print("\n correlation Matrix")
print(rawdata.corr())

# Histogram of each column's distribution.
rawdata.hist()
plt.show()

# Show the pairwise relationships between variables.
pd.plotting.scatter_matrix(rawdata, figsize=[8, 8])
plt.show()

# Box plot of every column.
fig = plt.figure(1, figsize=(9, 6))
ax = fig.add_subplot(111)
ax.boxplot(rawdata.values)
# NOTE(review): the labels assume the sheet holds exactly these five columns
# in this order -- confirm against the data file.
ax.set_xticklabels(['Petal Length', 'Petal Width', 'Sepal Length', 'Sepal Width', 'Class'])
plt.show()
# Next: separate the attributes from the class label.
# Every column except the last one is used as a predictor attribute.
predictors = rawdata.iloc[:, :ncol - 1]
print(predictors)
# Index the last column to obtain the class values.
target = rawdata.iloc[:, -1]
print(target)

# Split into training and test sets: 30% of the rows are held out for
# testing, and the fixed random_state keeps the split reproducible.
pred_train, pred_test, tar_train, tar_test = train_test_split(
    predictors,
    target,
    test_size=0.3,
    random_state=0,
)
# Decision tree on standard (numeric) data
classifier = DecisionTreeClassifier()  # configure the classifier with default settings
classifier.fit(pred_train, tar_train)  # train a decision tree model (fit returns the estimator itself)
predictions = classifier.predict(pred_test)  # deploy the model and make predictions on the test set
更多推荐
决策树、贝叶斯编程
发布评论