HPO 超参调优方法
数据集来源：Kaggle 的 mobile 数据集
视频教程
import optuna
import pandas as pd
import numpy as np
import torch
from hyperopt.pyll import scope
from sklearn import ensemble, metrics, model_selection, preprocessing, pipeline, decomposition
from functools import partial
from skopt import space, gp_minimize
from hyperopt import hp, fmin, tpe, Trials
def _cv_mean_accuracy(model, x, y, n_splits=5):
    """Return the mean accuracy of *model* over stratified k-fold CV on (x, y).

    Shared scoring routine for all the HPO objective functions below; the
    original code repeated this loop verbatim in each objective.

    Parameters
    ----------
    model : estimator with fit/predict (e.g. RandomForestClassifier)
    x : ndarray of shape (n_samples, n_features)
    y : ndarray of shape (n_samples,) — class labels
    n_splits : int, number of CV folds (default 5, as in the original)
    """
    kf = model_selection.StratifiedKFold(n_splits=n_splits)
    accuracies = []
    for train_idx, test_idx in kf.split(X=x, y=y):
        model.fit(x[train_idx], y[train_idx])
        preds = model.predict(x[test_idx])
        accuracies.append(metrics.accuracy_score(y[test_idx], preds))
    return np.mean(accuracies)


def optimize(params, param_names, x, y):
    """Objective for skopt.gp_minimize.

    skopt passes hyper-parameter values as a positional list, so they are
    zipped with *param_names* into a kwargs dict first.  Returns the negated
    mean CV accuracy because gp_minimize minimizes.
    """
    params = dict(zip(param_names, params))
    model = ensemble.RandomForestClassifier(**params)
    return -1 * _cv_mean_accuracy(model, x, y)


def optimize_1(params, x, y):
    """Objective for hyperopt.fmin.

    hyperopt passes hyper-parameters directly as a dict.  Returns the negated
    mean CV accuracy because fmin minimizes.
    """
    model = ensemble.RandomForestClassifier(**params)
    return -1 * _cv_mean_accuracy(model, x, y)
def optimize_2(trial, x, y):
    """Objective for an Optuna study.

    Samples RandomForest hyper-parameters from *trial*, scores them with
    5-fold stratified CV, and returns the negated mean accuracy (the study
    is created with direction="minimize").
    """
    criterion = trial.suggest_categorical("criterion", ["gini", "entropy"])
    n_estimators = trial.suggest_int("n_estimators", 100, 1500)
    max_depth = trial.suggest_int("max_depth", 3, 15)
    # suggest_float replaces suggest_uniform, which is deprecated and
    # removed in Optuna 3.x.
    max_features = trial.suggest_float("max_features", 0.1, 1.0)
    model = ensemble.RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        max_features=max_features,
        criterion=criterion,
    )
    kf = model_selection.StratifiedKFold(n_splits=5)
    accuracies = []
    for train_idx, test_idx in kf.split(X=x, y=y):
        model.fit(x[train_idx], y[train_idx])
        preds = model.predict(x[test_idx])
        accuracies.append(metrics.accuracy_score(y[test_idx], preds))
    return -1 * np.mean(accuracies)


if __name__ == "__main__":
    # Kaggle "mobile price classification" dataset; target column is
    # price_range.  TODO confirm the CSV path for your checkout.
    df = pd.read_csv("../Data/archive/train.csv")
    X = df.drop("price_range", axis=1).values
    y = df.price_range.values

    classifier = ensemble.RandomForestClassifier(n_jobs=4)

    # --- Grid search over a small explicit grid ---
    param_grid_1 = {
        "n_estimators": [100, 200, 300, 400],
        "max_depth": [1, 3],
        "criterion": ["gini", "entropy"],
    }
    # --- Random search over wider ranges ---
    param_grid_2 = {
        "n_estimators": np.arange(100, 1500, 100),
        "max_depth": np.arange(1, 20),
        "criterion": ["gini", "entropy"],
    }
    model1 = model_selection.GridSearchCV(
        estimator=classifier,
        param_grid=param_grid_1,
        scoring="accuracy",
        verbose=10,
        n_jobs=4,
        cv=5,
    )
    model2 = model_selection.RandomizedSearchCV(
        estimator=classifier,
        param_distributions=param_grid_2,
        n_iter=10,
        scoring="accuracy",
        verbose=10,
        n_jobs=4,
        cv=5,
    )

    # --- Random search over a scale -> PCA -> RF pipeline ---
    scl = preprocessing.StandardScaler()
    pca = decomposition.PCA()
    rf = ensemble.RandomForestClassifier(n_jobs=8)
    classifier1 = pipeline.Pipeline([("scaling", scl), ("pca", pca), ("rf", rf)])
    param_grid_3 = {
        "pca__n_components": np.arange(5, 10),
        "rf__n_estimators": np.arange(100, 1500, 100),
        "rf__max_depth": np.arange(1, 20),
        "rf__criterion": ["gini", "entropy"],
    }
    model3 = model_selection.RandomizedSearchCV(
        estimator=classifier1,
        param_distributions=param_grid_3,
        n_iter=10,
        scoring="accuracy",
        verbose=10,
        n_jobs=1,
        cv=5,
    )

    # NOTE(review): the original called `model.fit(X, y)` here, but no
    # variable named `model` exists (only model1/model2/model3) — that
    # statement would raise NameError if live.  Uncomment one explicitly:
    # model1.fit(X, y)
    # print(model1.best_score_)
    # print(model1.best_estimator_)

    # --- skopt gp_minimize example (disabled) ---
    # param_space = [
    #     space.Integer(3, 15, name="max_depth"),
    #     space.Integer(100, 600, name="n_estimators"),
    #     space.Categorical(["gini", "entropy"], name="criterion"),
    #     space.Real(0.01, 1, prior="uniform", name="max_features"),
    # ]
    # param_names = ["max_depth", "n_estimators", "criterion", "max_features"]
    # optimization_function = partial(optimize, param_names=param_names, x=X, y=y)
    # result = gp_minimize(
    #     optimization_function,
    #     dimensions=param_space,
    #     n_calls=15,
    #     n_random_starts=10,
    #     verbose=10,
    # )
    # print(dict(zip(param_names, result.x)))

    # --- hyperopt example (disabled) ---
    # param_space1 = {
    #     "max_depth": scope.int(hp.quniform("max_depth", 3, 15, 1)),
    #     "n_estimators": scope.int(hp.quniform("n_estimators", 100, 600, 1)),
    #     "criterion": hp.choice("criterion", ["gini", "entropy"]),
    #     "max_features": hp.uniform("max_features", 0.01, 1),
    # }
    # optimization_function_1 = partial(optimize_1, x=X, y=y)
    # trials = Trials()
    # result = fmin(
    #     optimization_function_1,
    #     space=param_space1,
    #     algo=tpe.suggest,
    #     max_evals=15,
    #     trials=trials,
    # )
    # print(result)

    # --- Optuna study (the active HPO run) ---
    optimization_function = partial(optimize_2, x=X, y=y)
    # Persist trials to SQLite so the optuna dashboard can read them.
    study = optuna.create_study(
        study_name='test', direction="minimize", storage='sqlite:///db.sqlite3'
    )
    # study = optuna.create_study(direction="minimize")
    study.optimize(optimization_function, n_trials=15)
更多推荐
HPO超参调优方法
发布评论