When I try to optimize a Gradient Boosting Classifier, I get an error at the line "train_predictions = best_gbrt_clf.predict(X_train)".
This is the code:
import time
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV, StratifiedShuffleSplit
from sklearn.metrics import fbeta_score, make_scorer
from sklearn.externals import joblib

start_time = time.clock()
Dump_clf = False
Load_clf = True

clf = GradientBoostingClassifier(random_state=0)

if Load_clf:
    best_gbrt_clf = joblib.load('best_gbrt_clf.pkl')
else:  # build and dump a new classifier
    # the following hyper-parameter values were recommended in the Prettenhofer tutorial
    parameters = {'n_estimators': [100],
                  'learning_rate': [0.1, 0.05, 0.02, 0.01],
                  'max_depth': [4, 6],
                  'min_samples_leaf': [3, 5, 9, 17],
                  'max_features': [1.0, 0.3, 0.1]}

    # Make an fbeta_score scoring object
    scorer = make_scorer(fbeta_score, beta=0.5, average='weighted')

    # Use a stratified split because there are far more examples of one class than the other.
    # In sklearn 0.20, StratifiedShuffleSplit no longer takes y as an argument;
    # GridSearchCV passes the labels to the splitter itself.
    cv = StratifiedShuffleSplit(n_splits=10, test_size=0.2, random_state=42)

    # Perform grid search on the classifier using 'scorer' as the scoring method
    grid_obj = GridSearchCV(clf, param_grid=parameters, cv=cv, scoring=scorer)

    # Fit the grid search object to the training data and find the optimal parameters
    grid_fit = grid_obj.fit(X_train, y_train)

    # Get the best estimator
    best_gbrt_clf = grid_obj.best_estimator_
    print("best params found by GridSearchCV=%s" % grid_obj.best_params_)

    if Dump_clf:
        joblib.dump(best_gbrt_clf, 'best_gbrt_clf.pkl')

best_name = best_gbrt_clf.__class__.__name__
best_params = best_gbrt_clf.get_params()
print("Best classifier is %s" % best_name)
print("Best params=%s\n" % best_params)

# Make predictions using the unoptimized and optimized models
train_predictions = best_gbrt_clf.predict(X_train)
test_predictions = clf.fit(X_train, y_train).predict(X_test)
best_predictions = best_gbrt_clf.predict(X_test)

end_time = time.clock()
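Since Load_clf is True, the grid-search branch is skipped and predict() is called on whatever joblib.load() returned, so one quick sanity check is to confirm the loaded estimator is actually fitted before predicting. This is only a diagnostic sketch; using check_is_fitted with the 'estimators_' attribute is my assumption about how a fitted GradientBoostingClassifier looks, not part of the original code.

from sklearn.utils.validation import check_is_fitted

try:
    # 'estimators_' is only set on a GradientBoostingClassifier after fit()
    check_is_fitted(best_gbrt_clf, 'estimators_')
    print("Loaded classifier looks fitted:", best_gbrt_clf.__class__.__name__)
except Exception as exc:
    print("Loaded classifier cannot be used for predict():", exc)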
I am trying to compare different classifiers, and I can already get results for RandomForestClassifier (best_rf).
The scikit-learn version is 0.20 and Python is 3.6.
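For the comparison itself, this is roughly the sketch I have in mind, scoring both models on the same held-out data with the same F-beta metric used in the grid search; best_rf, best_gbrt_clf, X_test and y_test are assumed to already exist from the earlier runs.

from sklearn.metrics import fbeta_score

# Minimal comparison sketch; assumes best_rf, best_gbrt_clf, X_test and y_test
# are already defined in the notebook from the earlier RandomForest and GBRT runs.
for name, model in [("RandomForest (best_rf)", best_rf),
                    ("GradientBoosting (best_gbrt_clf)", best_gbrt_clf)]:
    preds = model.predict(X_test)
    score = fbeta_score(y_test, preds, beta=0.5, average='weighted')
    print("%s: F0.5 (weighted) = %.4f" % (name, score))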