"""Sweep XGBoost learning rates on the scikit-learn breast cancer dataset.

For each candidate learning rate an ``XGBClassifier`` is fitted on the
training split, its accuracy on the training and test splits is recorded,
both accuracy curves are plotted against the learning rate, and the highest
accuracy observed on each split is printed.
"""
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer
from sklearn.tree import export_graphviz  # not used below; kept as in the original
import matplotlib.pyplot as plt

cancer = load_breast_cancer()
# Stratified split with a fixed seed so the class balance and the split itself
# are reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data, cancer.target, stratify=cancer.target, random_state=0
)

# 60 candidate learning rates: 0.004, 0.008, ... up to roughly 0.24.
learning_rates = [x * 0.004 for x in range(1, 61)]
training_accuracy = []
test_accuracy = []

for r in learning_rates:
    # Build and fit a fresh model for this learning rate.
    model = XGBClassifier(learning_rate=r)
    model.fit(X_train, y_train)

    # Score each split exactly once per model; the best values are derived
    # from these lists afterwards instead of re-scoring inside the loop.
    training_accuracy.append(model.score(X_train, y_train))
    # Record generalization accuracy.
    test_accuracy.append(model.score(X_test, y_test))

# ``default=0`` mirrors the original running maximum, which started at 0.
# NOTE: the two maxima are taken independently, so they may come from
# different learning rates.
best_train_score = max(training_accuracy, default=0)
best_test_score = max(test_accuracy, default=0)

plt.plot(learning_rates, training_accuracy, label="training accuracy")
plt.plot(learning_rates, test_accuracy, label="test accuracy")
plt.ylabel("Accuracy")
plt.xlabel("Learning Rate")
plt.legend()
plt.show()

print("Accuracy on training set: {:.3f}".format(best_train_score))
print("Accuracy on test set: {:.3f}".format(best_test_score))