Merge pull request #8 from ottenbreit-data-science/examples

mathias-von-ottenbreit · web-flow · commit 3bdc405a8445 · 2022-07-06T12:33:32.000+02:00
Examples
diff --git a/examples/train_aplr_cross_validation.py b/examples/train_aplr_cross_validation.py
@@ -1,5 +1,5 @@
 import pandas as pd
-import pickle
+import joblib
 from sklearn.model_selection import GridSearchCV, train_test_split
 from sklearn.datasets import load_diabetes
 from aplr import APLRRegressor
@@ -35,7 +35,7 @@
 print("Done training")
 
 #Saving model
-pickle.dump(best_model,open("best_model.zip","wb"))
+joblib.dump(best_model,"best_model.gz")
 
 #Cross validation results when doing grid search
 cv_results = pd.DataFrame(grid_search_cv.cv_results_).sort_values(by="rank_test_score")
@@ -44,11 +44,11 @@
 validation_error_per_boosting_step = best_model.get_validation_error_steps()
 
 #Terms in the best model
-terms=pd.DataFrame({"Predictor":best_model.get_term_names(),"Coefficient":best_model.get_term_coefficients()})
+terms=pd.DataFrame({"term":best_model.get_term_names(),"coefficient":best_model.get_term_coefficients()})
 
-#Coefficients for intercept and the first predictor per boosting step
+#Coefficients for intercept and the first term per boosting step
 intercept_coefficient_per_boosting_step = best_model.get_intercept_steps()
-first_predictor_coefficient_per_boosting_step = best_model.get_term_coefficient_steps(term_index=0)
+first_term_coefficient_per_boosting_step = best_model.get_term_coefficient_steps(term_index=0)
 
 #Estimated feature importance was estimated on the validation set when the best model was trained
 estimated_feature_importance = pd.DataFrame({"predictor":predictors,"importance":best_model.get_feature_importance()})
diff --git a/examples/train_aplr_validation.py b/examples/train_aplr_validation.py
@@ -1,6 +1,6 @@
 import pandas as pd
 import numpy as np
-import pickle
+import joblib
 from sklearn.model_selection import ParameterGrid, train_test_split
 from sklearn.datasets import load_diabetes
 from aplr import APLRRegressor
@@ -45,7 +45,7 @@
 print("Done training")
 
 #Saving model
-pickle.dump(best_model,open("best_model.zip","wb"))
+joblib.dump(best_model,"best_model.gz")
 
 #Validation results when doing grid search
 validation_results = validation_results.sort_values(by="validation_error")
@@ -54,11 +54,11 @@
 validation_error_per_boosting_step = best_model.get_validation_error_steps()
 
 #Terms in the best model
-terms=pd.DataFrame({"Predictor":best_model.get_term_names(),"Coefficient":best_model.get_term_coefficients()})
+terms=pd.DataFrame({"term":best_model.get_term_names(),"coefficient":best_model.get_term_coefficients()})
 
-#Coefficients for intercept and the first predictor per boosting step
+#Coefficients for intercept and the first term per boosting step
 intercept_coefficient_per_boosting_step = best_model.get_intercept_steps()
-first_predictor_coefficient_per_boosting_step = best_model.get_term_coefficient_steps(term_index=0)
+first_term_coefficient_per_boosting_step = best_model.get_term_coefficient_steps(term_index=0)
 
 #Estimated feature importance was estimated on the validation set when the best model was trained
 estimated_feature_importance = pd.DataFrame({"predictor":predictors,"importance":best_model.get_feature_importance()})