@@ -603,13 +603,13 @@ and rename the parameter column to be more readable.
603
603
604
604
``` {code-cell} ipython3
605
605
# fit the GridSearchCV object
606
- sacr_fit = sacr_gridsearch.fit(
606
+ sacr_gridsearch.fit(
607
607
sacramento_train[["sqft"]], # A single-column data frame
608
608
sacramento_train["price"] # A series
609
609
)
610
610
611
611
# Retrieve the CV scores
612
- sacr_results = pd.DataFrame(sacr_fit.cv_results_)[[
612
+ sacr_results = pd.DataFrame(sacr_gridsearch.cv_results_)[[
613
613
"param_kneighborsregressor__n_neighbors",
614
614
"mean_test_score",
615
615
"std_test_score"
@@ -689,7 +689,7 @@ Note that it is still useful to visualize the results as we did above
689
689
since this provides additional information on how the model performance varies.
690
690
691
691
``` {code-cell} ipython3
692
- sacr_fit.best_params_
692
+ sacr_gridsearch.best_params_
693
693
```
694
694
695
695
+++
@@ -835,7 +835,7 @@ model uses a different default scoring metric than the RMSPE.
835
835
from sklearn.metrics import mean_squared_error
836
836
837
837
sacr_preds = sacramento_test.assign(
838
- predicted = sacr_fit.predict(sacramento_test)
838
+ predicted = sacr_gridsearch.predict(sacramento_test)
839
839
)
840
840
RMSPE = mean_squared_error(
841
841
y_true = sacr_preds["price"],
@@ -891,7 +891,7 @@ sqft_prediction_grid = pd.DataFrame({
891
891
})
892
892
# Predict the price for each of the sqft values in the grid
893
893
sacr_preds = sqft_prediction_grid.assign(
894
- predicted = sacr_fit.predict(sqft_prediction_grid)
894
+ predicted = sacr_gridsearch.predict(sqft_prediction_grid)
895
895
)
896
896
897
897
# Plot all the houses
@@ -1012,18 +1012,19 @@ param_grid = {
1012
1012
"kneighborsregressor__n_neighbors": range(1, 50),
1013
1013
}
1014
1014
1015
- sacr_fit = GridSearchCV(
1015
+ sacr_gridsearch = GridSearchCV(
1016
1016
estimator=sacr_pipeline,
1017
1017
param_grid=param_grid,
1018
1018
cv=5,
1019
1019
scoring="neg_root_mean_squared_error"
1020
- ).fit(
1021
- sacramento_train[["sqft", "beds"]],
1022
- sacramento_train["price"]
1023
- )
1020
+ )
1021
+ sacr_gridsearch.fit(
1022
+ sacramento_train[["sqft", "beds"]],
1023
+ sacramento_train["price"]
1024
+ )
1024
1025
1025
1026
# retrieve the CV scores
1026
- sacr_results = pd.DataFrame(sacr_fit.cv_results_)[[
1027
+ sacr_results = pd.DataFrame(sacr_gridsearch.cv_results_)[[
1027
1028
"param_kneighborsregressor__n_neighbors",
1028
1029
"mean_test_score",
1029
1030
"std_test_score"
@@ -1035,13 +1036,10 @@ sacr_results = (
1035
1036
.rename(columns={"param_kneighborsregressor__n_neighbors" : "n_neighbors"})
1036
1037
.drop(columns=["std_test_score"])
1037
1038
)
1038
-
1039
1039
sacr_results["mean_test_score"] = -sacr_results["mean_test_score"]
1040
1040
1041
1041
# show only the row of minimum RMSPE
1042
- sacr_results[
1043
- sacr_results["mean_test_score"] == sacr_results["mean_test_score"].min()
1044
- ]
1042
+ sacr_results.nsmallest(1, "mean_test_score")
1045
1043
```
1046
1044
1047
1045
``` {code-cell} ipython3
@@ -1072,7 +1070,7 @@ to compute the RMSPE.
1072
1070
1073
1071
``` {code-cell} ipython3
1074
1072
sacr_preds = sacramento_test.assign(
1075
- predicted = sacr_fit.predict(sacramento_test)
1073
+ predicted = sacr_gridsearch.predict(sacramento_test)
1076
1074
)
1077
1075
RMSPE_mult = mean_squared_error(
1078
1076
y_true = sacr_preds["price"],
@@ -1109,7 +1107,7 @@ xygrid = np.array(np.meshgrid(xvals, yvals)).reshape(2, -1).T
1109
1107
xygrid = pd.DataFrame(xygrid, columns=["sqft", "beds"])
1110
1108
1111
1109
# add prediction
1112
- knnPredGrid = sacr_fit.predict(xygrid)
1110
+ knnPredGrid = sacr_gridsearch.predict(xygrid)
1113
1111
1114
1112
fig = px.scatter_3d(
1115
1113
sacramento_train,
0 commit comments