Skip to content

Commit 153f028

Browse files
Merge pull request #1 from brettcannon/python-tweaks
Python tweaks
2 parents 003979d + b586643 commit 153f028

File tree

2 files changed

+42
-46
lines changed

2 files changed

+42
-46
lines changed
Lines changed: 32 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
import pandas as pd
2-
from sklearn.linear_model import LinearRegression
3-
from sklearn.metrics import mean_squared_error
1+
import pandas
2+
import sklearn.linear_model
3+
import sklearn.metrics
44

55
from revoscalepy.computecontext.RxInSqlServer import RxInSqlServer
66
from revoscalepy.computecontext.RxInSqlServer import RxSqlServerData
@@ -9,43 +9,36 @@
99

1010
def get_rental_predictions():
1111
conn_str = 'Driver=SQL Server;Server=MYSQLSERVER;Database=TutorialDB;Trusted_Connection=True;'
12-
column_info = {
13-
"Year" : { "type" : "integer" },
14-
"Month" : { "type" : "integer" },
15-
"Day" : { "type" : "integer" },
16-
"RentalCount" : { "type" : "integer" },
17-
"WeekDay" : {
18-
"type" : "factor",
19-
"levels" : ["1", "2", "3", "4", "5", "6", "7"]
20-
},
21-
"Holiday" : {
22-
"type" : "factor",
23-
"levels" : ["1", "0"]
24-
},
25-
"Snow" : {
26-
"type" : "factor",
27-
"levels" : ["1", "0"]
28-
}
12+
column_info = {
13+
"Year": {"type": "integer"},
14+
"Month": {"type": "integer"},
15+
"Day": {"type": "integer"},
16+
"RentalCount": {"type": "integer"},
17+
"WeekDay": {
18+
"type": "factor",
19+
"levels": ["1", "2", "3", "4", "5", "6", "7"],
20+
},
21+
"Holiday": {
22+
"type": "factor",
23+
"levels": ["1", "0"],
24+
},
25+
"Snow": {
26+
"type": "factor",
27+
"levels": ["1", "0"],
2928
}
29+
}
3030

3131
data_source = RxSqlServerData(table="dbo.rental_data",
32-
connectionString=conn_str, colInfo=column_info)
33-
computeContext = RxInSqlServer(
34-
connectionString = conn_str,
35-
numTasks = 1,
36-
autoCleanup = False
37-
)
38-
39-
32+
connectionString=conn_str,
33+
colInfo=column_info)
4034
RxInSqlServer(connectionString=conn_str, numTasks=1, autoCleanup=False)
41-
35+
4236
# import data source and convert to pandas dataframe
43-
df = pd.DataFrame(rx_import_datasource(data_source))
37+
df = pandas.DataFrame(rx_import_datasource(data_source))
4438
print("Data frame:", df)
45-
# Get all the columns from the dataframe.
46-
columns = df.columns.tolist()
47-
# Filter the columns to remove ones we don't want.
48-
columns = [c for c in columns if c not in ["Year"]]
39+
# Get all the columns from the dataframe and filter out the ones we don't
40+
# want.
41+
columns = [x for x in df.columns if x != "Year"]
4942
# Store the variable we'll be predicting on.
5043
target = "RentalCount"
5144
# Generate the training set. Set random_state to be able to replicate results.
@@ -56,14 +49,16 @@ def get_rental_predictions():
5649
print("Training set shape:", train.shape)
5750
print("Testing set shape:", test.shape)
5851
# Initialize the model class.
59-
lin_model = LinearRegression()
52+
lin_model = sklearn.linear_model.LinearRegression()
6053
# Fit the model to the training data.
6154
lin_model.fit(train[columns], train[target])
6255
# Generate our predictions for the test set.
6356
lin_predictions = lin_model.predict(test[columns])
6457
print("Predictions:", lin_predictions)
6558
# Compute error between our test predictions and the actual values.
66-
lin_mse = mean_squared_error(lin_predictions, test[target])
59+
lin_mse = sklearn.metrics.mean_squared_error(lin_predictions, test[target])
6760
print("Computed error:", lin_mse)
6861

69-
get_rental_predictions()
62+
63+
if __name__ == "__main__":
64+
get_rental_predictions()

samples/features/machine-learning-services/python/getting-started/rental-prediction/rental_prediction.sql

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -27,23 +27,24 @@ BEGIN
2727
@language = N'Python'
2828
, @script = N'
2929
30+
from sklearn import linear_model
31+
32+
import pickle
33+
34+
3035
df = rental_train_data
3136
3237
# Get all the columns from the dataframe.
3338
columns = df.columns.tolist()
3439
35-
3640
# Store the variable we will be predicting on.
3741
target = "RentalCount"
3842
39-
from sklearn.linear_model import LinearRegression
40-
4143
# Initialize the model class.
42-
lin_model = LinearRegression()
44+
lin_model = linear_model.LinearRegression()
4345
# Fit the model to the training data.
4446
lin_model.fit(df[columns], df[target])
4547
46-
import pickle
4748
#Before saving the model to the DB table, we need to convert it to a binary object
4849
trained_model = pickle.dumps(lin_model)
4950
'
@@ -75,15 +76,15 @@ AS
7576
BEGIN
7677
DECLARE @py_model varbinary(max) = (select model from rental_py_models where model_name = @model);
7778

78-
EXEC sp_execute_external_script
79+
EXEC sp_execute_external_script
7980
@language = N'Python'
8081
, @script = N'
8182
8283
8384
import pickle
8485
rental_model = pickle.loads(py_model)
8586
86-
87+
8788
df = rental_score_data
8889
#print(df)
8990
@@ -106,15 +107,15 @@ lin_mse = mean_squared_error(lin_predictions, df[target])
106107
#print(lin_mse)
107108
108109
import pandas as pd
109-
predictions_df = pd.DataFrame(lin_predictions)
110+
predictions_df = pd.DataFrame(lin_predictions)
110111
OutputDataSet = pd.concat([predictions_df, df["RentalCount"], df["Month"], df["Day"], df["WeekDay"], df["Snow"], df["Holiday"], df["Year"]], axis=1)
111112
'
112113
, @input_data_1 = N'Select "RentalCount", "Year" ,"Month", "Day", "WeekDay", "Snow", "Holiday" from rental_data where Year = 2015'
113114
, @input_data_1_name = N'rental_score_data'
114115
, @params = N'@py_model varbinary(max)'
115116
, @py_model = @py_model
116117
with result sets (("RentalCount_Predicted" float, "RentalCount" float, "Month" float,"Day" float,"WeekDay" float,"Snow" float,"Holiday" float, "Year" float));
117-
118+
118119
END;
119120
GO
120121

0 commit comments

Comments
 (0)