Skip to content

Commit 2dda902

Browse files
Update Python imports to reflect revoscalepy updates
1 parent 153f028 commit 2dda902

File tree

1 file changed

+66
-57
lines changed
  • samples/features/machine-learning-services/python/getting-started/rental-prediction

1 file changed

+66
-57
lines changed
Lines changed: 66 additions & 57 deletions
Original file line number | Diff line number | Diff line change
@@ -1,64 +1,73 @@
1-
import pandas
2-
import sklearn.linear_model
3-
import sklearn.metrics
4-
5-
from revoscalepy.computecontext.RxInSqlServer import RxInSqlServer
6-
from revoscalepy.computecontext.RxInSqlServer import RxSqlServerData
7-
from revoscalepy.etl.RxImport import rx_import_datasource
1+
import pandas as pd
2+
from sklearn.linear_model import LinearRegression
3+
from sklearn.metrics import mean_squared_error
84

5+
# If you are running SQL Server 2017 RC1 or later:
6+
from revoscalepy import RxComputeContext, RxInSqlServer, RxSqlServerData
7+
from revoscalepy import rx_import
98

109
def get_rental_predictions():
11-
conn_str = 'Driver=SQL Server;Server=MYSQLSERVER;Database=TutorialDB;Trusted_Connection=True;'
12-
column_info = {
13-
"Year": {"type": "integer"},
14-
"Month": {"type": "integer"},
15-
"Day": {"type": "integer"},
16-
"RentalCount": {"type": "integer"},
17-
"WeekDay": {
18-
"type": "factor",
19-
"levels": ["1", "2", "3", "4", "5", "6", "7"],
20-
},
21-
"Holiday": {
22-
"type": "factor",
23-
"levels": ["1", "0"],
24-
},
25-
"Snow": {
26-
"type": "factor",
27-
"levels": ["1", "0"],
28-
}
29-
}
10+
# Connection string for the SQL Server named instance
11+
conn_str = 'Driver=SQL Server;Server=MYSQLSERVER;Database=TutorialDB;Trusted_Connection=True;'
12+
13+
# Define the columns we wish to import
14+
column_info = {
15+
"Year" : { "type" : "integer" },
16+
"Month" : { "type" : "integer" },
17+
"Day" : { "type" : "integer" },
18+
"RentalCount" : { "type" : "integer" },
19+
"WeekDay" : {
20+
"type" : "factor",
21+
"levels" : ["1", "2", "3", "4", "5", "6", "7"]
22+
},
23+
"Holiday" : {
24+
"type" : "factor",
25+
"levels" : ["1", "0"]
26+
},
27+
"Snow" : {
28+
"type" : "factor",
29+
"levels" : ["1", "0"]
30+
}
31+
}
32+
33+
# Get the data from the SQL Server table
34+
data_source = RxSqlServerData(table="dbo.rental_data",
35+
connection_string=conn_str, column_info=column_info)
36+
computeContext = RxInSqlServer(
37+
connection_string = conn_str,
38+
num_tasks = 1,
39+
auto_cleanup = False
40+
)
41+
3042

31-
data_source = RxSqlServerData(table="dbo.rental_data",
32-
connectionString=conn_str,
33-
colInfo=column_info)
34-
RxInSqlServer(connectionString=conn_str, numTasks=1, autoCleanup=False)
43+
RxInSqlServer(connection_string=conn_str, num_tasks=1, auto_cleanup=False)
3544

36-
# import data source and convert to pandas dataframe
37-
df = pandas.DataFrame(rx_import_datasource(data_source))
38-
print("Data frame:", df)
39-
# Get all the columns from the dataframe and filter out the ones we don't
40-
# want.
41-
columns = [x for x in df.columns if x == "Year"]
42-
# Store the variable we'll be predicting on.
43-
target = "RentalCount"
44-
# Generate the training set. Set random_state to be able to replicate results.
45-
train = df.sample(frac=0.8, random_state=1)
46-
# Select anything not in the training set and put it in the testing set.
47-
test = df.loc[~df.index.isin(train.index)]
48-
# Print the shapes of both sets.
49-
print("Training set shape:", train.shape)
50-
print("Testing set shape:", test.shape)
51-
# Initialize the model class.
52-
lin_model = sklearn.linear_model.LinearRegression()
53-
# Fit the model to the training data.
54-
lin_model.fit(train[columns], train[target])
55-
# Generate our predictions for the test set.
56-
lin_predictions = lin_model.predict(test[columns])
57-
print("Predictions:", lin_predictions)
58-
# Compute error between our test predictions and the actual values.
59-
lin_mse = sklearn.metrics.mean_squared_error(lin_predictions, test[target])
60-
print("Computed error:", lin_mse)
45+
# import data source and convert to pandas dataframe
46+
df = pd.DataFrame(rx_import(input_data = data_source))
47+
print("Data frame:", df)
48+
# Get all the columns from the dataframe.
49+
columns = df.columns.tolist()
50+
# Filter out the columns we don't want to use for training (only "Year" is dropped; NOTE(review): the target "RentalCount" remains in this list, confirm that is intended)
51+
columns = [c for c in columns if c not in ["Year"]]
52+
# Store the variable we'll be predicting on.
53+
target = "RentalCount"
54+
# Generate the training set. Set random_state to be able to replicate results.
55+
train = df.sample(frac=0.8, random_state=1)
56+
# Select anything not in the training set and put it in the testing set.
57+
test = df.loc[~df.index.isin(train.index)]
58+
# Print the shapes of both sets.
59+
print("Training set shape:", train.shape)
60+
print("Testing set shape:", test.shape)
61+
# Initialize the model class.
62+
lin_model = LinearRegression()
63+
# Fit the model to the training data.
64+
lin_model.fit(train[columns], train[target])
6165

66+
# Generate our predictions for the test set.
67+
lin_predictions = lin_model.predict(test[columns])
68+
print("Predictions:", lin_predictions)
69+
# Compute error between our test predictions and the actual values.
70+
lin_mse = mean_squared_error(lin_predictions, test[target])
71+
print("Computed error:", lin_mse)
6272

63-
if __name__ == "__main__":
64-
get_rental_predictions()
73+
get_rental_predictions()

0 commit comments

Comments
 (0)