import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# If you are running SQL Server 2017 RC1 and above:
from revoscalepy import RxComputeContext, RxInSqlServer, RxSqlServerData
from revoscalepy import rx_import


def get_rental_predictions():
    """Fit a linear regression on ``dbo.rental_data`` and print its test error.

    The function reads the rental table from SQL Server through
    ``RxSqlServerData`` / ``rx_import``, splits the rows 80/20 into a
    training and a testing set (``random_state=1`` so the split is
    repeatable), fits a ``LinearRegression`` on the training rows and
    prints the imported data frame, both set shapes, the predictions for
    the testing rows and the mean squared error of those predictions.

    Returns:
        None. All results are written to standard output.
    """
    # Connection string to connect to SQL Server named instance
    conn_str = 'Driver=SQL Server;Server=MYSQLSERVER;Database=TutorialDB;Trusted_Connection=True;'

    # Define the columns we wish to import
    column_info = {
        "Year": {"type": "integer"},
        "Month": {"type": "integer"},
        "Day": {"type": "integer"},
        "RentalCount": {"type": "integer"},
        "WeekDay": {
            "type": "factor",
            "levels": ["1", "2", "3", "4", "5", "6", "7"],
        },
        "Holiday": {
            "type": "factor",
            "levels": ["1", "0"],
        },
        "Snow": {
            "type": "factor",
            "levels": ["1", "0"],
        },
    }

    # Get the data from SQL Server Table
    data_source = RxSqlServerData(
        table="dbo.rental_data",
        connection_string=conn_str,
        column_info=column_info,
    )

    # NOTE(review): this compute context is constructed but never handed to
    # anything in this script, so it presumably has no effect on where the
    # import below runs -- confirm before relying on in-database execution.
    # The original built an identical second instance and discarded it; that
    # redundant construction has been dropped.
    compute_context = RxInSqlServer(
        connection_string=conn_str,
        num_tasks=1,
        auto_cleanup=False,
    )

    # Import the data source and convert it to a pandas dataframe
    df = pd.DataFrame(rx_import(input_data=data_source))
    print("Data frame:", df)

    # Store the variable we'll be predicting on.
    target = "RentalCount"

    # Features are every imported column except "Year" and the target itself.
    # Leaving the target among the features lets the model simply copy it,
    # which makes the reported error meaningless.
    excluded = {"Year", target}
    columns = [c for c in df.columns.tolist() if c not in excluded]

    # Generate the training set. Set random_state to be able to replicate results.
    train = df.sample(frac=0.8, random_state=1)
    # Select anything not in the training set and put it in the testing set.
    test = df.loc[~df.index.isin(train.index)]

    # Print the shapes of both sets.
    print("Training set shape:", train.shape)
    print("Testing set shape:", test.shape)

    # Initialize the model class and fit it to the training data.
    lin_model = LinearRegression()
    lin_model.fit(train[columns], train[target])

    # Generate our predictions for the test set.
    lin_predictions = lin_model.predict(test[columns])
    print("Predictions:", lin_predictions)

    # Compute error between the actual values and our test predictions
    # (scikit-learn's documented argument order is y_true, y_pred).
    lin_mse = mean_squared_error(test[target], lin_predictions)
    print("Computed error:", lin_mse)


get_rental_predictions()