1
- import pandas as pd
2
- from sklearn .linear_model import LinearRegression
3
- from sklearn .metrics import mean_squared_error
1
+ import pandas
2
+ import sklearn .linear_model
3
+ import sklearn .metrics
4
4
5
5
from revoscalepy .computecontext .RxInSqlServer import RxInSqlServer
6
6
from revoscalepy .computecontext .RxInSqlServer import RxSqlServerData
9
9
10
10
def get_rental_predictions ():
11
11
conn_str = 'Driver=SQL Server;Server=MYSQLSERVER;Database=TutorialDB;Trusted_Connection=True;'
12
- column_info = {
13
- "Year" : { "type" : "integer" },
14
- "Month" : { "type" : "integer" },
15
- "Day" : { "type" : "integer" },
16
- "RentalCount" : { "type" : "integer" },
17
- "WeekDay" : {
18
- "type" : "factor" ,
19
- "levels" : ["1" , "2" , "3" , "4" , "5" , "6" , "7" ]
20
- },
21
- "Holiday" : {
22
- "type" : "factor" ,
23
- "levels" : ["1" , "0" ]
24
- },
25
- "Snow" : {
26
- "type" : "factor" ,
27
- "levels" : ["1" , "0" ]
28
- }
12
+ column_info = {
13
+ "Year" : {"type" : "integer" },
14
+ "Month" : {"type" : "integer" },
15
+ "Day" : {"type" : "integer" },
16
+ "RentalCount" : {"type" : "integer" },
17
+ "WeekDay" : {
18
+ "type" : "factor" ,
19
+ "levels" : ["1" , "2" , "3" , "4" , "5" , "6" , "7" ],
20
+ },
21
+ "Holiday" : {
22
+ "type" : "factor" ,
23
+ "levels" : ["1" , "0" ],
24
+ },
25
+ "Snow" : {
26
+ "type" : "factor" ,
27
+ "levels" : ["1" , "0" ],
29
28
}
29
+ }
30
30
31
31
data_source = RxSqlServerData (table = "dbo.rental_data" ,
32
- connectionString = conn_str , colInfo = column_info )
33
- computeContext = RxInSqlServer (
34
- connectionString = conn_str ,
35
- numTasks = 1 ,
36
- autoCleanup = False
37
- )
38
-
39
-
32
+ connectionString = conn_str ,
33
+ colInfo = column_info )
40
34
RxInSqlServer (connectionString = conn_str , numTasks = 1 , autoCleanup = False )
41
-
35
+
42
36
# import data source and convert to pandas dataframe
43
- df = pd .DataFrame (rx_import_datasource (data_source ))
37
+ df = pandas .DataFrame (rx_import_datasource (data_source ))
44
38
print ("Data frame:" , df )
45
- # Get all the columns from the dataframe.
46
- columns = df .columns .tolist ()
47
- # Filter the columns to remove ones we don't want.
48
- columns = [c for c in columns if c not in ["Year" ]]
39
+ # Get all the columns from the dataframe and filter out the ones we don't
40
+ # want.
41
+ columns = [x for x in df .columns if x == "Year" ]
49
42
# Store the variable we'll be predicting on.
50
43
target = "RentalCount"
51
44
# Generate the training set. Set random_state to be able to replicate results.
@@ -56,14 +49,16 @@ def get_rental_predictions():
56
49
print ("Training set shape:" , train .shape )
57
50
print ("Testing set shape:" , test .shape )
58
51
# Initialize the model class.
59
- lin_model = LinearRegression ()
52
+ lin_model = sklearn . linear_model . LinearRegression ()
60
53
# Fit the model to the training data.
61
54
lin_model .fit (train [columns ], train [target ])
62
55
# Generate our predictions for the test set.
63
56
lin_predictions = lin_model .predict (test [columns ])
64
57
print ("Predictions:" , lin_predictions )
65
58
# Compute error between our test predictions and the actual values.
66
- lin_mse = mean_squared_error (lin_predictions , test [target ])
59
+ lin_mse = sklearn . metrics . mean_squared_error (lin_predictions , test [target ])
67
60
print ("Computed error:" , lin_mse )
68
61
69
- get_rental_predictions ()
62
+
63
+ if __name__ == "__main__" :
64
+ get_rental_predictions ()
0 commit comments