Add files via upload

datashinobi · web-flow · commit 08c55836af92 · 2017-05-30T10:03:01.000+02:00
diff --git a/samples/features/machine-learning-services/python/getting-started/bike-sharing prediction/runner.py b/samples/features/machine-learning-services/python/getting-started/bike-sharing prediction/runner.py
@@ -0,0 +1,59 @@
+import sys 
+import numpy as np
+from sklearn.pipeline import Pipeline
+from datasource import DataSource
+from pipeline import *
+from revoscalepy.etl.RxImport import rx_import_datasource
+from sklearn.metrics import classification_report
+
+
+
+def run():
+    
+    # modify connection string to point to MLS/SQL Server instance where you restored the database 
+    connectionstring = 'Driver=SQL Server;Server=MLMACHINE\\SQLSERVER17;Database=velibdb;Trusted_Connection=True;'
+
+    ds = DataSource(connectionstring)
+    df = ds.loaddata()
+    
+
+    pipeline = Pipeline(steps= [('outliers', OutliersHandler()),
+                                ('label',LabelDefiner()),
+                                ('dt', DateTimeFeaturesExtractor()),
+                                ('ts', TSFeaturesExtractor()),
+                                ('st',  StatisticalFeaturesExtractor()),
+                                ('exclusion', FeaturesExcluder()),
+                                ('scaler', FeaturesScaler())]
+                       )
+
+    # Execute Pipeline
+
+    df = pipeline.fit_transform(df)
+   
+    # split dataset
+
+    test_size = 24 * 4 # one day test set of each station
+    train = df.groupby('stationid').head(df.shape[0] - test_size)
+    test = df.groupby('stationid').tail(test_size)
+
+    
+    # fit classifier
+
+    clf = RxClassifier(computecontext = ds.getcomputecontext())      
+    coeffs = clf.fit(train)
+    #print coefficients and exclude stationid Factor 
+    print(coeffs.tail(14))
+    
+
+    #  run prediction on hold out set and evaluate  
+  
+    y_pred = clf.predict(test.drop(['label'], axis=1, inplace = False))
+    y_truth = test['label'].as_matrix()
+    print(classification_report(y_truth, y_pred))
+
+
+
+if __name__ == "__main__":  
+   run()
+
+