2323ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE
2424POSSIBILITY OF SUCH DAMAGE.
2525"""
26+ import pickle
27+ from azureml .core import Workspace
2628from azureml .core .run import Run
2729import os
2830import argparse
3234from sklearn .model_selection import train_test_split
3335from sklearn .externals import joblib
3436import numpy as np
37+ import json
38+ import subprocess
39+ from typing import Tuple , List
40+
41+
# Command-line options; both are optional strings and default to None.
parser = argparse.ArgumentParser("train")
for option, description in (
    ("--config_suffix", "Datetime suffix for json config files"),
    ("--json_config", "Directory to write all the intermediate json configs"),
):
    parser.add_argument(option, type=str, help=description)
args = parser.parse_args()

print("Argument 1: %s" % args.config_suffix)
print("Argument 2: %s" % args.json_config)

# Make sure the config directory exists when one was given. exist_ok=True
# means the "created" message is printed even if it was already there.
if args.json_config is not None:
    os.makedirs(args.json_config, exist_ok=True)
    print("%s created" % args.json_config)

# Current run context, and the experiment / workspace reached through it.
run = Run.get_context()
exp = run.experiment
ws = exp.workspace
3563
36- # using diabetes dataset from scikit-learn
# Load the diabetes dataset from scikit-learn and hold out 20% for testing.
# random_state is fixed so the split is the same on every run.
X, y = load_diabetes(return_X_y=True)
columns = ["age", "gender", "bmi", "bp", "s1", "s2", "s3", "s4", "s5", "s6"]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)
data = {"train": {"X": X_train, "y": y_train}, "test": {"X": X_test, "y": y_test}}

print("Running train.py")

# Randomly pick alpha from the grid starting at 0.0, in steps of 0.05,
# stopping before 1.0.
alphas = np.arange(0.0, 1.0, 0.05)
alpha = np.random.choice(alphas)
print(alpha)
# Log the chosen alpha as a run metric.
run.log("alpha", alpha)

# Train the model with the selected alpha and log the test-set MSE.
reg = Ridge(alpha=alpha)
reg.fit(data["train"]["X"], data["train"]["y"])
preds = reg.predict(data["test"]["X"])
# scikit-learn metrics are documented as (y_true, y_pred); MSE is symmetric,
# so the logged value is unchanged.
run.log("mse", mean_squared_error(data["test"]["y"], preds))
80+
4181
42- def experiment_code (data_split ):
43- run = Run .get_submitted_run ()
44- # Randomly pic alpha
45- alphas = np .arange (0.0 , 1.0 , 0.05 )
46- alpha = alphas [np .random .choice (alphas .shape [0 ], 1 , replace = False )][0 ]
47- print (alpha )
48- # Log alpha metric
49- run .log ("alpha" , alpha )
50- # train the model with selected value of alpha and log mse
51- reg = Ridge (alpha = alpha )
52- reg .fit (data ["train" ]["X" ], data_split ["train" ]["y" ])
53- preds = reg .predict (data ["test" ]["X" ])
54- run .log ("mse" , mean_squared_error (preds , data_split ["test" ]["y" ]))
82+ # Save model as part of the run history
83+ model_name = "sklearn_regression_model.pkl"
84+ # model_name = "."
5585
56- # Write model name to the config file
57- model_name = "sklearn_regression_model.pkl"
58- with open (model_name , "wb" ):
59- joblib .dump (value = reg , filename = model_name )
# joblib.dump opens and writes the target path itself when given a filename.
# The previous surrounding open(model_name, "wb") only held a second, unused
# write handle on the same file, so it has been removed.
joblib.dump(value=reg, filename=model_name)
6088
61- # upload the model file explicitly into artifacts
62- run .upload_file (name = "./outputs/" + model_name , path_or_stream = model_name )
63- print ("Uploaded the model {} to experiment {}" .format (model_name , run .experiment .name ))
64- dirpath = os .getcwd ()
65- print (dirpath )
# Explicitly attach the serialized model to the run under ./outputs/.
artifact_name = "./outputs/" + model_name
run.upload_file(name=artifact_name, path_or_stream=model_name)
upload_message = "Uploaded the model {} to experiment {}".format(
    model_name, run.experiment.name
)
print(upload_message)

# Show the working directory the model file was written to.
dirpath = os.getcwd()
print(dirpath)

# Show the file names the run reports.
print("Following files are uploaded ")
print(run.get_file_names())
6696
67- print ( "Following files are uploaded " )
68- print ( run .get_file_names () )
69- run . complete ( )
97+ # register the model
98+ # run.log_model(file_name = model_name )
99+ # print('Registered the model {} to run history {}'.format(model_name, run.history.name) )
70100
# Record the id of this run under the "run_id" key.
run_id = {"run_id": run.id}
@@ -76,6 +106,4 @@ def experiment_code(data_split):
# Write the run_id mapping as JSON to output_path.
with open(output_path, "w") as config_file:
    config_file.write(json.dumps(run_id))
78108
79- if __name__ == "__main__" :
80- print ("Running train.py" )
81- experiment_code (data )
109+ run .complete ()
0 commit comments