Skip to content

Commit 9582886

Browse files
authored
Merge pull request #86 from ciaran28/main
Bug Fix
2 parents c95b341 + 46087f5 commit 9582886

File tree

8 files changed: 64 additions (+64) and 53 deletions (-53)

.github/workflows/taskDatabricks.yaml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ on:
2626
required: true
2727
RELEASE_BRANCH:
2828
required: false
29-
29+
3030
jobs:
3131
build:
3232
name: ${{ inputs.ENVIRONMENT }} Environment Deployment
@@ -237,7 +237,7 @@ jobs:
237237
238238
dbx configure --enable-inplace-jinja-support --profile $ENVIRONMENT
239239
240-
dbx deploy NYC_TAXI --deployment-file mlOps/modelOps/ml_pipelines/az_databricks/cicd/workflow.yaml \
240+
dbx deploy NYC_TAXI --no-package --deployment-file mlOps/modelOps/ml_pipelines/az_databricks/cicd/workflow.yaml \
241241
-e $ENVIRONMENT --jinja-variables-file=mlOps/modelOps/ml_pipelines/az_databricks/cicd/workflow_params.yaml
242242
243243
env:

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13,3 +13,4 @@ azureDevOps
1313

1414

1515

16+
27 Bytes
Binary file not shown.

mlOps/modelOps/data_science/nyc_taxi/feature_eng.py

Lines changed: 0 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -23,43 +23,6 @@
2323
from pytz import timezone
2424
from helperFunctions.helperFunction import *
2525

26-
27-
# COMMAND ----------
28-
29-
args = dbutils.notebook.entry_point.getCurrentBindings()
30-
print(args)
31-
32-
# COMMAND ----------
33-
34-
#experiment_id = dbutils.widgets.get("--AZUREML_EXPERIMENT_ID")
35-
#print(experiment_id)
36-
37-
# COMMAND ----------
38-
39-
# Ingest Parameters Files
40-
41-
# COMMAND ----------
42-
43-
p = ArgumentParser()
44-
p.add_argument("--env", required=False, type=str)
45-
namespace = p.parse_known_args(sys.argv[1:])[0]
46-
display(namespace)
47-
48-
49-
if namespace.env is not None:
50-
display(namespace.env)
51-
params = yaml.safe_load(pathlib.Path(namespace.env).read_text())
52-
display(params)
53-
experiment_name = params['ML_PIPELINE_FILES']['FEATURE_ENGINEERING']['PARAMETERS']['EXPERIMENT_NAME']
54-
display(experiment_name)
55-
mlflow.set_experiment(experiment_name=experiment_name)
56-
57-
else:
58-
display("Set The Parameters Manually, As We Are Deploying From UI")
59-
mlflow.set_experiment("/Shared/dbxDevelopment")
60-
61-
62-
6326
# COMMAND ----------
6427

6528
#Ingest Data

mlOps/modelOps/data_science/nyc_taxi/pyWheels/Helper_Functions/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -69,6 +69,6 @@ sklearn_pandas==2.2.0
6969
azureml-sdk==1.50.0
7070
uszipcode
7171
lightgbm
72-
azureml-sdk[databricks]
72+
azureml-sdk[databricks]==1.50.0
7373

7474

mlOps/modelOps/data_science/nyc_taxi/train_register.py

Lines changed: 8 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -45,8 +45,7 @@
4545

4646
# COMMAND ----------
4747

48-
%md
49-
## Ingest Args (If Triggered From Pipeline)
48+
# MAGIC %md ## Ingest Args (If Triggered From Pipeline)
5049

5150
# COMMAND ----------
5251

@@ -57,9 +56,7 @@
5756

5857
# COMMAND ----------
5958

60-
%md
61-
62-
## Set Azure ML Configs
59+
# MAGIC %md ## Set Azure ML Configs
6360

6461
# COMMAND ----------
6562

@@ -91,9 +88,7 @@
9188

9289
# COMMAND ----------
9390

94-
%md
95-
96-
## Set MLFlow Tracking Server
91+
# MAGIC %md ## Set MLFlow Tracking Server
9792

9893
# COMMAND ----------
9994

@@ -102,8 +97,8 @@
10297

10398
if namespace.env is not None:
10499
params = yaml.safe_load(pathlib.Path(namespace.env).read_text())
105-
experiment_name = params['ML_PIPELINE_FILES']['MODEL_TRAINING']['PARAMETERS']['EXPERIMENT_NAME']
106-
track_in_azure_ml = params['ML_PIPELINE_FILES']['MODEL_TRAINING']['PARAMETERS']['TRACK_IN_AZURE_ML']
100+
experiment_name = params['ML_PIPELINE_FILES']['TRAIN_REGISTER']['PARAMETERS']['EXPERIMENT_NAME']
101+
track_in_azure_ml = params['ML_PIPELINE_FILES']['TRAIN_REGISTER']['PARAMETERS']['TRACK_IN_AZURE_ML']
107102

108103
if track_in_azure_ml:
109104
if track_in_azure_ml:
@@ -183,7 +178,7 @@
183178

184179
# COMMAND ----------
185180

186-
from sklearn.metrics import mean_squared_log_error
181+
from sklearn import metrics
187182

188183
features_and_label = training_df.columns
189184

@@ -199,8 +194,6 @@
199194

200195
import joblib
201196

202-
joblib.dump(model, open(model_file_path1,'wb')) #Save The Model
203-
204197
mlflow.end_run()
205198
mlflow.autolog(exclusive=False)
206199
with mlflow.start_run():
@@ -221,6 +214,8 @@
221214
param, train_lgb_dataset, num_rounds
222215
)
223216

217+
joblib.dump(model, open(model_file_path1,'wb')) #Save The Model
218+
224219
expected_y = y_test
225220
predicted_y = model.predict(X_test)
226221

mlOps/modelOps/ml_pipelines/az_databricks/cicd/workflow.yaml

Lines changed: 45 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,10 +29,20 @@ build:
2929
no_build: true
3030

3131
environments:
32+
3233
sandbox:
3334
workflows:
3435
- name: NYC_TAXI
3536
tasks:
37+
38+
- task_key: "Data_Ingestion_And_Prep"
39+
<<: *dev-cluster-config
40+
spark_python_task:
41+
python_file: "{{var['ML_PIPELINE_FILES']['DATA_INGEST_PREP']['FILE_PATH']}}"
42+
libraries: [
43+
whl: "{{var['ML_PIPELINE_FILES']['DATA_INGEST_PREP']['WHL_PATH']}}"
44+
]
45+
3646
- task_key: "Feature_Engineering"
3747
<<: *dev-cluster-config
3848
spark_python_task:
@@ -41,6 +51,8 @@ environments:
4151
libraries: [
4252
whl: "{{var['ML_PIPELINE_FILES']['FEATURE_ENGINEERING']['WHL_PATH']}}"
4353
]
54+
depends_on:
55+
- task_key: "Data_Ingestion_And_Prep"
4456

4557
- task_key: "Train_Register"
4658
<<: *dev-cluster-config
@@ -68,6 +80,15 @@ environments:
6880
workflows:
6981
- name: NYC_TAXI
7082
tasks:
83+
84+
- task_key: "Data_Ingestion_And_Prep"
85+
<<: *dev-cluster-config
86+
spark_python_task:
87+
python_file: "{{var['ML_PIPELINE_FILES']['DATA_INGEST_PREP']['FILE_PATH']}}"
88+
libraries: [
89+
whl: "{{var['ML_PIPELINE_FILES']['DATA_INGEST_PREP']['WHL_PATH']}}"
90+
]
91+
7192
- task_key: "Feature_Engineering"
7293
<<: *dev-cluster-config
7394
spark_python_task:
@@ -76,6 +97,8 @@ environments:
7697
libraries: [
7798
whl: "{{var['ML_PIPELINE_FILES']['FEATURE_ENGINEERING']['WHL_PATH']}}"
7899
]
100+
depends_on:
101+
- task_key: "Data_Ingestion_And_Prep"
79102

80103
- task_key: "Train_Register"
81104
<<: *dev-cluster-config
@@ -102,6 +125,15 @@ environments:
102125
workflows:
103126
- name: NYC_TAXI
104127
tasks:
128+
129+
- task_key: "Data_Ingestion_And_Prep"
130+
<<: *dev-cluster-config
131+
spark_python_task:
132+
python_file: "{{var['ML_PIPELINE_FILES']['DATA_INGEST_PREP']['FILE_PATH']}}"
133+
libraries: [
134+
whl: "{{var['ML_PIPELINE_FILES']['DATA_INGEST_PREP']['WHL_PATH']}}"
135+
]
136+
105137
- task_key: "Feature_Engineering"
106138
<<: *dev-cluster-config
107139
spark_python_task:
@@ -110,6 +142,8 @@ environments:
110142
libraries: [
111143
whl: "{{var['ML_PIPELINE_FILES']['FEATURE_ENGINEERING']['WHL_PATH']}}"
112144
]
145+
depends_on:
146+
- task_key: "Data_Ingestion_And_Prep"
113147

114148
- task_key: "Train_Register"
115149
<<: *dev-cluster-config
@@ -137,6 +171,15 @@ environments:
137171
workflows:
138172
- name: NYC_TAXI
139173
tasks:
174+
175+
- task_key: "Data_Ingestion_And_Prep"
176+
<<: *dev-cluster-config
177+
spark_python_task:
178+
python_file: "{{var['ML_PIPELINE_FILES']['DATA_INGEST_PREP']['FILE_PATH']}}"
179+
libraries: [
180+
whl: "{{var['ML_PIPELINE_FILES']['DATA_INGEST_PREP']['WHL_PATH']}}"
181+
]
182+
140183
- task_key: "Feature_Engineering"
141184
<<: *dev-cluster-config
142185
spark_python_task:
@@ -145,6 +188,8 @@ environments:
145188
libraries: [
146189
whl: "{{var['ML_PIPELINE_FILES']['FEATURE_ENGINEERING']['WHL_PATH']}}"
147190
]
191+
depends_on:
192+
- task_key: "Data_Ingestion_And_Prep"
148193

149194
- task_key: "Train_Register"
150195
<<: *dev-cluster-config

mlOps/modelOps/ml_pipelines/az_databricks/cicd/workflow_params.yaml

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,9 @@
11
ML_PIPELINE_FILES:
2+
3+
DATA_INGEST_PREP:
4+
FILE_PATH: 'file://mlOps/dataOps/nyc_taxi/data_prep.py'
5+
WHL_PATH: 'file://mlOps/modelOps/data_science/nyc_taxi/pyWheels/Helper_Functions/dist/helperfunctions-0.0.1-py3-none-any.whl'
6+
27
FEATURE_ENGINEERING:
38
FILE_PATH: 'file://mlOps/modelOps/data_science/nyc_taxi/feature_eng.py'
49
WHL_PATH: 'file://mlOps/modelOps/data_science/nyc_taxi/pyWheels/Helper_Functions/dist/helperfunctions-0.0.1-py3-none-any.whl'
@@ -7,6 +12,7 @@ ML_PIPELINE_FILES:
712
FILE: 'file:fuse://mlOps/modelOps/ml_pipelines/az_databricks/cicd/workflow_params.yaml'
813
EXPERIMENT_NAME: 'dbx_workflow_fe'
914
TRACK_IN_AZURE_ML: True
15+
1016
TRAIN_REGISTER:
1117
FILE_PATH: 'file://mlOps/modelOps/data_science/nyc_taxi/train_register.py'
1218
WHL_PATH: 'file://mlOps/modelOps/data_science/nyc_taxi/pyWheels/Helper_Functions/dist/helperfunctions-0.0.1-py3-none-any.whl'
@@ -15,6 +21,7 @@ ML_PIPELINE_FILES:
1521
FILE: 'file:fuse://mlOps/modelOps/ml_pipelines/az_databricks/cicd/workflow_params.yaml'
1622
EXPERIMENT_NAME: 'dbx_workflow_train'
1723
TRACK_IN_AZURE_ML: True
24+
1825
MODEL_INFERENCE:
1926
FILE_PATH: 'file://mlOps/modelOps/data_science/nyc_taxi/score.py'
2027
WHL_PATH: 'file://mlOps/modelOps/data_science/nyc_taxi/pyWheels/Helper_Functions/dist/helperfunctions-0.0.1-py3-none-any.whl'

0 commit comments

Comments
 (0)