Skip to content

Commit 805f05b

Browse files
committed
Building MLOps + Data Science Scripts
1 parent 16041c6 commit 805f05b

File tree

2 files changed

+16
-40
lines changed

2 files changed

+16
-40
lines changed

.github/workflows/taskDatabricks.yaml

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -90,10 +90,10 @@ jobs:
9090
#################################/
9191
## Deploy Azure Infrastructure
9292
#################################/
93-
- name: Deploy Azure Resources
94-
run: ${{ inputs.SCRIPT_LANGUAGE }} infrastructure/databricks/databricks_utils/${{ inputs.SCRIPT_LANGUAGE }}/utils_create_azure_resources.py
95-
env:
96-
ENVIRONMENT: ${{ inputs.ENVIRONMENT }}
93+
#- name: Deploy Azure Resources
94+
# run: ${{ inputs.SCRIPT_LANGUAGE }} infrastructure/databricks/databricks_utils/${{ inputs.SCRIPT_LANGUAGE }}/utils_create_azure_resources.py
95+
# env:
96+
# ENVIRONMENT: ${{ inputs.ENVIRONMENT }}
9797

9898

9999

@@ -206,14 +206,14 @@ jobs:
206206
## Functionality Under Development (Not yet parameterized to work on general deployments)
207207
#################################/
208208
# 15. AML Pipeline
209-
# - name: Azure Machine Learning Pipeline For DBX Notebooks
210-
# run: ${{ inputs.SCRIPT_LANGUAGE }} mlOps/modelOps/ml_pipelines/az_machine_learning/v1/nyc_pipeline.py
211-
# env:
212-
# DATABRICKS_COMPUTE_NAME: 'mlclusterlink'
213-
# DATABRICKS_CLUSTER_NAME: 'ml_cluster'
214-
# ARM_CLIENT_ID: ${{ secrets.ARM_CLIENT_ID }}
215-
# ARM_CLIENT_SECRET: ${{ secrets.ARM_CLIENT_SECRET }}
216-
# ARM_TENANT_ID: ${{ secrets.ARM_TENANT_ID }}
209+
- name: Azure Machine Learning Pipeline For DBX Notebooks
210+
run: ${{ inputs.SCRIPT_LANGUAGE }} mlOps/modelOps/ml_pipelines/az_machine_learning/v1/nyc_pipeline.py
211+
env:
212+
DATABRICKS_COMPUTE_NAME: 'mlclusterlink'
213+
DATABRICKS_CLUSTER_NAME: 'Unity_Cluster_13_ML'
214+
ARM_CLIENT_ID: ${{ secrets.ARM_CLIENT_ID }}
215+
ARM_CLIENT_SECRET: ${{ secrets.ARM_CLIENT_SECRET }}
216+
ARM_TENANT_ID: ${{ secrets.ARM_TENANT_ID }}
217217

218218

219219
- shell: bash

mlOps/nyc_taxi/aml_pipelines/v1/nyc_pipeline.py

Lines changed: 4 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -70,44 +70,20 @@ def list_clusters(self):
7070
def create_pipeline_structure(databricks_compute, ws, cluster_id):
7171
print('Creating the pipeline structure')
7272

73-
Databricks_Featurization_Step = DatabricksStep(
73+
nyc_taxi_e2e_mlops = DatabricksStep(
7474
name="Databricks_Feature_Engineering",
75-
notebook_path="/Repos/"+ ARM_CLIENT_ID + "/Sandbox/mlOps/modelOps/data_science/nyc_taxi/feature_eng.py",
75+
notebook_path="/Repos/"+ ARM_CLIENT_ID + "/Sandbox/data_science/src_nyc_taxi/src.py",
7676
#notebook_params={'myparam': 'testparam',
7777
# 'myparam2': pipeline_param},
78-
run_name='Databricks_Feature_Engineering',
78+
run_name='nyc_taxi_e2e_mlops',
7979
compute_target=databricks_compute,
8080
existing_cluster_id=cluster_id,
8181
allow_reuse=True,
8282
num_workers=3
8383
)
8484

85-
Databricks_Model_Training = DatabricksStep(
86-
name="Databricks_Model_Training",
87-
88-
notebook_path="/Repos/"+ ARM_CLIENT_ID + "/Sandbox/mlOps/modelOps/data_science/nyc_taxi/train_register.py",
89-
#notebook_params={'myparam': 'testparam',
90-
# 'myparam2': pipeline_param},
91-
run_name='Databricks_Model_Training',
92-
compute_target=databricks_compute,
93-
existing_cluster_id=cluster_id,
94-
allow_reuse=True,
95-
num_workers=3
96-
)
97-
98-
Databricks_Model_Scoring = DatabricksStep(
99-
name="Databricks_Scoring",
100-
notebook_path="/Repos/"+ ARM_CLIENT_ID + "/Sandbox/mlOps/modelOps/data_science/nyc_taxi/score.py",
101-
#notebook_params={'myparam': 'testparam',
102-
# 'myparam2': pipeline_param},
103-
run_name='Databricks_Scoring',
104-
compute_target=databricks_compute,
105-
existing_cluster_id=cluster_id,
106-
allow_reuse=True,
107-
num_workers=3
108-
)
10985

110-
step_sequence = StepSequence(steps=[Databricks_Featurization_Step, Databricks_Model_Training, Databricks_Model_Scoring])
86+
step_sequence = StepSequence(steps=[nyc_taxi_e2e_mlops])
11187
pipeline = Pipeline(workspace=ws, steps=step_sequence)
11288
pipeline.validate()
11389

0 commit comments

Comments
 (0)