Skip to content

Commit 3547543

Browse files
committed
Developing DS Code
1 parent 7a78e2f commit 3547543

File tree

6 files changed

+82
-29
lines changed

6 files changed

+82
-29
lines changed

.github/workflows/taskDatabricks.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -244,8 +244,8 @@ jobs:
244244
dbx configure --enable-inplace-jinja-support --profile default
245245
246246
# -e is hard-coded to "default" rather than using $ENVIRONMENT
247-
dbx deploy --workflows JOB_WORKFLOW_NYC_TAXI,TRAIN_REGISTER --no-package --deployment-file mlOps/modelOps/ml_pipelines/az_databricks/cicd/workflow.yaml \
248-
-e default --jinja-variables-file=mlOps/modelOps/ml_pipelines/az_databricks/cicd/workflow_params.yaml
247+
dbx deploy --workflows NYC_TAXI --no-package --deployment-file mlOps/nyc_taxi/databricks_pipelines/workflow.yaml \
248+
-e default --jinja-variables-file=mlOps/nyc_taxi/databricks_pipelines/workflow_params.yaml
249249
250250
env:
251251
ENVIRONMENT: ${{ inputs.ENVIRONMENT }}

data_science/src_nyc_taxi/src.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Databricks notebook source
2+
3+
from featurization import run_feature_store_refresh
4+
run_feature_store_refresh()
5+
6+
# COMMAND ----------
7+
from training import run_training
8+
9+
run_training(
10+
experiment_name = "nyc_e2e_mlops",
11+
model_name = "taxi_example_fare_packaged",
12+
model_params = {
13+
"objective": "regression",
14+
"metric": "rmse",
15+
"num_leaves": 25,
16+
"learning_rate": 0.2,
17+
"bagging_fraction": 0.9,
18+
"feature_fraction": 0.9,
19+
"bagging_seed": 42,
20+
"verbosity": -1,
21+
"seed": 42
22+
}
23+
)
24+
from registration import run_registration
25+
run_registration(
26+
model_name = "taxi_example_fare_packaged"
27+
)

experiments/notebooks/ciaran_experiments/nyc_taxi/nyc_taxi_lgbm_1.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,17 @@
66
# COMMAND ----------
77
from training import run_training
88

9+
10+
11+
12+
913
run_training(
1014
experiment_name = "ciaran_experiment_nyc_taxi",
1115
model_name = "taxi_example_fare_packaged",
1216
model_params = {
1317
"objective": "regression",
1418
"metric": "rmse",
15-
"num_leaves": 32,
19+
"num_leaves": 25,
1620
"learning_rate": 0.2,
1721
"bagging_fraction": 0.9,
1822
"feature_fraction": 0.9,
@@ -21,7 +25,6 @@
2125
"seed": 42
2226
}
2327
)
24-
# COMMAND ----------
2528
from registration import run_registration
2629
run_registration(
2730
model_name = "taxi_example_fare_packaged"
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
ModelOne:
2+
"objective": "regression"
3+
"metric": "rmse"
4+
"num_leaves": 25
5+
"learning_rate": 0.2
6+
"bagging_fraction": 0.9
7+
"feature_fraction": 0.9
8+
"bagging_seed": 42
9+
"verbosity": -1
10+
"seed": 42
11+
12+
ModelTwo:
13+
"objective": "regression"
14+
"metric": "rmse"
15+
"num_leaves": 27
16+
"learning_rate": 0.3
17+
"bagging_fraction": 0.9
18+
"feature_fraction": 0.9
19+
"bagging_seed": 42
20+
"verbosity": -1
21+
"seed": 42
22+
23+
ModelThree:
24+
"objective": "regression"
25+
"metric": "rmse"
26+
"num_leaves": 30
27+
"learning_rate": 0.4
28+
"bagging_fraction": 0.9
29+
"feature_fraction": 0.9
30+
"bagging_seed": 42
31+
"verbosity": -1
32+
"seed": 42

mlOps/nyc_taxi/databricks_pipelines/workflow.yaml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -75,13 +75,13 @@ environments:
7575
depends_on:
7676
- task_key: "Train_Register"
7777

78-
- name: TRAIN_REGISTER
78+
- name: NYC_TAXI
7979
tasks:
80-
- task_key: "Train_Register"
80+
- task_key: "NYC_TAXI"
8181
<<: *dev-cluster-config
8282
spark_python_task:
83-
python_file: "{{var['ML_PIPELINE_FILES']['TRAIN_REGISTER']['FILE_PATH']}}"
84-
parameters: ["{{var['ML_PIPELINE_FILES']['TRAIN_REGISTER']['PARAMETERS']['ENV']}}", "{{var['ML_PIPELINE_FILES']['TRAIN_REGISTER']['PARAMETERS']['FILE']}}"]
83+
python_file: "{{var['ML_PIPELINE_FILES']['NYC_TAXI']['FILE_PATH']}}"
84+
parameters: ["{{var['ML_PIPELINE_FILES']['NYC_TAXI']['PARAMETERS']['ENV']}}", "{{var['ML_PIPELINE_FILES']['NYC_TAXI']['PARAMETERS']['FILE']}}"]
8585
libraries: [
86-
whl: "{{var['ML_PIPELINE_FILES']['TRAIN_REGISTER']['WHL_PATH']}}"
86+
whl: "{{var['ML_PIPELINE_FILES']['NYC_TAXI']['WHL_PATH']}}"
8787
]
Lines changed: 11 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,27 @@
11
ML_PIPELINE_FILES:
2-
DATA_INGEST_PREP:
3-
FILE_PATH: 'file://mlOps/dataOps/nyc_taxi/data_prep.py'
4-
WHL_PATH: 'file://mlOps/modelOps/data_science/nyc_taxi/pyWheels/Helper_Functions/dist/helperfunctions-0.0.1-py3-none-any.whl'
5-
6-
FEATURE_ENGINEERING:
7-
FILE_PATH: 'file://mlOps/modelOps/data_science/nyc_taxi/feature_eng.py'
8-
WHL_PATH: 'file://mlOps/modelOps/data_science/nyc_taxi/pyWheels/Helper_Functions/dist/helperfunctions-0.0.1-py3-none-any.whl'
2+
NYC_TAXI:
3+
FILE_PATH: 'file://data_science/src_nyc_taxi/src.py'
4+
WHL_PATH: 'file://data_science/src_nyc_taxi/dist/src_nyc_taxi-0.0.1-py3-none-any.whl'
95
PARAMETERS:
106
ENV: '--env'
11-
FILE: 'file:fuse://mlOps/modelOps/ml_pipelines/az_databricks/cicd/workflow_params.yaml'
12-
EXPERIMENT_NAME: 'dbx_workflow_fe'
7+
FILE: 'file:fuse://mlOps/nyc_taxi/databricks_pipelines/hyper_params.yaml'
8+
EXPERIMENT_NAME: 'dbx_workflow_nyc_taxi'
139
TRACK_IN_AZURE_ML: True
1410

15-
TRAIN_REGISTER:
16-
FILE_PATH: 'file://mlOps/modelOps/data_science/nyc_taxi/train_register.py'
17-
WHL_PATH: 'file://mlOps/modelOps/data_science/nyc_taxi/pyWheels/Helper_Functions/dist/helperfunctions-0.0.1-py3-none-any.whl'
18-
PARAMETERS:
19-
ENV: '--env'
20-
FILE: 'file:fuse://mlOps/modelOps/ml_pipelines/az_databricks/cicd/workflow_params.yaml'
21-
EXPERIMENT_NAME: 'dbx_workflow_train'
22-
TRACK_IN_AZURE_ML: True
2311

24-
MODEL_INFERENCE:
25-
FILE_PATH: 'file://mlOps/modelOps/data_science/nyc_taxi/score.py'
12+
13+
14+
FEATURE_ENGINEERING:
15+
FILE_PATH: 'file://mlOps/modelOps/data_science/nyc_taxi/feature_eng.py'
2616
WHL_PATH: 'file://mlOps/modelOps/data_science/nyc_taxi/pyWheels/Helper_Functions/dist/helperfunctions-0.0.1-py3-none-any.whl'
2717
PARAMETERS:
2818
ENV: '--env'
2919
FILE: 'file:fuse://mlOps/modelOps/ml_pipelines/az_databricks/cicd/workflow_params.yaml'
30-
EXPERIMENT_NAME: 'dbx_workflow_inference'
20+
EXPERIMENT_NAME: 'dbx_workflow_fe'
3121
TRACK_IN_AZURE_ML: True
3222

3323

3424

3525

3626

27+

0 commit comments

Comments
 (0)