66 node_type_id : ' Standard_DS3_v2'
77 driver_node_type_id : ' Standard_DS3_v2'
88 num_workers : 1
9- # To reduce startup time for each job, it is advisable to use a cluster pool. To do so, supply the following
10- # two fields with the pool_id from which both the driver and worker instances are acquired.
11- # If driver_instance_pool_id and instance_pool_id are set, neither node_type_id nor driver_node_type_id may be supplied.
12- # As such, if providing a pool_id for driver and worker instances, please ensure that node_type_id and driver_node_type_id are not present.
13- # driver_instance_pool_id: '0617-151415-bells2-pool-hh7h6tjm'
14- # instance_pool_id: '0617-151415-bells2-pool-hh7h6tjm'
159
1610 dev-cluster-config : &dev-cluster-config
1711 new_cluster :
@@ -31,50 +25,6 @@ build:
3125environments :
3226 default :
3327 workflows :
34-
35- - name : JOB_WORKFLOW_NYC_TAXI
36- tasks :
37- - task_key : " Data_Ingestion_And_Prep"
38- << : *dev-cluster-config
39- spark_python_task :
40- python_file : " {{var['ML_PIPELINE_FILES']['DATA_INGEST_PREP']['FILE_PATH']}}"
41- libraries : [
42- whl : " {{var['ML_PIPELINE_FILES']['DATA_INGEST_PREP']['WHL_PATH']}}"
43- ]
44-
45- - task_key : " Feature_Engineering"
46- << : *dev-cluster-config
47- spark_python_task :
48- python_file : " {{var['ML_PIPELINE_FILES']['FEATURE_ENGINEERING']['FILE_PATH']}}"
49- parameters : ["{{var['ML_PIPELINE_FILES']['FEATURE_ENGINEERING']['PARAMETERS']['ENV']}}", "{{var['ML_PIPELINE_FILES']['FEATURE_ENGINEERING']['PARAMETERS']['FILE']}}"]
50- libraries : [
51- whl : " {{var['ML_PIPELINE_FILES']['FEATURE_ENGINEERING']['WHL_PATH']}}"
52- ]
53- depends_on :
54- - task_key : " Data_Ingestion_And_Prep"
55-
56- - task_key : " Train_Register"
57- << : *dev-cluster-config
58- spark_python_task :
59- python_file : " {{var['ML_PIPELINE_FILES']['TRAIN_REGISTER']['FILE_PATH']}}"
60- parameters : ["{{var['ML_PIPELINE_FILES']['TRAIN_REGISTER']['PARAMETERS']['ENV']}}", "{{var['ML_PIPELINE_FILES']['TRAIN_REGISTER']['PARAMETERS']['FILE']}}"]
61- libraries : [
62- whl : " {{var['ML_PIPELINE_FILES']['TRAIN_REGISTER']['WHL_PATH']}}"
63- ]
64- depends_on :
65- - task_key : " Feature_Engineering"
66-
67- - task_key : " Model_Inference"
68- << : *dev-cluster-config
69- spark_python_task :
70- python_file : " {{var['ML_PIPELINE_FILES']['MODEL_INFERENCE']['FILE_PATH']}}"
71- parameters : ["{{var['ML_PIPELINE_FILES']['MODEL_INFERENCE']['PARAMETERS']['ENV']}}", "{{var['ML_PIPELINE_FILES']['MODEL_INFERENCE']['PARAMETERS']['FILE']}}"]
72- libraries : [
73- whl : " {{var['ML_PIPELINE_FILES']['MODEL_INFERENCE']['WHL_PATH']}}"
74- ]
75- depends_on :
76- - task_key : " Train_Register"
77-
7828 - name : NYC_TAXI
7929 tasks :
8030 - task_key : " NYC_TAXI"
0 commit comments