Building MloPS + Data Science Scripts

ciaran28 · ciaran28 · commit 16041c657955 · 2023-06-08T02:30:16.000+01:00
diff --git a/mlOps/nyc_taxi/databricks_pipelines/workflow.yaml b/mlOps/nyc_taxi/databricks_pipelines/workflow.yaml
@@ -6,12 +6,6 @@ custom:
     node_type_id: 'Standard_DS3_v2' 
     driver_node_type_id: 'Standard_DS3_v2'  
     num_workers: 1
-    # To reduce start up time for each job, it is advisable to use a cluster pool. To do so involves supplying the following
-    # two fields with a pool_id to acquire both the driver and instances from.
-    # If driver_instance_pool_id and instance_pool_id are set, both node_type_id and driver_node_type_id CANNOT be supplied.
-    # As such, if providing a pool_id for driver and worker instances, please ensure that node_type_id and driver_node_type_id are not present
-#    driver_instance_pool_id: '0617-151415-bells2-pool-hh7h6tjm'
-#    instance_pool_id: '0617-151415-bells2-pool-hh7h6tjm'
 
   dev-cluster-config: &dev-cluster-config
     new_cluster:
@@ -31,50 +25,6 @@ build:
 environments:
   default:
     workflows:
-    
-      - name: JOB_WORKFLOW_NYC_TAXI     
-        tasks:
-          - task_key: "Data_Ingestion_And_Prep"
-            <<: *dev-cluster-config
-            spark_python_task:
-              python_file: "{{var['ML_PIPELINE_FILES']['DATA_INGEST_PREP']['FILE_PATH']}}"
-            libraries: [ 
-              whl: "{{var['ML_PIPELINE_FILES']['DATA_INGEST_PREP']['WHL_PATH']}}" 
-            ]
-
-          - task_key: "Feature_Engineering"
-            <<: *dev-cluster-config
-            spark_python_task:
-              python_file: "{{var['ML_PIPELINE_FILES']['FEATURE_ENGINEERING']['FILE_PATH']}}"
-              parameters: ["{{var['ML_PIPELINE_FILES']['FEATURE_ENGINEERING']['PARAMETERS']['ENV']}}", "{{var['ML_PIPELINE_FILES']['FEATURE_ENGINEERING']['PARAMETERS']['FILE']}}"]
-            libraries: [ 
-              whl: "{{var['ML_PIPELINE_FILES']['FEATURE_ENGINEERING']['WHL_PATH']}}" 
-            ]
-            depends_on:
-              - task_key: "Data_Ingestion_And_Prep"
-
-          - task_key: "Train_Register"
-            <<: *dev-cluster-config
-            spark_python_task:
-              python_file: "{{var['ML_PIPELINE_FILES']['TRAIN_REGISTER']['FILE_PATH']}}"
-              parameters: ["{{var['ML_PIPELINE_FILES']['TRAIN_REGISTER']['PARAMETERS']['ENV']}}", "{{var['ML_PIPELINE_FILES']['TRAIN_REGISTER']['PARAMETERS']['FILE']}}"]
-            libraries: [ 
-              whl: "{{var['ML_PIPELINE_FILES']['TRAIN_REGISTER']['WHL_PATH']}}" 
-            ]
-            depends_on:
-              - task_key: "Feature_Engineering"
-
-          - task_key: "Model_Inference"
-            <<: *dev-cluster-config
-            spark_python_task:
-              python_file: "{{var['ML_PIPELINE_FILES']['MODEL_INFERENCE']['FILE_PATH']}}"
-              parameters: ["{{var['ML_PIPELINE_FILES']['MODEL_INFERENCE']['PARAMETERS']['ENV']}}", "{{var['ML_PIPELINE_FILES']['MODEL_INFERENCE']['PARAMETERS']['FILE']}}"]
-            libraries: [ 
-              whl: "{{var['ML_PIPELINE_FILES']['MODEL_INFERENCE']['WHL_PATH']}}" 
-            ]
-            depends_on:
-              - task_key: "Train_Register"
-
       - name: NYC_TAXI     
         tasks:
           - task_key: "NYC_TAXI"