Merge pull request #94 from ciaran28/main

ciaran28 · web-flow · commit 616b057ec013 · 2023-06-08T16:12:06.000+01:00
Bug fix: Databricks workflows
diff --git a/.github/workflows/taskDatabricks.yaml b/.github/workflows/taskDatabricks.yaml
@@ -90,10 +90,10 @@ jobs:
 #################################/
 ## Deploy Azure Infrastructure.                                               
 #################################/
-      - name:                     Deploy Azure Resources
-        run:                      ${{ inputs.SCRIPT_LANGUAGE }} infrastructure/databricks/databricks_utils/${{ inputs.SCRIPT_LANGUAGE }}/utils_create_azure_resources.py
-        env:
-          ENVIRONMENT:            ${{ inputs.ENVIRONMENT }}
+      #- name:                     Deploy Azure Resources
+      #  run:                      ${{ inputs.SCRIPT_LANGUAGE }} infrastructure/databricks/databricks_utils/${{ inputs.SCRIPT_LANGUAGE }}/utils_create_azure_resources.py
+      #  env:
+      #    ENVIRONMENT:            ${{ inputs.ENVIRONMENT }}
 
 
 
diff --git a/data_science/src_nyc_taxi/training/__init__.py b/data_science/src_nyc_taxi/training/__init__.py
@@ -207,8 +207,8 @@ def set_mlflow(
     ):
     if namespace.env is not None:
         params = yaml.safe_load(pathlib.Path(namespace.env).read_text())
-        experiment_name = params['ML_PIPELINE_FILES']['TRAIN_REGISTER']['PARAMETERS']['EXPERIMENT_NAME']
-        track_in_azure_ml = params['ML_PIPELINE_FILES']['TRAIN_REGISTER']['PARAMETERS']['TRACK_IN_AZURE_ML']
+        experiment_name = params['Global']['ExperimentName']
+        track_in_azure_ml = params['Global']['AMLTraking']
 
         if track_in_azure_ml:
             if track_in_azure_ml: 
diff --git a/mlOps/nyc_taxi/databricks_pipelines/hyper_params.yaml b/mlOps/nyc_taxi/databricks_pipelines/hyper_params.yaml
@@ -1,32 +1,48 @@
-ModelOne:
-  objective": "regression"
-  "metric": "rmse"
-  "num_leaves": 25
-  "learning_rate": 0.2
-  "bagging_fraction": 0.9
-  "feature_fraction": 0.9
-  "bagging_seed": 42
-  "verbosity": -1
-  "seed": 42
+Global:
+  ExperimentName: "nyc_taxi_dbx_job"
+  AMLTraking: False
 
-ModelTwo:
-  objective": "regression"
-  "metric": "rmse"
-  "num_leaves": 27
-  "learning_rate": 0.3
-  "bagging_fraction": 0.9
-  "feature_fraction": 0.9
-  "bagging_seed": 42
-  "verbosity": -1
-  "seed": 42
-
-ModelThree:
-  objective": "regression"
-  "metric": "rmse"
-  "num_leaves": 30
-  "learning_rate": 0.4
-  "bagging_fraction": 0.9
-  "feature_fraction": 0.9
-  "bagging_seed": 42
-  "verbosity": -1
-  "seed": 42
+ModelConfigs: [
+  {
+    "ModelName": "ModelOne",
+    "ModelParams": {
+      "objective": "regression",
+      "metric": "rmse",
+      "num_leaves": 25,
+      "learning_rate": 0.2,
+      "bagging_fraction": 0.9,
+      "feature_fraction": 0.9,
+      "bagging_seed": 42,
+      "verbosity": -1,
+      "seed": 42
+    }
+  },
+  {
+    "ModelName": "ModelTwo",
+    "ModelParams": {
+      "objective": "regression",
+      "metric": "rmse",
+      "num_leaves": 27,
+      "learning_rate": 0.3,
+      "bagging_fraction": 0.9,
+      "feature_fraction": 0.9,
+      "bagging_seed": 42,
+      "verbosity": -1,
+      "seed": 42
+    }
+  },
+  {
+    "ModelName": "ModelThree",
+    "ModelParams": {
+      "objective": "regression",
+      "metric": "rmse",
+      "num_leaves": 30,
+      "learning_rate": 0.4,
+      "bagging_fraction": 0.9,
+      "feature_fraction": 0.9,
+      "bagging_seed": 42,
+      "verbosity": -1,
+      "seed": 42
+    }
+  }
+]
diff --git a/mlOps/nyc_taxi/databricks_pipelines/workflow.yaml b/mlOps/nyc_taxi/databricks_pipelines/workflow.yaml
@@ -2,7 +2,7 @@ custom:
 
   # Cluster configs for each environment
   default-cluster-spec: &default-cluster-spec
-    spark_version: '11.3.x-cpu-ml-scala2.12'
+    spark_version: '13.0.x-cpu-ml-scala2.12'
     node_type_id: 'Standard_DS3_v2' 
     driver_node_type_id: 'Standard_DS3_v2'  
     num_workers: 1