
Commit 883b1f5

Merge pull request #93 from ciaran28/main
Bug Fix
2 parents 397ab6b + 541c36e

15 files changed: +114 -117 lines changed

.github/workflows/onRelease.yaml

Lines changed: 2 additions & 2 deletions
@@ -25,9 +25,9 @@ jobs:

          # IMPORTANT: The testing framework is not yet implemented, and therefore still under development.

-          cd mlOps/devOps/utils
+          #cd mlOps/devOps/utils

-          python -m pytest -v
+          #python -m pytest -v

  prApproved_CD_Development:
    if: github.event_name == 'pull_request' && github.event.action == 'closed' && github.event.pull_request.merged == true && contains(github.head_ref, 'feature') && github.base_ref == 'main'

.github/workflows/taskDatabricks.yaml

Lines changed: 3 additions & 3 deletions
@@ -220,7 +220,7 @@ jobs:
      name: Set Up DBX Environment Variables
      run: |
        DATABRICKS_TOKEN=$(az keyvault secret show --name "dbkstoken" --vault-name $AZ_KEYVAULT_NAME --query "value" -o tsv)
-        echo $DATABRICKS_TOKEN
+        #echo $DATABRICKS_TOKEN
        echo "DATABRICKS_TOKEN=$DATABRICKS_TOKEN" >> $GITHUB_ENV


@@ -235,8 +235,8 @@ jobs:
        # not the Databricks AAD Token.
        pip3 install dbx

-        echo $DATABRICKS_TOKEN
-        echo $DATABRICKS_HOST
+        #echo $DATABRICKS_TOKEN
+        #echo $DATABRICKS_HOST

        databricks -h
        databricks fs ls

data_science/src_nyc_taxi/src.py

Lines changed: 2 additions & 1 deletion
@@ -18,7 +18,8 @@
        "feature_fraction": 0.9,
        "bagging_seed": 42,
        "verbosity": -1,
-        "seed": 42
+        "seed": 42,
+        "num_rounds": 100
    }
)
from registration import run_registration

data_science/src_nyc_taxi/training/__init__.py

Lines changed: 6 additions & 11 deletions
@@ -358,8 +358,6 @@ def evaluate(training_df,random_state, model):
    )

    evaluation_dict["r2"] = r2
-
-    #import pdb; pdb.set_trace()

    mlflow.log_metric(
        "r2",
@@ -398,11 +396,10 @@ def train_model_lgbm(
    # Collect data into a Pandas array for training
    data = training_df.toPandas()[features_and_label]
    train, test = train_test_split(data, random_state=123)
+
    X_train = train.drop(["fare_amount"], axis=1)
    y_train = train.fare_amount

-
-
    mlflow.end_run()
    mlflow.autolog(exclusive=False)
    with mlflow.start_run():
@@ -416,29 +413,26 @@ def train_model_lgbm(
        # label=y_test.values
        # )

-
+        num_rounds = model_params["num_rounds"]
+
        # Train a lightGBM model
        model = lgb.train(
-            #param,
            model_params,
            train_lgb_dataset,
            num_rounds
        )

-
+        mlflow.log_param("num_rounds", num_rounds)
        mlflow.log_param("local_model_file_path", model_file_path)

        evaulation_dict = evaluate(
            training_df=training_df,
            random_state=123,
            model=model
        )
-
-        #import pdb; pdb.set_trace()

        mlflow.log_metrics(evaulation_dict)

-
        fs.log_model(
            model,
            artifact_path="model_packaged",
@@ -621,7 +615,8 @@ def dbx_execute_functions():
            "feature_fraction": 0.9,
            "bagging_seed": 42,
            "verbosity": -1,
-            "seed": 42
+            "seed": 42,
+            "num_rounds": 100
        }
    )
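The fix above threads the boosting-round count through the shared model_params dict and reads it back out at the call site instead of relying on LightGBM's default. A minimal sketch of the same pattern follows; it is not the repository's code, and the synthetic feature names and data are purely illustrative.

# Illustrative sketch (not part of this commit): "num_rounds" travels in the
# same dict as the LightGBM parameters and is read back out by the caller to
# set the number of boosting rounds passed to lgb.train.
import lightgbm as lgb
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

model_params = {
    "objective": "regression",
    "metric": "rmse",
    "seed": 42,
    "verbosity": -1,
    "num_rounds": 100,  # consumed by the caller to set the boosting-round count
}

# Synthetic stand-in for the NYC taxi features (illustrative only).
rng = np.random.default_rng(42)
data = pd.DataFrame(rng.random((500, 3)), columns=["trip_distance", "pickup_hour", "passengers"])
data["fare_amount"] = 3.0 + 2.5 * data["trip_distance"] + rng.normal(0, 0.1, 500)

train, test = train_test_split(data, random_state=123)
X_train = train.drop(["fare_amount"], axis=1)
y_train = train.fare_amount

num_rounds = model_params["num_rounds"]            # pulled out exactly as in the fix
train_lgb_dataset = lgb.Dataset(X_train, label=y_train)
model = lgb.train(model_params, train_lgb_dataset, num_rounds)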

experiments/notebooks/ciaran_experiments/nyc_taxi/nyc_taxi_lgbm_1.py

Lines changed: 22 additions & 22 deletions
@@ -6,26 +6,26 @@
 # COMMAND ----------
 from training import run_training

+num_rounds_arr = [20,40,60,80,100,120,140]

-
-
-
-run_training(
-    experiment_name = "ciaran_experiment_nyc_taxi",
-    model_name = "taxi_example_fare_packaged",
-    model_params = {
-        "objective": "regression",
-        "metric": "rmse",
-        "num_leaves": 25,
-        "learning_rate": 0.2,
-        "bagging_fraction": 0.9,
-        "feature_fraction": 0.9,
-        "bagging_seed": 42,
-        "verbosity": -1,
-        "seed": 42
-    }
-)
-from registration import run_registration
-run_registration(
-    model_name = "taxi_example_fare_packaged"
-)
+for num_rounds in num_rounds_arr:
+    run_training(
+        experiment_name = "ciaran_experiment_nyc_taxi",
+        model_name = "taxi_example_fare_packaged",
+        model_params = {
+            "objective": "regression",
+            "metric": "rmse",
+            "num_leaves": 25,
+            "learning_rate": 0.2,
+            "bagging_fraction": 0.9,
+            "feature_fraction": 0.9,
+            "bagging_seed": 42,
+            "verbosity": -1,
+            "seed": 42,
+            "num_rounds": num_rounds
+        }
+    )
+from registration import run_registration
+run_registration(
+    model_name = "taxi_example_fare_packaged"
+)
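Each iteration of the sweep above logs "num_rounds" as an MLflow parameter and "r2" as a metric (see training/__init__.py), so the runs can be compared afterwards. A rough sketch, assuming a recent MLflow version and that the experiment name below resolves in the notebook's active tracking context:

# Rough sketch (not part of this commit): rank the sweep's runs by the logged
# "r2" metric against the "num_rounds" parameter.
import mlflow

runs = mlflow.search_runs(experiment_names=["ciaran_experiment_nyc_taxi"])
summary = runs[["params.num_rounds", "metrics.r2"]].sort_values("metrics.r2", ascending=False)
print(summary.head())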

infrastructure/bicep/az_templates/az_key_vault/az_key_vault.bicep

Lines changed: 0 additions & 27 deletions
@@ -23,33 +23,6 @@ resource azKeyVault 'Microsoft.KeyVault/vaults@2021-10-01' = {
    enableSoftDelete: true
    enabledForTemplateDeployment: true
    accessPolicies: [
-      {
-        //applicationId: '<>' // Application ID of databricks SPN
-        permissions: {
-          // Give it the ability to set secrets // we can then get rid of the Key Vault Admin permission set in the main pipeline
-          // Can we do this for the main spn , the equivalent of serviceConnect1
-          secrets: [
-            'set'
-            'list'
-            'get'
-          ]
-        }
-        tenantId: subscription().tenantId
-        objectId: '<>'
-      }
-
-      {
-        //applicationId: '<>' // Application ID of serviceConnect1
-        permissions: {
-          secrets: [
-            'set'
-            'list'
-            'get'
-          ]
-        }
-        tenantId: subscription().tenantId
-        objectId: '<>'
-      }
    ]
  }

infrastructure/databricks/databricks_utils/bash/utilsCreateDatabricksToken.sh

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@ if [ $SECRET_EXISTS == true ]; then
        --query "value" \
        -o tsv )

-    echo "Secret Value: $DATABRICKS_TOKEN"
+    #echo "Secret Value: $DATABRICKS_TOKEN"

    # if [[ $DevOps_Agent == "GitHub" ]]; then
    #     echo "DATABRICKS_TOKEN=$DATABRICKS_TOKEN" >> $GITHUB_ENV

infrastructure/databricks/databricks_utils/bash/utilsCreateRoleBasedAccess.sh

Lines changed: 4 additions & 3 deletions
@@ -1,7 +1,7 @@
 #!/usr/bin/env bash


-echo "Resource Group Name: $RESOURCE_GROUP_NAME"
+#echo "Resource Group Name: $RESOURCE_GROUP_NAME"
 echo "ENVIRONMENT: $ENVIRONMENT"
 RESOURCE_GROUP_ID=$( az group show -n $RESOURCE_GROUP_NAME --query id -o tsv )

@@ -14,7 +14,7 @@ for row in $(echo "${JSON}" | jq -r '.RBAC_Assignments[] | @base64'); do
    echo ${row} | base64 --decode | jq -r ${1}
  }
  ROLES_ARRAY="$(_jq '.roles')"
-  echo $ROLES_ARRAY
+  #echo $ROLES_ARRAY

  # Before: [ "Contributor", "DBX_Custom_Role", "Key Vault Administrator" ]
  # xargs trims whitespace on either side. -n removes newline characters.
@@ -30,7 +30,8 @@ for row in $(echo "${JSON}" | jq -r '.RBAC_Assignments[] | @base64'); do
      --role "$ROLE" \
      --assignee-object-id $(_jq '.roleBeneficiaryObjID') \
      --assignee-principal-type "$(_jq '.principalType')" \
-      --scope "$RESOURCE_GROUP_ID"
+      --scope "$RESOURCE_GROUP_ID" \
+      -o none
      #--scope "$(_jq '.scope')"

done

infrastructure/databricks/databricks_utils/bash/utilsSetEnvVariables.sh

Lines changed: 7 additions & 7 deletions
@@ -17,13 +17,13 @@ DATABRICKS_INSTANCE="$(az databricks workspace list -g $RESOURCE_GROUP_NAME --qu
 WORKSPACE_ID=$(az databricks workspace list -g $RESOURCE_GROUP_NAME --query "[].id" -o tsv)
 AZ_KEYVAULT_NAME=$(az keyvault list -g $RESOURCE_GROUP_NAME --query "[].name" -o tsv)
 SUBSCRIPTION_ID=$( az account show --query id -o tsv )
-echo $SUBSCRIPTION_ID
-echo $DATABRICKS_ORDGID
-echo $WORKSPACE_ID
-echo $AZ_KEYVAULT_NAME
-echo $SUBSCRIPTION_ID
-echo $AML_WS_NAME
-echo $DATABRICKS_WS_NAME
+#echo $SUBSCRIPTION_ID
+#echo $DATABRICKS_ORDGID
+#echo $WORKSPACE_ID
+#echo $AZ_KEYVAULT_NAME
+#echo $SUBSCRIPTION_ID
+#echo $AML_WS_NAME
+#echo $DATABRICKS_WS_NAME
 #DATABRICKS_TOKEN=$(az keyvault secret show --name "dbkstoken" --vault-name $AZ_KEYVAULT_NAME --query "value" -o tsv)

infrastructure/databricks/databricks_utils/python/utils_create_cluster.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ def create_clusters():
3333
cluster_param_file = _ingest_cluster_param_file('infrastructure/databricks/databricks_configs/' + ENVIRONMENT + '/clusters.json')
3434
existing_clusters, _ = _list_existing_clusters()
3535
existing_clusters_name_arr = _get_cluster_names(existing_clusters)
36-
print(existing_clusters_name_arr)
36+
#print(existing_clusters_name_arr)
3737
for cluster in cluster_param_file:
3838
if cluster['cluster_name'] not in existing_clusters_name_arr:
3939
print(f"Cluster {cluster} does not exist - Deploy.")
@@ -68,7 +68,7 @@ def _list_existing_clusters():
6868
status_code = response.status_code
6969

7070
response_content = response.json()
71-
print(response_content)
71+
#print(response_content)
7272

7373
if status_code != 200:
7474
raise Exception(status_code)
