Skip to content

Commit 0375213

Browse files
committed
Added databricks asset bundle files
1 parent b3394f8 commit 0375213

File tree

7 files changed

+61
-109
lines changed

7 files changed

+61
-109
lines changed

databricks.yml

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,8 +5,8 @@ bundle:
55
include:
66
- "resources/**/*.yml"
77
- "targets/dev.yml"
8-
- "targets/test.yml"
9-
- "targets/prod.yml"
8+
# - "targets/test.yml"
9+
# - "targets/prod.yml"
1010

1111
variables:
1212
catalog:
@@ -16,9 +16,15 @@ variables:
1616
environment:
1717
description: "Deployment environment"
1818
default: "dev"
19+
20+
trigger_pause_status:
21+
description: "Default pause status for scheduled jobs"
22+
default: "PAUSED"
1923

2024
# Global permissions
2125
permissions:
26+
- user_name: "pambati@centrilogic.com"
27+
level: "CAN_MANAGE"
2228
- level: "CAN_MANAGE"
2329
group_name: "iot-data-engineers"
2430
- level: "CAN_VIEW"

resources/clusters/shared_cluster.yml

Lines changed: 2 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -36,16 +36,8 @@ resources:
3636
dbfs:
3737
destination: "dbfs:/cluster-logs/${var.environment}"
3838

39-
# Install required libraries
40-
libraries:
41-
- pypi:
42-
package: "requests>=2.28.0"
43-
- pypi:
44-
package: "pandas>=1.5.0"
45-
- pypi:
46-
package: "numpy>=1.24.0"
47-
- pypi:
48-
package: "delta-spark>=2.4.0"
39+
# Libraries should be installed via job configuration or init scripts
40+
# libraries field is not supported in clusters resource
4941

5042
# Custom tags for cost tracking
5143
custom_tags:

resources/jobs/config_job.yml

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ resources:
3434
description: "Create configuration tables for the environment"
3535
job_cluster_key: "config_cluster"
3636
notebook_task:
37-
notebook_path: "notebooks/config/create_config_tables"
37+
notebook_path: "notebooks/config/create_config_tables.py"
3838
source: "WORKSPACE"
3939
base_parameters:
4040
catalog: "${var.catalog}"
@@ -52,12 +52,8 @@ resources:
5252
# Performance optimization
5353
performance_target: "STANDARD"
5454

55-
# Access control
56-
access_control_list:
57-
- user_name: "odiakonova@seaspancorp.com"
58-
permission_level: "IS_OWNER"
59-
- group_name: "iot-data-engineers"
60-
permission_level: "CAN_MANAGE_RUN"
55+
# Access control should be managed via permissions in bundle configuration
56+
# access_control_list field is not supported in job resources
6157

6258
# Tags for cost tracking and organization
6359
tags:

resources/jobs/iot_ingestion_job.yml

Lines changed: 7 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -19,11 +19,11 @@ resources:
1919
on_success:
2020
- id: "slack-webhook"
2121

22-
# Schedule configuration
22+
# Schedule configuration (environment-specific)
2323
schedule:
2424
quartz_cron_expression: "0 0/2 * * * ?"
2525
timezone_id: "UTC"
26-
pause_status: "${var.trigger_pause_status}"
26+
pause_status: "PAUSED" # Default to paused, override in target configs
2727

2828
# Job cluster configuration
2929
job_clusters:
@@ -53,7 +53,7 @@ resources:
5353
description: "Load data from IoT API endpoints"
5454
job_cluster_key: "iot_processing_cluster"
5555
notebook_task:
56-
notebook_path: "notebooks/iot_ingestion/api_ingestion_framework"
56+
notebook_path: "notebooks/iot_ingestion/api_ingestion_framework.py"
5757
source: "WORKSPACE"
5858
base_parameters:
5959
catalog: "${var.catalog}"
@@ -70,7 +70,7 @@ resources:
7070
- task_key: "load_api_data"
7171
job_cluster_key: "iot_processing_cluster"
7272
notebook_task:
73-
notebook_path: "notebooks/iot_ingestion/load_data_into_bronze"
73+
notebook_path: "notebooks/iot_ingestion/load_data_into_bronze.py"
7474
source: "WORKSPACE"
7575
base_parameters:
7676
catalog: "${var.catalog}"
@@ -88,7 +88,7 @@ resources:
8888
- task_key: "load_to_bronze"
8989
job_cluster_key: "iot_processing_cluster"
9090
notebook_task:
91-
notebook_path: "notebooks/iot_ingestion/create_final_bronze_table"
91+
notebook_path: "notebooks/iot_ingestion/create_final_bronze_table.py"
9292
source: "WORKSPACE"
9393
base_parameters:
9494
catalog: "${var.catalog}"
@@ -101,14 +101,8 @@ resources:
101101
max_retries: 1
102102
min_retry_interval_millis: 60000
103103

104-
# Access control
105-
access_control_list:
106-
- user_name: "odiakonova@seaspancorp.com"
107-
permission_level: "IS_OWNER"
108-
- group_name: "iot-data-engineers"
109-
permission_level: "CAN_MANAGE_RUN"
110-
- group_name: "iot-analysts"
111-
permission_level: "CAN_VIEW"
104+
# Access control should be managed via permissions in bundle configuration
105+
# access_control_list field is not supported in job resources
112106

113107
# Tags for cost tracking and organization
114108
tags:

targets/dev.yml

Lines changed: 25 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -1,34 +1,28 @@
1-
# Development Environment Configuration
2-
variables:
3-
catalog: "dev_test"
4-
environment: "dev"
1+
targets:
2+
dev:
3+
# Development Environment Configuration
4+
variables:
5+
catalog:
6+
default: "dev_test"
7+
environment:
8+
default: "dev"
59

6-
workspace:
7-
host: "https://adb-dev-88474350318135.15.azuredatabricks.net" # Dev workspace
8-
root_path: "/Users/${workspace.current_user}/.bundle/${bundle.name}/${bundle.target}"
10+
workspace:
11+
host: "https://adb-787779827200774.14.azuredatabricks.net/" # Dev workspace
12+
root_path: "/Workspace/Shared/.bundle/${bundle.name}/${bundle.target}"
13+
14+
sync:
15+
include:
16+
- "notebooks/**"
917

10-
mode: development
11-
default: true
18+
presets:
19+
name_prefix: "dev-"
20+
trigger_pause_status: "PAUSED"
21+
jobs_max_concurrent_runs: 1
22+
tags:
23+
environment: "dev"
24+
cost_center: "analytics"
25+
project: "iot-ingestion"
1226

13-
presets:
14-
name_prefix: "dev-"
15-
trigger_pause_status: "PAUSED"
16-
jobs_max_concurrent_runs: 1
17-
tags:
18-
environment: "dev"
19-
cost_center: "analytics"
20-
project: "iot-ingestion"
21-
22-
# Override job settings for development
23-
resources:
24-
jobs:
25-
iot_ingestion_job:
26-
schedule:
27-
pause_status: "PAUSED"
28-
job_clusters:
29-
- job_cluster_key: "iot_processing_cluster"
30-
new_cluster:
31-
num_workers: 1
32-
autoscale:
33-
min_workers: 1
34-
max_workers: 2
27+
# Job overrides should be handled via variables or presets
28+
# Remove duplicate job definitions to avoid conflicts

targets/prod.yml

Lines changed: 8 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,15 @@
11
# Production Environment Configuration
22
variables:
3-
catalog: "prod"
4-
environment: "prod"
3+
catalog:
4+
default: "prod"
5+
environment:
6+
default: "prod"
57

68
workspace:
79
host: "https://adb-prod-88474350318135.15.azuredatabricks.net" # Production workspace
8-
root_path: "/Shared/bundles/${bundle.name}/${bundle.target}"
10+
root_path: "/Workspace/Shared/.bundle/${bundle.name}/${bundle.target}"
911

10-
mode: production
12+
# Databricks Asset Bundle target configuration
1113

1214
presets:
1315
name_prefix: "prod-"
@@ -29,24 +31,5 @@ permissions:
2931
- level: "CAN_VIEW"
3032
group_name: "business-analysts"
3133

32-
# Override job settings for production
33-
resources:
34-
jobs:
35-
iot_ingestion_job:
36-
schedule:
37-
quartz_cron_expression: "0 0/2 * * * ?"
38-
timezone_id: "UTC"
39-
pause_status: "UNPAUSED"
40-
job_clusters:
41-
- job_cluster_key: "iot_processing_cluster"
42-
new_cluster:
43-
num_workers: 4
44-
autoscale:
45-
min_workers: 2
46-
max_workers: 8
47-
enable_elastic_disk: true
48-
49-
config_tables_job:
50-
# Config job typically runs on-demand in production
51-
schedule:
52-
pause_status: "PAUSED"
34+
# Job overrides should be handled via variables or presets
35+
# Remove duplicate job definitions to avoid conflicts

targets/test.yml

Lines changed: 8 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,15 @@
11
# Test Environment Configuration
22
variables:
3-
catalog: "iot_test"
4-
environment: "test"
3+
catalog:
4+
default: "iot_test"
5+
environment:
6+
default: "test"
57

68
workspace:
79
host: "https://adb-test-88474350318135.15.azuredatabricks.net" # Test workspace
8-
root_path: "/Shared/bundles/${bundle.name}/${bundle.target}"
10+
root_path: "/Workspace/Shared/.bundle/${bundle.name}/${bundle.target}"
911

10-
mode: development
12+
# Databricks Asset Bundle target configuration
1113

1214
presets:
1315
name_prefix: "test-"
@@ -22,20 +24,5 @@ presets:
2224
run_as:
2325
service_principal_name: "12345678-1234-1234-1234-123456789012" # Shared service principal client ID
2426

25-
# Override job settings for test environment
26-
resources:
27-
jobs:
28-
iot_ingestion_job:
29-
schedule:
30-
pause_status: "PAUSED"
31-
job_clusters:
32-
- job_cluster_key: "iot_processing_cluster"
33-
new_cluster:
34-
num_workers: 2
35-
autoscale:
36-
min_workers: 1
37-
max_workers: 3
38-
39-
config_tables_job:
40-
schedule:
41-
pause_status: "PAUSED"
27+
# Job overrides should be handled via variables or presets
28+
# Remove duplicate job definitions to avoid conflicts

0 commit comments

Comments
 (0)