
Commit 5e0f031

Merge pull request #25 from databrickslabs/feature/dlt-meta-uc-cli
Added - dlt-meta labs cli feature - unit test coverage for uc_enabled feature in onboarding and dataflowpipeline
2 parents ff03b9c + a244ee4 commit 5e0f031

34 files changed: +1728 −115 lines

.coveragerc

Lines changed: 3 additions & 0 deletions
@@ -5,6 +5,9 @@ include = src/*.py
 omit =
     */site-packages/*
     tests/*
+    src/install.py
+    src/config.py
+    src/cli.py
 
 [report]
 exclude_lines =

.gitignore

Lines changed: 4 additions & 1 deletion
@@ -151,4 +151,7 @@ deployment-merged.yaml
 .vscode/
 
 # ignore integration test onboarding file.
-integration-tests/conf/dlt-meta/onboarding.json
\ No newline at end of file
+integration-tests/conf/dlt-meta/onboarding.json
+
+.databricks
+.databricks-login.json

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
@@ -9,6 +9,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 [Please read through the Keep a Changelog (~5min)](https://keepachangelog.com/en/1.0.0/).
 
+## [v.0.0.5]
+- enabled UC (link to PR)
+- databricks labs cli integration (link to PR)
+
 ## [v0.0.4] - 2023-10-09
 ### Added
 - Functionality to introduce a new option for event hub configuration. Namely a source_details option 'eventhub.accessKeySecretName' to properly construct the eh_shared_key_value. Without this option, there were errors while connecting to the event hub service (linked to [issue-13 - java.lang.RuntimeException: non-nullable field authBytes was serialized as null #13](https://github.com/databrickslabs/dlt-meta/issues/13))

Makefile

Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
+clean:
+	rm -fr build .databricks dlt_meta.egg-info
+
+dev:
+	python3 -m venv .databricks
+	.databricks/bin/python -m pip install -e .
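The new dev target bootstraps a local virtualenv in .databricks and installs the package in editable mode; clean removes it along with build artifacts. For reference, a rough Python equivalent of the dev target (a sketch, not part of the repo):

```python
# Rough Python equivalent of `make dev`; paths taken from the Makefile above.
import subprocess
import venv

venv.EnvBuilder(with_pip=True).create(".databricks")  # python3 -m venv .databricks
subprocess.run([".databricks/bin/python", "-m", "pip", "install", "-e", "."],
               check=True)  # editable install of dlt-meta into the new venv
```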

README.md

Lines changed: 3 additions & 0 deletions
@@ -68,6 +68,9 @@ With this framework you need to record the source and target metadata in an onboarding file
 
 ## Getting Started
 Refer to the [Getting Started](https://databrickslabs.github.io/dlt-meta/getting_started)
+### Databricks Labs DLT-META CLI lets you run onboard and deploy in an interactive Python terminal
+- ```databricks labs dlt-meta onboard```
+- ```databricks labs dlt-meta deploy```
 
 ## More questions
 Refer to the [FAQ](https://databrickslabs.github.io/dlt-meta/faq)
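The onboard and deploy subcommands are implemented in the new src/cli.py (excluded from coverage in .coveragerc above), which this commit does not display. A Labs CLI command dispatcher has roughly the shape below; all names are illustrative, not the actual implementation:

```python
# Hypothetical dispatcher for `databricks labs dlt-meta <command>`.
# Illustrates the command surface only; the real src/cli.py may differ.
import sys


def onboard():
    """Prompt for onboarding config and create the DataflowSpec tables."""
    print("running onboarding...")


def deploy():
    """Prompt for pipeline config and launch the DLT pipeline."""
    print("deploying pipeline...")


COMMANDS = {"onboard": onboard, "deploy": deploy}

if __name__ == "__main__":
    command = sys.argv[1] if len(sys.argv) > 1 else ""
    if command not in COMMANDS:
        sys.exit(f"unknown command {command!r}; expected one of {sorted(COMMANDS)}")
    COMMANDS[command]()
```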

demo/launch_dais_demo.py

Lines changed: 10 additions & 11 deletions
@@ -18,11 +18,11 @@ class DLTMETADAISDemo(DLTMETARunner):
     - workspace_client: Databricks workspace client
     - base_dir: base directory
     """
-    def __init__(self, args, workspace_client, base_dir):
+    def __init__(self, args, ws, base_dir):
         self.args = args
-        self.workspace_client = workspace_client
+        self.ws = ws
         self.base_dir = base_dir
-
+
     def init_runner_conf(self) -> DLTMetaRunnerConf:
         """
         Initialize the runner configuration.
@@ -33,13 +33,13 @@ def init_runner_conf(self) -> DLTMetaRunnerConf:
         run_id = uuid.uuid4().hex
         runner_conf = DLTMetaRunnerConf(
             run_id=run_id,
-            username=self.args.__dict__['username'],
+            username=self._my_username(self.ws),
             dbfs_tmp_path=f"{self.args.__dict__['dbfs_path']}/{run_id}",
             int_tests_dir="file:./demo",
             dlt_meta_schema=f"dlt_meta_dataflowspecs_demo_{run_id}",
             bronze_schema=f"dlt_meta_bronze_dais_demo_{run_id}",
             silver_schema=f"dlt_meta_silver_dais_demo_{run_id}",
-            runners_nb_path=f"/Users/{self.args.__dict__['username']}/dlt_meta_dais_demo/{run_id}",
+            runners_nb_path=f"/Users/{self._my_username(self.ws)}/dlt_meta_dais_demo/{run_id}",
             node_type_id=cloud_node_type_id_dict[self.args.__dict__['cloud_provider_name']],
             dbr_version=self.args.__dict__['dbr_version'],
             cloudfiles_template="demo/conf/onboarding.template",
@@ -70,7 +70,7 @@ def run(self, runner_conf: DLTMetaRunnerConf):
             print(e)
         # finally:
         #     self.clean_up(runner_conf)
-
+
     def launch_workflow(self, runner_conf: DLTMetaRunnerConf):
         """
         Launch the workflow for DLT-META DAIS DEMO.
@@ -82,9 +82,9 @@ def launch_workflow(self, runner_conf: DLTMetaRunnerConf):
         runner_conf.job_id = created_job.job_id
         print(f"Job created successfully. job_id={created_job.job_id}, started run...")
         print(f"Waiting for job to complete. run_id={created_job.job_id}")
-        run_by_id = self.workspace_client.jobs.run_now(job_id=created_job.job_id).result()
+        run_by_id = self.ws.jobs.run_now(job_id=created_job.job_id).result()
         print(f"Job run finished. run_id={run_by_id}")
-
+
     def create_daisdemo_workflow(self, runner_conf: DLTMetaRunnerConf):
         """
         Create the workflow for DLT-META DAIS DEMO.
@@ -96,7 +96,7 @@ def create_daisdemo_workflow(self, runner_conf: DLTMetaRunnerConf):
         - created_job: created job object
         """
         database, dlt_lib = self.init_db_dltlib(runner_conf)
-        return self.workspace_client.jobs.create(
+        return self.ws.jobs.create(
             name=f"dltmeta_dais_demo-{runner_conf.run_id}",
             tasks=[
                 jobs.Task(
@@ -174,7 +174,6 @@ def create_daisdemo_workflow(self, runner_conf: DLTMetaRunnerConf):
 
 
 dais_args_map = {"--profile": "provide databricks cli profile name, if not provide databricks_host and token",
-                 "--username": "provide databricks username, this is required to upload runners notebook",
                  "--source": "provide source. Supported values are cloudfiles, eventhub, kafka",
                  "--uc_catalog_name": "provide databricks uc_catalog name, \
                      this is required to create volume, schema, table",
@@ -183,7 +182,7 @@ def create_daisdemo_workflow(self, runner_conf: DLTMetaRunnerConf):
                  "--dbfs_path": "Provide databricks workspace dbfs path where you want run integration tests \
                      e.g --dbfs_path=dbfs:/tmp/DLT-META/"}
 
-dais_mandatory_args = ["username", "source", "cloud_provider_name",
+dais_mandatory_args = ["source", "cloud_provider_name",
                        "dbr_version", "dbfs_path"]
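Both demo launchers now derive the username from the workspace client (self._my_username(self.ws)) instead of a mandatory --username flag, which is why "username" drops out of the mandatory args above. The helper itself is outside this diff; with the Databricks Python SDK it would plausibly be a one-liner over the current-user API (a sketch under that assumption):

```python
# Plausible sketch of _my_username; the actual helper lives outside this diff,
# likely in the shared DLTMETARunner base class.
from databricks.sdk import WorkspaceClient


def _my_username(ws: WorkspaceClient) -> str:
    # The SCIM "me" endpoint identifies whoever the client authenticated as,
    # so callers no longer need to pass --username explicitly.
    return ws.current_user.me().user_name


if __name__ == "__main__":
    ws = WorkspaceClient(profile="DEFAULT")  # assumes a configured CLI profile
    print(_my_username(ws))
```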
demo/launch_techsummit_demo.py

Lines changed: 26 additions & 26 deletions
@@ -62,11 +62,11 @@ class DLTMETATechSummitDemo(DLTMETARunner):
     - workspace_client: Databricks workspace client.
     - base_dir: Base directory.
     """
-    def __init__(self, args, workspace_client, base_dir):
+    def __init__(self, args, ws, base_dir):
         self.args = args
-        self.workspace_client = workspace_client
+        self.ws = ws
         self.base_dir = base_dir
-
+
     def init_runner_conf(self) -> TechsummitRunnerConf:
         """
         Initializes the TechsummitRunnerConf object with the provided configuration parameters.
@@ -78,19 +78,20 @@ def init_runner_conf(self) -> TechsummitRunnerConf:
         print(f"run_id={run_id}")
         runner_conf = TechsummitRunnerConf(
             run_id=run_id,
+            username=self._my_username(self.ws),
             dbfs_tmp_path=f"{self.args.__dict__['dbfs_path']}/{run_id}",
             dlt_meta_schema=f"dlt_meta_dataflowspecs_demo_{run_id}",
             bronze_schema=f"dlt_meta_bronze_demo_{run_id}",
             silver_schema=f"dlt_meta_silver_demo_{run_id}",
             runners_full_local_path='./demo/dbc/tech_summit_dlt_meta_runners.dbc',
-            runners_nb_path=f"/Users/{self.args.__dict__['username']}/dlt_meta_techsummit_demo/{run_id}",
+            runners_nb_path=f"/Users/{self._my_username(self.ws)}/dlt_meta_techsummit_demo/{run_id}",
             node_type_id=cloud_node_type_id_dict[self.args.__dict__['cloud_provider_name']],
             dbr_version=self.args.__dict__['dbr_version'],
             env="prod",
             table_count=self.args.__dict__['table_count'] if self.args.__dict__['table_count'] else "100",
             table_column_count=(self.args.__dict__['table_column_count'] if self.args.__dict__['table_column_count']
                                 else "5"),
-            table_data_rows_count=(self.args.__dict__['table_data_rows_count']
+            table_data_rows_count=(self.args.__dict__['table_data_rows_count']
                                    if self.args.__dict__['table_data_rows_count'] else "10"),
             worker_nodes=self.args.__dict__['worker_nodes'] if self.args.__dict__['worker_nodes'] else "4",
             source=self.args.__dict__['source'],
@@ -110,24 +111,24 @@ def init_dltmeta_runner_conf(self, runner_conf: DLTMetaRunnerConf):
         - runner_conf: The DLTMetaRunnerConf object containing the runner configuration parameters.
         """
         fp = open(runner_conf.runners_full_local_path, "rb")
-        self.workspace_client.workspace.mkdirs(runner_conf.runners_nb_path)
-        self.workspace_client.workspace.upload(path=f"{runner_conf.runners_nb_path}/runners",
-                                               format=ImportFormat.DBC, content=fp.read())
+        self.ws.workspace.mkdirs(runner_conf.runners_nb_path)
+        self.ws.workspace.upload(path=f"{runner_conf.runners_nb_path}/runners",
+                                 format=ImportFormat.DBC, content=fp.read())
         if runner_conf.uc_catalog_name:
-            SchemasAPI(self.workspace_client.api_client).create(catalog_name=runner_conf.uc_catalog_name,
-                                                                name=runner_conf.dlt_meta_schema,
-                                                                comment="dlt_meta framework schema")
-            volume_info = self.workspace_client.volumes.create(catalog_name=runner_conf.uc_catalog_name,
-                                                               schema_name=runner_conf.dlt_meta_schema,
-                                                               name=runner_conf.uc_volume_name,
-                                                               volume_type=VolumeType.MANAGED)
+            SchemasAPI(self.ws.api_client).create(catalog_name=runner_conf.uc_catalog_name,
+                                                  name=runner_conf.dlt_meta_schema,
+                                                  comment="dlt_meta framework schema")
+            volume_info = self.ws.volumes.create(catalog_name=runner_conf.uc_catalog_name,
+                                                 schema_name=runner_conf.dlt_meta_schema,
+                                                 name=runner_conf.uc_volume_name,
+                                                 volume_type=VolumeType.MANAGED)
             runner_conf.volume_info = volume_info
-            SchemasAPI(self.workspace_client.api_client).create(catalog_name=runner_conf.uc_catalog_name,
-                                                                name=runner_conf.bronze_schema,
-                                                                comment="bronze_schema")
-            SchemasAPI(self.workspace_client.api_client).create(catalog_name=runner_conf.uc_catalog_name,
-                                                                name=runner_conf.silver_schema,
-                                                                comment="silver_schema")
+            SchemasAPI(self.ws.api_client).create(catalog_name=runner_conf.uc_catalog_name,
+                                                  name=runner_conf.bronze_schema,
+                                                  comment="bronze_schema")
+            SchemasAPI(self.ws.api_client).create(catalog_name=runner_conf.uc_catalog_name,
+                                                  name=runner_conf.silver_schema,
+                                                  comment="silver_schema")
 
         self.build_and_upload_package(runner_conf)  # comment this line before merging to master
 
@@ -160,9 +161,9 @@ def launch_workflow(self, runner_conf: DLTMetaRunnerConf):
         runner_conf.job_id = created_job.job_id
         print(f"Job created successfully. job_id={created_job.job_id}, started run...")
         print(f"Waiting for job to complete. run_id={created_job.job_id}")
-        run_by_id = self.workspace_client.jobs.run_now(job_id=created_job.job_id).result()
+        run_by_id = self.ws.jobs.run_now(job_id=created_job.job_id).result()
         print(f"Job run finished. run_id={run_by_id}")
-
+
     def create_techsummit_demo_workflow(self, runner_conf: TechsummitRunnerConf):
         """
         Creates the workflow for the Techsummit Demo by defining the tasks and their dependencies.
@@ -174,7 +175,7 @@ def create_techsummit_demo_workflow(self, runner_conf: TechsummitRunnerConf):
         - created_job: The created job object.
         """
         database, dlt_lib = self.init_db_dltlib(runner_conf)
-        return self.workspace_client.jobs.create(
+        return self.ws.jobs.create(
             name=f"dlt-meta-dais-demo-{runner_conf.run_id}",
             tasks=[
                 jobs.Task(
@@ -240,7 +241,6 @@ def create_techsummit_demo_workflow(self, runner_conf: TechsummitRunnerConf):
 
 
 techsummit_args_map = {"--profile": "provide databricks cli profile name, if not provide databricks_host and token",
-                       "--username": "provide databricks username, this is required to upload runners notebook",
                        "--source": "provide --source=cloudfiles",
                        "--uc_catalog_name": "provide databricks uc_catalog name, \
                            this is required to create volume, schema, table",
@@ -253,7 +253,7 @@ def create_techsummit_demo_workflow(self, runner_conf: TechsummitRunnerConf):
                        "--table_data_rows_count": "table_data_rows_count"
                        }
 
-techsummit_mandatory_args = ["username", "source", "cloud_provider_name", "dbr_version", "dbfs_path"]
+techsummit_mandatory_args = ["source", "cloud_provider_name", "dbr_version", "dbfs_path"]
 
 
 def main():
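When a UC catalog is supplied, init_dltmeta_runner_conf above provisions schemas plus a managed volume before uploading the runner notebooks. A standalone sketch of that UC setup with the Databricks SDK; the catalog, schema, and volume names here are placeholders:

```python
# Standalone sketch of the UC provisioning done above: one schema and one
# managed volume inside it. All names are placeholders.
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.catalog import VolumeType

ws = WorkspaceClient(profile="DEFAULT")  # assumes a configured CLI profile

ws.schemas.create(name="dlt_meta_dataflowspecs_demo",
                  catalog_name="my_uc_catalog",
                  comment="dlt_meta framework schema")
volume_info = ws.volumes.create(catalog_name="my_uc_catalog",
                                schema_name="dlt_meta_dataflowspecs_demo",
                                name="dlt_meta_volume",
                                volume_type=VolumeType.MANAGED)
print(volume_info.full_name)  # <catalog>.<schema>.<volume>
```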

integration_tests/cleanup_script.py

Lines changed: 4 additions & 4 deletions
@@ -19,7 +19,7 @@ def process_arguments():
     parser.add_argument("--profile",
                         help="provide databricks cli profile name, if not provide databricks_host and token")
     parser.add_argument("--uc_catalog_name",
-                       help="provide uc_catalog_name")
+                        help="provide uc_catalog_name")
     args = parser.parse_args()
     return args
 
@@ -32,7 +32,7 @@ def main():
     # job_list = workspace_client.jobs.list()
     # for job in job_list:
     #     print(f"Deleting job:{job.creator_user_name}")
-    #workspace_client.jobs.delete(job.job_id)
+    # workspace_client.jobs.delete(job.job_id)
     # list = workspace_client.pipelines.list_pipelines(filter="name like 'dlt-meta-integration-test-silver-%'")
     # print("List of pipelines:")
     # for pipeline in list:
@@ -42,7 +42,7 @@ def main():
     # print("List of pipelines:")
     # for pipeline in list:
     #     print(f"id = {pipeline.pipeline_id} , name = {pipeline.name}")
-    # workspace_client.pipelines.delete(pipeline.pipeline_id)
+    #     workspace_client.pipelines.delete(pipeline.pipeline_id)
     uc_catalog_name = args.uc_catalog_name
     schema_list = workspace_client.schemas.list(catalog_name=uc_catalog_name)
     for schema in schema_list:
@@ -60,4 +60,4 @@ def main():
 
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
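The cleanup script walks every schema in the given UC catalog and drops the ones the integration tests left behind (the loop body is truncated in this diff). The pattern, sketched with the SDK and an illustrative name filter:

```python
# Sketch of the cleanup loop: list schemas in the catalog and drop test ones.
# The startswith() filter is illustrative; the script's actual predicate is
# not shown in this diff.
from databricks.sdk import WorkspaceClient

ws = WorkspaceClient(profile="DEFAULT")  # assumes a configured CLI profile

for schema in ws.schemas.list(catalog_name="my_uc_catalog"):
    if schema.name.startswith("dlt_meta_"):  # guard: only touch test schemas
        print(f"Deleting schema {schema.full_name}")
        ws.schemas.delete(full_name=schema.full_name)
```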
