Skip to content

Commit 9073224

Browse files
- Added updating paths to onboarding template under cli
- Added readme instructions for using cli with example
1 parent a244ee4 commit 9073224

File tree

2 files changed

+141
-2
lines changed

2 files changed

+141
-2
lines changed

README.md

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,9 +69,94 @@ With this framework you need to record the source and target metadata in an onbo
6969
## Getting Started
7070
Refer to the [Getting Started](https://databrickslabs.github.io/dlt-meta/getting_started)
7171
### Databricks Labs DLT-META CLI lets you run onboard and deploy in an interactive Python terminal
72+
- ``` git clone dlt-meta ```
73+
- ``` cd dlt-meta ```
7274
- ```databricks labs dlt-meta onboard```
75+
- - The above command will prompt you to provide onboarding details. If you have cloned the dlt-meta git repo, accept the defaults, which will launch the config from the demo folder.
76+
```
Provide onboarding file path (default: demo/conf/onboarding.template):
77+
Provide onboarding files local directory (default: demo/):
78+
Provide dbfs path (default: dbfs:/dlt-meta_cli_demo):
79+
Provide databricks runtime version (default: 14.2.x-scala2.12):
80+
Run onboarding with unity catalog enabled?
81+
[0] False
82+
[1] True
83+
Enter a number between 0 and 1: 1
84+
Provide unity catalog name: ravi_dlt_meta_uc
85+
Provide dlt meta schema name (default: dlt_meta_dataflowspecs_203b9da04bdc49f78cdc6c379d1c9ead):
86+
Provide dlt meta bronze layer schema name (default: dltmeta_bronze_cf5956873137432294892fbb2dc34fdb):
87+
Provide dlt meta silver layer schema name (default: dltmeta_silver_5afa2184543342f98f87b30d92b8c76f):
88+
Provide dlt meta layer
89+
[0] bronze
90+
[1] bronze_silver
91+
[2] silver
92+
Enter a number between 0 and 2: 1
93+
Provide bronze dataflow spec table name (default: bronze_dataflowspec):
94+
Provide silver dataflow spec table name (default: silver_dataflowspec):
95+
Overwrite dataflow spec?
96+
[0] False
97+
[1] True
98+
Enter a number between 0 and 1: 1
99+
Provide dataflow spec version (default: v1):
100+
Provide environment name (default: prod): prod
101+
Provide import author name (default: ravi.gawai):
102+
Provide cloud provider name
103+
[0] aws
104+
[1] azure
105+
[2] gcp
106+
Enter a number between 0 and 2: 0
107+
Update workspace/dbfs paths, unity catalog name, bronze/silver schema names in onboarding file?
108+
[0] False
109+
[1] True
110+
```
111+
- Go to your Databricks workspace and locate the onboarding job under: Workflows -> Job runs
112+
- Once the onboarding job has finished, deploy the `bronze` and `silver` DLT pipelines using the command below
73113
- ```databricks labs dlt-meta deploy```
114+
- - The above command will prompt you to provide DLT details. Please provide the respective details for the schemas which you provided in the steps above.
115+
- - Bronze DLT
116+
```
117+
Deploy DLT-META with unity catalog enabled?
118+
[0] False
119+
[1] True
120+
Enter a number between 0 and 1: 1
121+
Provide unity catalog name: ravi_dlt_meta_uc
122+
Deploy DLT-META with serverless?
123+
[0] False
124+
[1] True
125+
Enter a number between 0 and 1: 1
126+
Provide dlt meta layer
127+
[0] bronze
128+
[1] silver
129+
Enter a number between 0 and 1: 0
130+
Provide dlt meta onboard group: A1
131+
Provide dlt_meta dataflowspec schema name: dlt_meta_dataflowspecs_203b9da04bdc49f78cdc6c379d1c9ead
132+
Provide bronze dataflowspec table name (default: bronze_dataflowspec):
133+
Provide dlt meta pipeline name (default: dlt_meta_bronze_pipeline_2aee3eb837f3439899eef61b76b80d53):
134+
Provide dlt target schema name: dltmeta_bronze_cf5956873137432294892fbb2dc34fdb
135+
```
74136

137+
- Silver DLT
138+
- - ```databricks labs dlt-meta deploy```
139+
- - The above command will prompt you to provide DLT details. Please provide the respective details for the schemas which you provided in the steps above.
140+
```
141+
Deploy DLT-META with unity catalog enabled?
142+
[0] False
143+
[1] True
144+
Enter a number between 0 and 1: 1
145+
Provide unity catalog name: ravi_dlt_meta_uc
146+
Deploy DLT-META with serverless?
147+
[0] False
148+
[1] True
149+
Enter a number between 0 and 1: 1
150+
Provide dlt meta layer
151+
[0] bronze
152+
[1] silver
153+
Enter a number between 0 and 1: 1
154+
Provide dlt meta onboard group: A1
155+
Provide dlt_meta dataflowspec schema name: dlt_meta_dataflowspecs_203b9da04bdc49f78cdc6c379d1c9ead
156+
Provide silver dataflowspec table name (default: silver_dataflowspec):
157+
Provide dlt meta pipeline name (default: dlt_meta_silver_pipeline_2147545f9b6b4a8a834f62e873fa1364):
158+
Provide dlt target schema name: dltmeta_silver_5afa2184543342f98f87b30d92b8c76f
159+
```
75160
## More questions
76161
Refer to the [FAQ](https://databrickslabs.github.io/dlt-meta/faq)
77162
and DLT-META [documentation](https://databrickslabs.github.io/dlt-meta/)

src/cli.py

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,16 @@ class OnboardCommand:
4343
version: str
4444
cloud: str
4545
dlt_meta_schema: str
46+
bronze_schema: str = None
47+
silver_schema: str = None
4648
uc_enabled: bool = False
4749
uc_catalog_name: str = None
4850
overwrite: bool = True
4951
bronze_dataflowspec_table: str = "bronze_dataflowspec"
5052
silver_dataflowspec_table: str = "silver_dataflowspec"
5153
bronze_dataflowspec_path: str = None
5254
silver_dataflowspec_path: str = None
55+
update_paths: bool = True
5356

5457
def __post_init__(self):
5558
if not self.onboarding_file_path or self.onboarding_file_path == "":
@@ -147,6 +150,7 @@ def _my_username(self):
147150

148151
def onboard(self, cmd: OnboardCommand):
149152
"""Perform the onboarding process."""
153+
self.update_ws_onboarding_paths(cmd)
150154
if not self._ws.dbfs.exists(cmd.dbfs_path + "/dltmeta_conf/"):
151155
self._ws.dbfs.create(path=cmd.dbfs_path + "/dltmeta_conf/", overwrite=True)
152156
ob_file = open(cmd.onboarding_file_path, "rb")
@@ -302,9 +306,9 @@ def deploy(self, cmd: DeployCommand):
302306
def _load_onboard_config(self) -> OnboardCommand:
303307
onboard_cmd_dict = {}
304308
onboard_cmd_dict["onboarding_file_path"] = self._wsi._question(
305-
"Provide onboarding file path", default='cli_demo/conf/onboarding.json')
309+
"Provide onboarding file path", default='demo/conf/onboarding.template')
306310
onboarding_files_dir_path = self._wsi._question(
307-
"Provide onboarding files local directory", default='cli_demo/conf')
311+
"Provide onboarding files local directory", default='demo/')
308312
onboard_cmd_dict["onboarding_files_dir_path"] = f"file:./{onboarding_files_dir_path}"
309313
onboard_cmd_dict["dbfs_path"] = self._wsi._question(
310314
"Provide dbfs path", default="dbfs:/dlt-meta_cli_demo")
@@ -318,6 +322,10 @@ def _load_onboard_config(self) -> OnboardCommand:
318322
"Provide unity catalog name")
319323
onboard_cmd_dict["dlt_meta_schema"] = self._wsi._question(
320324
"Provide dlt meta schema name", default=f'dlt_meta_dataflowspecs_{uuid.uuid4().hex}')
325+
onboard_cmd_dict["bronze_schema"] = self._wsi._question(
326+
"Provide dlt meta bronze layer schema name", default=f'dltmeta_bronze_{uuid.uuid4().hex}')
327+
onboard_cmd_dict["silver_schema"] = self._wsi._question(
328+
"Provide dlt meta silver layer schema name", default=f'dltmeta_silver_{uuid.uuid4().hex}')
321329
onboard_cmd_dict["onboard_layer"] = self._wsi._choice(
322330
"Provide dlt meta layer", ['bronze', 'silver', 'bronze_silver'])
323331
if onboard_cmd_dict["onboard_layer"] == "bronze":
@@ -352,7 +360,11 @@ def _load_onboard_config(self) -> OnboardCommand:
352360
"Provide import author name", default=self._wsi._short_name)
353361
onboard_cmd_dict["cloud"] = self._wsi._choice(
354362
"Provide cloud provider name", ['aws', 'azure', 'gcp'])
363+
onboard_cmd_dict["update_paths"] = self._wsi._choice(
364+
"Update workspace/dbfs paths, unity catalog name, bronze/silver schema names in onboarding file?",
365+
['True', 'False'])
355366
cmd = OnboardCommand(**onboard_cmd_dict)
367+
356368
return cmd
357369

358370
def _load_deploy_config(self) -> DeployCommand:
@@ -400,6 +412,48 @@ def _load_deploy_config(self) -> DeployCommand:
400412
"Provide dlt target schema name")
401413
return DeployCommand(**deploy_cmd_dict)
402414

415+
def update_ws_onboarding_paths(self, cmd: OnboardCommand):
    """Render the onboarding template into a concrete ``onboarding.json``.

    Loads the JSON document at ``cmd.onboarding_file_path`` (expected to be a
    list of data flow dicts), substitutes the placeholders it contains, writes
    the result as ``onboarding.json`` in the same directory as the template,
    and finally repoints ``cmd.onboarding_file_path`` at the written file.

    Substitution rules, applied to string values only:

    * Strings inside the ``source_details`` dict that mention ``dbfs_path``
      are formatted with ``dbfs_path="<cmd.dbfs_path>/dltmeta_conf/"``.
    * Top-level strings that mention ``dbfs_path`` are formatted the same way.
    * Top-level strings that mention ``uc_catalog_name`` together with
      ``bronze_schema`` (or ``silver_schema``) become ``<catalog>.<schema>``
      when ``cmd.uc_catalog_name`` is set; otherwise only the schema name is
      kept and every ``.`` is stripped from the result.

    Non-string values (numbers, booleans, nulls, nested dicts other than
    ``source_details``) are left untouched.

    Args:
        cmd: Onboarding answers. Reads ``onboarding_file_path``, ``dbfs_path``,
            ``uc_catalog_name``, ``bronze_schema``, ``silver_schema`` and
            ``update_paths``; ``onboarding_file_path`` is overwritten on
            success.

    Raises:
        OSError: If the template cannot be read or the output cannot be written.
        json.JSONDecodeError: If the template is not valid JSON.
        KeyError: If a formatted string contains a placeholder other than the
            ones supplied for its rule.
    """
    # Honour the "update paths?" answer. The interactive prompt hands back the
    # strings 'True' / 'False', and the string 'False' is truthy, so normalise
    # before testing it.
    update_paths = getattr(cmd, "update_paths", True)
    if isinstance(update_paths, str):
        update_paths = update_paths.strip().lower() != "false"
    if not update_paths:
        return

    dbfs_conf_path = f"{cmd.dbfs_path}/dltmeta_conf/"
    layer_schemas = (
        ("bronze_schema", cmd.bronze_schema),
        ("silver_schema", cmd.silver_schema),
    )

    def render(text):
        """Return ``text`` with the first applicable placeholder rule applied."""
        if 'dbfs_path' in text:
            return text.format(dbfs_path=dbfs_conf_path)
        for schema_key, schema_name in layer_schemas:
            if 'uc_catalog_name' in text and schema_key in text:
                if cmd.uc_catalog_name:
                    return text.format(
                        uc_catalog_name=cmd.uc_catalog_name,
                        **{schema_key: schema_name}
                    )
                # No unity catalog: put the schema in the catalog slot, blank
                # the schema slot, then drop the separator dot left behind.
                return text.format(
                    uc_catalog_name=schema_name,
                    **{schema_key: ""}
                ).replace(".", "")
        return text

    with open(cmd.onboarding_file_path) as f:
        onboard_obj = json.load(f)

    for data_flow in onboard_obj:
        # Only existing keys are reassigned, so the dict size never changes
        # while it is being iterated.
        for key, value in data_flow.items():
            if key == "source_details" and isinstance(value, dict):
                for source_key, source_value in value.items():
                    if isinstance(source_value, str) and 'dbfs_path' in source_value:
                        value[source_key] = source_value.format(dbfs_path=dbfs_conf_path)
            # Membership tests and str.format only make sense on strings; JSON
            # numbers, booleans and nulls would otherwise raise TypeError.
            if isinstance(value, str):
                data_flow[key] = render(value)

    # Build the sibling path from the directory rather than str.replace() on
    # the whole path, which would also rewrite directory names that happen to
    # contain the template's file name.
    updated_ob_file_path = os.path.join(
        os.path.dirname(cmd.onboarding_file_path), "onboarding.json"
    )
    with open(updated_ob_file_path, "w") as onboarding_file:
        json.dump(onboard_obj, onboarding_file)
    cmd.onboarding_file_path = updated_ob_file_path
456+
403457

404458
def onboard(dltmeta: DLTMeta):
405459
logger.info("Please answer a couple of questions to for launching DLT META onboarding job")

0 commit comments

Comments
 (0)