Skip to content

Commit 08d79a9

Browse files
saraivdbx (Sara Ivanyos), nfx
authored
Run toolkit notebook from a job (#164)
- [x] test not complete - [ ] permission assignment - [ ] adding users to groups - [x] acc groups - [ ] validation - [x] job is failing --------- Co-authored-by: Sara Ivanyos <[email protected]> Co-authored-by: Serge Smertin <[email protected]>
1 parent 2b8a0bb commit 08d79a9

File tree

2 files changed

+166
-30
lines changed

2 files changed

+166
-30
lines changed

notebooks/toolkit.py

Lines changed: 20 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -7,36 +7,19 @@
77
# MAGIC This notebook provides toolkit for group migration (workspace to account).
# MAGIC
# MAGIC
# MAGIC - Tested on: Latest Databricks Runtime, Single Node cluster, UC enabled (Single-User mode).
# MAGIC

# COMMAND ----------

# Config types and toolkits for the group migration; grouped in one
# parenthesized import with members sorted alphabetically.
from databricks.labs.ucx.config import (
    GroupsConfig,
    InventoryConfig,
    InventoryTable,
    MigrationConfig,
    TaclConfig,
)
from databricks.labs.ucx.toolkits.group_migration import GroupMigrationToolkit
from databricks.labs.ucx.toolkits.table_acls import TaclToolkit

# COMMAND ----------
# COMMAND ----------

# Runtime parameters arrive via notebook widgets so this notebook can be
# driven as a job task with base_parameters; comma-separated lists are split
# into Python lists here.
inventory_schema = dbutils.widgets.get("inventory_schema")
selected_groups = dbutils.widgets.get("selected_groups").split(",")
databases = dbutils.widgets.get("databases").split(",")

config = MigrationConfig(
    inventory=InventoryConfig(
        # Inventory is kept as a hive_metastore table named "permissions"
        # inside the schema supplied by the caller.
        table=InventoryTable(catalog='hive_metastore', database=inventory_schema, name='permissions')
    ),
    groups=GroupsConfig(
        # use this option to select specific groups manually
        selected=selected_groups,
        # use this option to select all groups automatically
        # auto=True
    ),
    tacl=TaclConfig(
        # use this option to select specific databases manually
        databases=databases,
        # use this option to select all databases automatically
        # auto=True
    ),
    log_level="DEBUG",
)

toolkit = GroupMigrationToolkit(config)
tacltoolkit = TaclToolkit(
    # NOTE(review): reaches into a private attribute of the toolkit for the
    # workspace client — consider exposing it publicly.
    toolkit._ws,
    inventory_catalog=config.inventory.table.catalog,
    inventory_schema=config.inventory.table.database,
    databases=config.tacl.databases,
)

tests/integration/test_installation.py

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,19 @@
1+
import logging
2+
import os
13
import shutil
24
import subprocess
35
import sys
6+
from io import BytesIO
47
from pathlib import Path
58

69
import pytest
10+
from databricks.sdk.service import compute, jobs
711
from databricks.sdk.service.workspace import ImportFormat
812

913
from databricks.labs.ucx.providers.mixins.compute import CommandExecutor
1014

15+
logger = logging.getLogger(__name__)
16+
1117

1218
@pytest.fixture
1319
def fresh_wheel_file(tmp_path) -> Path:
@@ -85,3 +91,143 @@ def test_sql_backend_works(ws, wsfs_wheel):
8591
)
8692

8793
assert len(database_names) > 0
94+
95+
96+
def test_toolkit_notebook(
    ws,
    sql_exec,
    wsfs_wheel,
    make_cluster,
    make_cluster_policy,
    make_directory,
    make_ucx_group,
    make_instance_pool,
    make_job,
    make_notebook,
    make_pipeline,
    make_random,
    make_repo,
    make_secret_scope,
    make_schema,
    make_table,
    make_user,
):
    """End-to-end run of notebooks/toolkit.py as a one-off Databricks job.

    Provisions users, workspace/account group pairs, workspace assets and
    hive_metastore schemas/tables with grants, uploads the toolkit notebook,
    then creates and runs a job that executes it with widget parameters.
    Migration results are not validated yet (see TODOs); the uploaded notebook
    and the job are always cleaned up in the ``finally`` block.
    """
    logger.info("setting up fixtures")

    user_a = make_user()
    user_b = make_user()
    user_c = make_user()

    logger.info(f"user_a={user_a}, user_b={user_b}, user_c={user_c}, ")

    # TODO add users to groups
    ws_group_a, acc_group_a = make_ucx_group()
    ws_group_b, acc_group_b = make_ucx_group()
    ws_group_c, acc_group_c = make_ucx_group()

    # Passed to the notebook as a comma-separated widget value.
    selected_groups = ",".join([ws_group_a.display_name, ws_group_b.display_name, ws_group_c.display_name])

    logger.info(f"group_a={ws_group_a}, group_b={ws_group_b}, group_c={ws_group_c}, ")

    cluster = make_cluster(instance_pool_id=os.environ["TEST_INSTANCE_POOL_ID"], single_node=True)
    cluster_policy = make_cluster_policy()
    directory = make_directory()
    instance_pool = make_instance_pool()
    job = make_job()
    notebook = make_notebook()
    pipeline = make_pipeline()
    repo = make_repo()
    secret_scope = make_secret_scope()

    logger.info(
        f"cluster={cluster}, "
        f"cluster_policy={cluster_policy}, "
        f"directory={directory}, "
        f"instance_pool={instance_pool}, "
        f"job={job}, "
        f"notebook={notebook}, "
        # FIX: separator was missing here, fusing "pipeline=..." and "repo=..."
        # into one token in the log output.
        f"pipeline={pipeline}, "
        f"repo={repo}, "
        f"secret_scope={secret_scope}, "
    )

    # TODO create fixtures for DBSQL assets
    # TODO set permissions

    schema_a = make_schema()
    schema_b = make_schema()
    schema_c = make_schema()
    table_a = make_table(schema=schema_a)
    table_b = make_table(schema=schema_b)

    logger.info(
        f"schema_a={schema_a}, "
        f"schema_b={schema_b}, "
        f"schema_c={schema_c}, "
        f"table_a={table_a}, "
        f"table_b={table_b}, "
    )

    # Schema fixtures are "catalog.schema" strings; the notebook widget takes
    # bare schema names, comma-separated.
    databases = ",".join([schema_a.split(".")[1], schema_b.split(".")[1], schema_c.split(".")[1]])

    # Seed legacy table ACLs for the toolkit to migrate.
    sql_exec(f"GRANT USAGE ON SCHEMA default TO `{ws_group_a.display_name}`")
    sql_exec(f"GRANT USAGE ON SCHEMA default TO `{ws_group_b.display_name}`")
    sql_exec(f"GRANT SELECT ON TABLE {table_a} TO `{ws_group_a.display_name}`")
    sql_exec(f"GRANT SELECT ON TABLE {table_b} TO `{ws_group_b.display_name}`")
    sql_exec(f"GRANT MODIFY ON SCHEMA {schema_b} TO `{ws_group_b.display_name}`")

    _, inventory_schema = make_schema(catalog="hive_metastore").split(".")

    logger.info(f"inventory_schema={inventory_schema}")

    logger.info("uploading notebook")

    ucx_notebook_path = Path(__file__).parent.parent.parent / "notebooks" / "toolkit.py"
    my_user = ws.current_user.me().user_name
    remote_ucx_notebook_location = f"/Users/{my_user}/notebooks/{make_random(10)}"
    ws.workspace.mkdirs(remote_ucx_notebook_location)
    ws_notebook = f"{remote_ucx_notebook_location}/test_notebook.py"

    with open(ucx_notebook_path, "rb") as fh:
        buf_notebook = BytesIO(fh.read())
    ws.workspace.upload(ws_notebook, buf_notebook, format=ImportFormat.AUTO)

    logger.info("creating job")

    created_job = ws.jobs.create(
        tasks=[
            jobs.Task(
                task_key="uc-migrate",
                notebook_task=jobs.NotebookTask(
                    # presumably AUTO import strips the .py suffix, so the task
                    # references the notebook without it — TODO confirm
                    notebook_path=f"{remote_ucx_notebook_location}/test_notebook",
                    base_parameters={
                        "inventory_schema": inventory_schema,
                        "selected_groups": selected_groups,
                        "databases": databases,
                    },
                ),
                libraries=[compute.Library(whl=f"/Workspace{wsfs_wheel}")],
                new_cluster=compute.ClusterSpec(
                    instance_pool_id=os.environ["TEST_INSTANCE_POOL_ID"],
                    spark_version=ws.clusters.select_spark_version(latest=True),
                    num_workers=1,
                    spark_conf={"spark.databricks.acl.sqlOnly": "true"},
                ),
            )
        ],
        name="[UCX] Run Migration",
    )

    logger.info("running job")

    try:
        # Blocks until the run reaches a terminal state; raises on failure.
        ws.jobs.run_now(created_job.job_id).result()
        # TODO Validate migration, tacl
    finally:
        logger.info("deleting workbook")

        ws.workspace.delete(remote_ucx_notebook_location, recursive=True)

        logger.info("deleting job")

        ws.jobs.delete(created_job.job_id)

0 commit comments

Comments
 (0)