Skip to content

Commit 56649bc

Browse files
authored
Merge pull request #40 from databricks/IncrementalPersists
Incremental persists
2 parents ac2e05f + 4f7ec3d commit 56649bc

File tree

7 files changed

+447
-569
lines changed

7 files changed

+447
-569
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@ This repo contains various functions and utilities for UC Upgrade.
66
## Latest working version and how-to
77

88
Please note that the current project status is 🏗️ **WIP**, but we have a minimal set of already working utilities.
9-
To run the notebooks please use latest LTS Databricks Runtime (non-ML), without Photon, in a single-user cluster mode with UC enabled.
9+
To run the notebooks please use latest LTS Databricks Runtime (non-ML), without Photon, in a single-user cluster mode.
10+
If you have Table ACL clusters or SQL Warehouses where ACLs have been defined, you should create a Table ACL cluster to run this notebook.
1011

1112
Please note that script is executed only on the driver node, therefore you'll need to use a Single Node Cluster with sufficient amount of cores (e.g. 16 cores).
1213

notebooks/GroupMigration/Workspace_Group_Migration_Notebook.py

Lines changed: 43 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,10 @@
5757
# MAGIC %md
5858
# MAGIC ## How to Run
5959
# MAGIC
60-
# MAGIC Run the script in the following sequence
60+
61+
# COMMAND ----------
62+
63+
# MAGIC %md
6164
# MAGIC #### Step 1: Initialize the class
6265
# MAGIC Import the module WSGroupMigration and initialize the class by passing following attributes:
6366
# MAGIC - list of workspace group to be migrated (make sure these are workspace groups and not account level groups)
@@ -70,36 +73,30 @@
7073

7174
# COMMAND ----------
7275

73-
# MAGIC %md ## Installing the package and it's dependencies
74-
75-
# COMMAND ----------
76-
77-
from notebooks.common import install_uc_upgrade_package
78-
79-
install_uc_upgrade_package()
80-
81-
# COMMAND ----------
82-
83-
# MAGIC %md ## Main process entrypoint
76+
from uc_upgrade.group_migration import GroupMigration
8477

8578
# COMMAND ----------
8679

8780
# If autoGenerateList=True then groupL will be ignored and all eligible groups will be migrated.
8881
autoGenerateList = False
8982

90-
# please provide groups here, e.g.
83+
# please provide groups here, e.g. analyst.
84+
# please provide group names and not ids
9185
groupL = ["groupA", "groupB"]
9286

9387

9488
# Find this in the account console
9589
inventoryTableName = "WorkspaceInventory"
90+
# the script will create two tables
91+
# WorkspaceInventory - to store all the ACL permission
92+
# WorkspaceInventoryTableACL - to store the table acl permission specifically
9693

9794
# Pull from your browser URL bar. Should start with "https://" and end with ".com" or ".net"
9895
workspace_url = "https://<DOMAIN>"
9996

10097

10198
# Personal Access Token. Create one in "User Settings"
102-
token = "<TOKEN"
99+
token = "<TOKEN>"
103100

104101
# Should the migration Check the ACL on tables/views as well?
105102
checkTableACL = False
@@ -111,8 +108,13 @@
111108
userName = "<UserMailID>"
112109

113110
# Number of threads to issue Databricks API requests with. If you get a lot of errors during the inventory, lower this value.
114-
numThreads = 30
111+
numThreads = 10
115112

113+
# The notebook will populate data in the WorkspaceInventory and WorkspaceInventoryTableACL(If applicable).
114+
# If the notebook is run a second time, it will retrieve the data from the table if already captured.
115+
# Users have the option to do a fresh inventory in which case it will recreate the tables and start again.
116+
# default set to False
117+
freshInventory = False
116118
# Initialize GroupMigration Class with values supplied above
117119
gm = GroupMigration(
118120
groupL=groupL,
@@ -125,6 +127,7 @@
125127
checkTableACL=checkTableACL,
126128
autoGenerateList=autoGenerateList,
127129
numThreads=numThreads,
130+
freshInventory=freshInventory,
128131
)
129132

130133
# COMMAND ----------
@@ -133,13 +136,36 @@
133136
# MAGIC #### Step 2: Perform Dry run
134137
# MAGIC This step performs a dry run to verify the current ACLs on the supplied workspace groups and prints out the permissions.
135138
# MAGIC Please verify if all the permissions are covered
139+
# MAGIC If the inventory was run previously and stored in the table for either Workspace or Account then it will use the same and save time, else it will do a fresh inventory
140+
# MAGIC If the inventory data in the table is present for only few workspace objects , the dryRun will do the fresh inventory of objects not present in the table
136141

137142
# COMMAND ----------
138143

139144
gm.dryRun("Workspace")
140145

141146
# COMMAND ----------
142147

148+
# MAGIC %md
149+
# MAGIC #### Adhoc Step: Selective Inventory
150+
# MAGIC This is an ad hoc step for troubleshooting purposes. Once dryRun is complete and the data is stored in tables, if the ACL of any object is changed in the workspace
151+
# MAGIC Ex new notebook permission added, User can force a fresh inventory of the selected object instead of doing a full cleanup and running the dryRun
152+
# MAGIC To save time call gm.performInventory with 3 parameters:
153+
# MAGIC - mode: Workspace ("for workspace local group") or Account ("for workspace back up group")
154+
# MAGIC - force: setting to True will force fresh inventory capture and updates to the tables
155+
# MAGIC - objectType: select the list of object for which to do the fresh inventory, options are
156+
# MAGIC
157+
# MAGIC "Group"(will do members, group list, entitlement, roles), "Password","Cluster","ClusterPolicy","Warehouse","Dashboard","Query","Job","Folder"(Will do folders, notebook and files),"TableACL","Alert","Pool","Experiment","Model","DLT","Repo","Token","Secret"
158+
# MAGIC Ex: gm.performInventory('Workspace',force=True,objectType='Cluster') will do:
159+
# MAGIC - fresh inventory of all cluster objects and update the data in the inventory table
160+
# MAGIC - run printInventory() to verify all the permission again (including clusters).
161+
162+
# COMMAND ----------
163+
164+
gm.performInventory("Workspace", force=True, objectType="Cluster")
165+
gm.printInventory()
166+
167+
# COMMAND ----------
168+
143169
# MAGIC %md
144170
# MAGIC #### Step 3: Create Back up group
145171
# MAGIC This step creates the backup groups and applies the ACLs from the original workspace group to the new temp group.
@@ -159,6 +185,8 @@
159185
# MAGIC - Verify the temp group permissions are as seen in the initial dry run
160186
# MAGIC - check randomly if all the ACL are applied correctly
161187
# MAGIC - there should be one temp group for every workspace group (Ex: db-temp-analysts and analysts with same ACLs)
188+
# MAGIC - Similar to dryRun("workspace"), this will also capture the inventory on the first run and store it in tables; on subsequent runs the inventory will be retrieved from the tables to save time.
189+
# MAGIC - if inventory table contains partial workspace objects(ex cluster acl is missing), it will do fresh inventory for the missing object and update table
162190

163191
# COMMAND ----------
164192

notebooks/common.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@
22
from pathlib import Path
33
from tempfile import NamedTemporaryFile
44

5-
from databricks.sdk.runtime import * # noqa: F403
5+
# from databricks.sdk.runtime import * # noqa: F403
66

77

88
def install_uc_upgrade_package():
9-
ipython = get_ipython() # noqa: F405
9+
ipython = get_ipython() # noqa: F405, F821
1010

1111
print("Installing poetry for package management")
1212
ipython.run_line_magic("pip", "install poetry -I")
@@ -19,7 +19,7 @@ def install_uc_upgrade_package():
1919
print("Saved the requirements to a provided file, installing them with pip")
2020
ipython.run_line_magic("pip", f"install -r {requirements_file.name} -I")
2121
print("Requirements installed successfully, restarting Python interpreter")
22-
dbutils.library.restartPython() # noqa: F405
22+
dbutils.library.restartPython() # noqa: F405, F821
2323
print("Python interpreter restarted successfully")
2424

2525
print("Reloading the path-based modules")

notebooks/hms_external_to_uc_managed/hms-external-to-uc-managed.py

Lines changed: 0 additions & 142 deletions
This file was deleted.

notebooks/metastore_export_import/01_backup_catalog.py

Lines changed: 0 additions & 92 deletions
This file was deleted.

0 commit comments

Comments
 (0)