
Commit 638f04b

✨ introduce make lint to apply various code checks (#28)
* introduce pre-commit
* add pre-commit hook
* add pre-commit hook
* add pre-commit hook
* add docs
1 parent b115a24 commit 638f04b

5 files changed: 69 additions, 28 deletions

.gitignore

Lines changed: 1 addition & 1 deletion

```diff
@@ -133,4 +133,4 @@ cython_debug/
 .idea/

 # ruff
-.ruff_cache
+.ruff_cache
```

Makefile

Lines changed: 11 additions & 0 deletions

```diff
@@ -0,0 +1,11 @@
+lint:
+	@echo "Linting the project code"
+	black .
+	isort .
+	ruff . --fix
+
+verify:
+	@echo "Verifying the project code"
+	black . --check
+	isort . --check
+	ruff .
```

README.md

Lines changed: 13 additions & 4 deletions

```diff
@@ -1,6 +1,8 @@
-# UC Upgrade Utilities
+# UC Migration Toolkit

 This repo contains various functions and utilities for UC Upgrade.
+
+
 ## Latest working version and how-to

 Please note that current project statis is 🏗️ **WIP**, but we have a minimal set of already working utilities.
@@ -17,18 +19,25 @@ Recommended VM types are:
 **For now please switch to the `v0.0.1` tag in the GitHub to get the latest working version.**


-## Local setup
+## Local setup and development process

 - Install [poetry](https://python-poetry.org/)
 - Run `poetry install` in the project directory
 - Pin your IDE to use the newly created poetry environment

-> Please note that you **don't** need to use `poetry` inside notebooks or in the Databricks workspace.
+> Please note that you **don't** need to use `poetry` inside notebooks or in the Databricks workspace.
 > It's only introduced to simplify local development.

+Before running `git push`, don't forget to lint your code with:
+
+```shell
+make lint
+```
+
 ### Details of package installation

 Since the package itself is managed with `poetry`, to re-use it inside the notebooks we're doing the following:

 1. Installing the package dependencies via poetry export
-2. Adding the package itself to the notebook via `sys.path`
+2. Adding the package itself to the notebook via `sys.path`
+
```

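The two numbered steps at the end of the README above can be illustrated with a short sketch. This is only an illustration of the pattern the README describes, not the repository's actual code (the real helper lives in `notebooks/common.py` below); the requirements path and checkout location are hypothetical.

```python
# Rough sketch of the README's two-step approach; paths are placeholders.
import subprocess
import sys

# 1. Export the poetry-managed dependencies and install them with pip.
subprocess.run(["poetry", "export", "--without-hashes", "-o", "requirements.txt"], check=True)
subprocess.run([sys.executable, "-m", "pip", "install", "-r", "requirements.txt"], check=True)

# 2. Make the package importable from the notebook by extending sys.path.
sys.path.append("/path/to/uc-upgrade")  # hypothetical checkout location
from uc_upgrade.group_migration import GroupMigration  # noqa: E402
```
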
notebooks/common.py

Lines changed: 4 additions & 4 deletions

```diff
@@ -2,11 +2,11 @@
 from pathlib import Path
 from tempfile import NamedTemporaryFile

-from databricks.sdk.runtime import * # noqa: F403
+from databricks.sdk.runtime import * # noqa: F403


 def install_uc_upgrade_package():
-ipython = get_ipython() # noqa: F405
+ipython = get_ipython() # noqa: F405

 print("Installing poetry for package management")
 ipython.run_line_magic("pip", "install poetry -I")
@@ -19,7 +19,7 @@ def install_uc_upgrade_package():
 print("Saved the requirements to a provided file, installing them with pip")
 ipython.run_line_magic("pip", f"install -r {requirements_file.name} -I")
 print("Requirements installed successfully, restarting Python interpreter")
-dbutils.library.restartPython() # noqa: F405
+dbutils.library.restartPython() # noqa: F405
 print("Python interpreter restarted successfully")

 print("Reloading the path-based modules")
@@ -33,7 +33,7 @@ def install_uc_upgrade_package():

 print("Verifying that package can be properly loaded")
 try:
-from uc_upgrade.group_migration import GroupMigration # noqa: F401
+from uc_upgrade.group_migration import GroupMigration # noqa: F401

 print("Successfully loaded the uc-upgrade package")
 except Exception as e:
```

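For context, a minimal usage sketch of the helper above from a Databricks notebook, assuming `notebooks/` has been put on `sys.path` as the README describes; the workspace path below is a hypothetical placeholder.

```python
# Hypothetical notebook cell; the repo path is a placeholder, not a real location.
import sys

sys.path.append("/Workspace/Repos/<user>/uc-upgrade/notebooks")
from common import install_uc_upgrade_package

# Exports the poetry dependencies, installs them with pip, restarts the Python
# interpreter, and verifies that uc_upgrade.group_migration can be imported.
install_uc_upgrade_package()
```
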
uc_upgrade/group_migration.py

Lines changed: 40 additions & 19 deletions

```diff
@@ -1,5 +1,6 @@
 import concurrent.futures
 import json
+import logging
 import math
 import time
 from typing import List
@@ -8,7 +9,6 @@
 from pyspark.sql import session
 from pyspark.sql.functions import array_contains, col, collect_set, lit
 from pyspark.sql.types import MapType, StringType, StructField, StructType
-import logging

 # Initialize logger
 logger = logging.getLogger(__name__)
@@ -82,15 +82,17 @@ def __init__(
 spark.sql(f"drop table if exists {self.inventoryTableName+'TableACL'}")

 # Check if we should automatically generate list, and do it immediately.
-# Implementers Note: Could change this section to a lazy calculation by setting groupL to nil or some sentinel value and adding checks before use.
+# Implementers Note: Could change this section to a lazy calculation
+# by setting groupL to nil or some sentinel value and adding checks before use.
 res = requests.get(f"{self.workspace_url}/api/2.0/preview/scim/v2/Me", headers=self.headers)
 # logger.info(res.text)
 if res.status_code == 403:
 logger.error("token not valid.")
 return
 if autoGenerateList:
 logger.info(
-"autoGenerateList parameter is set to TRUE. Ignoring groupL parameter and instead will automatically generate list of migraiton groups."
+"autoGenerateList parameter is set to TRUE. "
+"Ignoring groupL parameter and instead will automatically generate list of migraiton groups."
 )
 self.groupL = self.findMigrationEligibleGroups()

@@ -102,7 +104,8 @@ def __init__(
 self.WorkspaceGroupNames = self.groupL

 logger.info(
-f"Successfully initialized GroupMigration class with {len(self.groupL)} workspace-local groups to migrate. Groups to migrate:"
+f"Successfully initialized GroupMigration class "
+f"with {len(self.groupL)} workspace-local groups to migrate. Groups to migrate:"
 )
 for i, group in enumerate(self.groupL, start=1):
 logger.info(f"{i}. {group}")
@@ -166,15 +169,17 @@ def findMigrationEligibleGroups(self):

 # logger.info count and membership of not_in_account_groups
 logger.info(
-f"Unable to match {len(not_in_account_groups)} current workspace-local groups. No matching account level group with the same name found. These groups WILL NOT MIGRATE:"
+f"Unable to match {len(not_in_account_groups)} current workspace-local groups. "
+f"No matching account level group with the same name found. These groups WILL NOT MIGRATE:"
 )
 for i, group in enumerate(not_in_account_groups, start=1):
 logger.info(f"{i}. {group} (WON'T MIGRATE)")

 if len(migration_eligible) > 0:
 # logger.info count and membership of intersection
 logger.info(
-f"\nFound {len(migration_eligible)} current workspace-local groups to account level groups. These groups WILL BE MIGRATED."
+f"\nFound {len(migration_eligible)} current workspace-local groups to account level groups. "
+f"These groups WILL BE MIGRATED."
 )
 for i, group in enumerate(migration_eligible, start=1):
 logger.info(f"{i}. {group} (WILL MIGRATE)")
@@ -183,7 +188,9 @@ def findMigrationEligibleGroups(self):
 return migration_eligible
 else:
 logger.info(
-"There are no migration eligible groups. All existing workspace-local groups do not exist at the account level.\nNO MIGRATION WILL BE PERFORMED."
+"There are no migration eligible groups. "
+"All existing workspace-local groups do not exist at the account level."
+"\nNO MIGRATION WILL BE PERFORMED."
 )
 return []
 except Exception as e:
@@ -250,7 +257,8 @@ def getGroupObjects(self, groupFilterKeeplist) -> list:
 try:
 for ent in e["entitlements"]:
 entms.append(ent["value"])
-except:
+except Exception as e:
+# TBD: introduce warning with proper explanation
 pass

 groupEntitlements[e["id"]] = entms
@@ -261,7 +269,8 @@ def getGroupObjects(self, groupFilterKeeplist) -> list:
 try:
 for ent in e["roles"]:
 entms.append(ent["value"])
-except:
+except Exception:
+# TBD: introduce a proper warning
 continue
 if len(entms) == 0:
 continue
```
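
The two hunks just above replace bare `except:` clauses and leave "TBD" notes about adding proper warnings. One caveat worth flagging: in the first hunk the exception is bound as `e`, which shadows the enclosing loop variable `e` used again right after the `try` block, and Python deletes the `as` name when the handler exits. A minimal sketch of what such a warning could look like, using a non-shadowing name; this is an illustration with made-up data, not the repository's code:

```python
import logging

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)

# Hypothetical SCIM group entries; the second one has no "entitlements" key.
group_entries = [
    {"id": "g1", "entitlements": [{"value": "allow-cluster-create"}]},
    {"id": "g2"},
]
groupEntitlements = {}

for e in group_entries:
    entms = []
    try:
        for ent in e["entitlements"]:
            entms.append(ent["value"])
    except (KeyError, TypeError) as exc:  # bind to `exc`, not `e`, so the loop variable survives
        logger.warning("Could not read entitlements for group %s: %s", e.get("id"), exc)
    groupEntitlements[e["id"]] = entms

print(groupEntitlements)  # {'g1': ['allow-cluster-create'], 'g2': []}
```
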
```diff
@@ -345,7 +354,8 @@ def getRecursiveGroupMember(self, groupM: dict):
 self.groupUserList.extend(userPrincipalList)
 self.groupSPList.extend(spPrincipalList)

-# getACL[n] family of functions extract the ACL from the converted json response into a standard format, filtering by groupL
+# getACL[n] family of functions extract the ACL
+# from the converted json response into a standard format, filtering by groupL
 def getACL(self, acls: dict) -> list:
 aclList = []
 for acl in acls:
@@ -386,10 +396,10 @@ def getACL2(self, acls: dict) -> list:
 aclList = []
 for acl in acls:
 try:
-l = []
+acls_items = []
 for k, v in acl.items():
-l.append(v)
-aclList.append(l)
+acls_items.append(v)
+aclList.append(acls_items)
 except KeyError:
 continue
 for acl in aclList:
```
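
The rename above fixes the ambiguous single-letter name `l` (pycodestyle's E741, one of the checks ruff runs). As a side note, because only the values are collected, the inner loop could arguably be collapsed; a small sketch under the assumption that each `acl` entry is a plain dict with illustrative keys:

```python
acls = [
    {"user_name": "alice", "permission_level": "CAN_MANAGE"},
    {"group_name": "engineers", "permission_level": "CAN_VIEW"},
]

# Equivalent to the renamed loop body: collect each ACL entry's values in order.
aclList = [list(acl.values()) for acl in acls]
print(aclList)  # [['alice', 'CAN_MANAGE'], ['engineers', 'CAN_VIEW']]
```
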
```diff
@@ -829,7 +839,9 @@ def getExperimentACL(self) -> dict:
 f"{self.workspace_url}/api/2.0/permissions/experiments/{expID}",
 headers=self.headers,
 )
-# resExpPerm=requests.get(f"{self.workspace_url}/api/2.0/permissions/experiments/{expID}", headers=self.headers)
+# resExpPerm=requests.get(
+# f"{self.workspace_url}/api/2.0/permissions/experiments/{expID}", headers=self.headers
+# )
 if resExpPerm.status_code == 404:
 logger.error("feature not enabled for this tier")
 continue
@@ -1035,7 +1047,9 @@ def getSingleFolderList(self, path: str, depth: int) -> dict:
 except Exception as e:
 lastError = e
 continue
-logger.error(f"[ERROR] retry limit ({MAX_RETRY}) limit exceeded while retrieving path {path}. last err: {lastError}.")
+logger.error(
+f"[ERROR] retry limit ({MAX_RETRY}) limit exceeded while retrieving path {path}. last err: {lastError}."
+)
 return (path, {}, {}, {})

 def getFoldersNotebookACL(self, rootPath="/") -> list:
@@ -1044,7 +1058,8 @@ def getFoldersNotebookACL(self, rootPath="/") -> list:
 # Get folder list
 self.getRecursiveFolderList(rootPath)

-# Collect folder IDs, ignoring suffix /Trash to avoid useless errors. /Repos and /Shared are ignored at the folder list level
+# Collect folder IDs, ignoring suffix /Trash to avoid useless errors.
+# /Repos and /Shared are ignored at the folder list level
 folder_ids = [
 folder_id for folder_id in self.folderList.keys() if not self.folderList[folder_id].endswith("/Trash")
 ]
@@ -1288,7 +1303,9 @@ def getSecretScoppeACL(self) -> dict:

 resSSPermJson = resSSPerm.json()
 if "items" not in resSSPermJson:
-# logger.info(f'ACL for Secret Scope {scopeName} missing "items" key. Contents:\n{resSSPermJson}\nSkipping...')
+# logger.info(
+# f'ACL for Secret Scope {scopeName} missing "items" key. Contents:\n{resSSPermJson}\nSkipping...'
+# )
 # This seems to be expected behaviour if there are no ACLs, silently ignore
 continue

@@ -1487,7 +1504,10 @@ def getDBACL(self, db: str):
 userList = [p.Principal for p in userListCollect]
 userList = list(set(userList))
 if not self.checkPrincipalInGroupOrMember(userList, db):
-# logger.info(f'selected groups or members of the groups have no USAGE or OWN permission on database level. Skipping object level permission check for database {db}.')
+# logger.info(
+# f'selected groups or members of the groups have no USAGE or OWN permission on database level.'
+# 'Skipping object level permission check for database {db}.'
+# )
 return []

 tables = self.runVerboseSql("show tables in spark_catalog.{}".format(db)).filter(
@@ -1556,7 +1576,8 @@ def getTableACLs(self) -> list:
 userList = list(set(userList))
 if self.checkPrincipalInGroupOrMember(userList, "CATALOG"):
 logger.info(
-"some groups or members of the group given permission at catalog level, running permission for all databases"
+"some groups or members of the group given "
+"permission at catalog level, running permission for all databases"
 )
 self.checkAllDB = True
 database_names = []
```

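Most of the remaining hunks split long log messages across adjacent string literals to satisfy the line-length limit. Python joins adjacent literals (including f-strings) at compile time, so each pair still produces a single message; the main thing to watch at the split point is the separating space. A minimal illustration, not repository code:

```python
count = 4

# Adjacent string literals are concatenated into one string, so this logs a
# single message; note the trailing space kept at the end of the first piece.
message = (
    f"Unable to match {count} current workspace-local groups. "
    "These groups WILL NOT MIGRATE."
)
print(message)
```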