11import concurrent .futures
22import json
3+ import logging
34import math
45import time
56from typing import List
89from pyspark .sql import session
910from pyspark .sql .functions import array_contains , col , collect_set , lit
1011from pyspark .sql .types import MapType , StringType , StructField , StructType
11- import logging
1212
1313# Initialize logger
1414logger = logging .getLogger (__name__ )
@@ -82,15 +82,17 @@ def __init__(
8282 spark .sql (f"drop table if exists { self .inventoryTableName + 'TableACL' } " )
8383
8484 # Check if we should automatically generate list, and do it immediately.
85- # Implementers Note: Could change this section to a lazy calculation by setting groupL to nil or some sentinel value and adding checks before use.
85+ # Implementers Note: Could change this section to a lazy calculation
86+ # by setting groupL to nil or some sentinel value and adding checks before use.
8687 res = requests .get (f"{ self .workspace_url } /api/2.0/preview/scim/v2/Me" , headers = self .headers )
8788 # logger.info(res.text)
8889 if res .status_code == 403 :
8990 logger .error ("token not valid." )
9091 return
9192 if autoGenerateList :
9293 logger .info (
93- "autoGenerateList parameter is set to TRUE. Ignoring groupL parameter and instead will automatically generate list of migraiton groups."
94+ "autoGenerateList parameter is set to TRUE. "
95+ "Ignoring groupL parameter and instead will automatically generate list of migraiton groups."
9496 )
9597 self .groupL = self .findMigrationEligibleGroups ()
9698
@@ -102,7 +104,8 @@ def __init__(
102104 self .WorkspaceGroupNames = self .groupL
103105
104106 logger .info (
105- f"Successfully initialized GroupMigration class with { len (self .groupL )} workspace-local groups to migrate. Groups to migrate:"
107+ f"Successfully initialized GroupMigration class "
108+ f"with { len (self .groupL )} workspace-local groups to migrate. Groups to migrate:"
106109 )
107110 for i , group in enumerate (self .groupL , start = 1 ):
108111 logger .info (f"{ i } . { group } " )
@@ -166,15 +169,17 @@ def findMigrationEligibleGroups(self):
166169
167170 # logger.info count and membership of not_in_account_groups
168171 logger .info (
169- f"Unable to match { len (not_in_account_groups )} current workspace-local groups. No matching account level group with the same name found. These groups WILL NOT MIGRATE:"
172+ f"Unable to match { len (not_in_account_groups )} current workspace-local groups. "
173+ f"No matching account level group with the same name found. These groups WILL NOT MIGRATE:"
170174 )
171175 for i , group in enumerate (not_in_account_groups , start = 1 ):
172176 logger .info (f"{ i } . { group } (WON'T MIGRATE)" )
173177
174178 if len (migration_eligible ) > 0 :
175179 # logger.info count and membership of intersection
176180 logger .info (
177- f"\n Found { len (migration_eligible )} current workspace-local groups to account level groups. These groups WILL BE MIGRATED."
181+ f"\n Found { len (migration_eligible )} current workspace-local groups to account level groups. "
182+ f"These groups WILL BE MIGRATED."
178183 )
179184 for i , group in enumerate (migration_eligible , start = 1 ):
180185 logger .info (f"{ i } . { group } (WILL MIGRATE)" )
@@ -183,7 +188,9 @@ def findMigrationEligibleGroups(self):
183188 return migration_eligible
184189 else :
185190 logger .info (
186- "There are no migration eligible groups. All existing workspace-local groups do not exist at the account level.\n NO MIGRATION WILL BE PERFORMED."
191+ "There are no migration eligible groups. "
192+ "All existing workspace-local groups do not exist at the account level."
193+ "\n NO MIGRATION WILL BE PERFORMED."
187194 )
188195 return []
189196 except Exception as e :
@@ -250,7 +257,8 @@ def getGroupObjects(self, groupFilterKeeplist) -> list:
250257 try :
251258 for ent in e ["entitlements" ]:
252259 entms .append (ent ["value" ])
253- except :
260+ except Exception as e :
261+ # TBD: introduce warning with proper explanation
254262 pass
255263
256264 groupEntitlements [e ["id" ]] = entms
@@ -261,7 +269,8 @@ def getGroupObjects(self, groupFilterKeeplist) -> list:
261269 try :
262270 for ent in e ["roles" ]:
263271 entms .append (ent ["value" ])
264- except :
272+ except Exception :
273+ # TBD: introduce a proper warning
265274 continue
266275 if len (entms ) == 0 :
267276 continue
@@ -345,7 +354,8 @@ def getRecursiveGroupMember(self, groupM: dict):
345354 self .groupUserList .extend (userPrincipalList )
346355 self .groupSPList .extend (spPrincipalList )
347356
348- # getACL[n] family of functions extract the ACL from the converted json response into a standard format, filtering by groupL
357+ # getACL[n] family of functions extract the ACL
358+ # from the converted json response into a standard format, filtering by groupL
349359 def getACL (self , acls : dict ) -> list :
350360 aclList = []
351361 for acl in acls :
@@ -386,10 +396,10 @@ def getACL2(self, acls: dict) -> list:
386396 aclList = []
387397 for acl in acls :
388398 try :
389- l = []
399+ acls_items = []
390400 for k , v in acl .items ():
391- l .append (v )
392- aclList .append (l )
401+ acls_items .append (v )
402+ aclList .append (acls_items )
393403 except KeyError :
394404 continue
395405 for acl in aclList :
@@ -829,7 +839,9 @@ def getExperimentACL(self) -> dict:
829839 f"{ self .workspace_url } /api/2.0/permissions/experiments/{ expID } " ,
830840 headers = self .headers ,
831841 )
832- # resExpPerm=requests.get(f"{self.workspace_url}/api/2.0/permissions/experiments/{expID}", headers=self.headers)
842+ # resExpPerm=requests.get(
843+ # f"{self.workspace_url}/api/2.0/permissions/experiments/{expID}", headers=self.headers
844+ # )
833845 if resExpPerm .status_code == 404 :
834846 logger .error ("feature not enabled for this tier" )
835847 continue
@@ -1035,7 +1047,9 @@ def getSingleFolderList(self, path: str, depth: int) -> dict:
10351047 except Exception as e :
10361048 lastError = e
10371049 continue
1038- logger .error (f"[ERROR] retry limit ({ MAX_RETRY } ) limit exceeded while retrieving path { path } . last err: { lastError } ." )
1050+ logger .error (
1051+ f"[ERROR] retry limit ({ MAX_RETRY } ) limit exceeded while retrieving path { path } . last err: { lastError } ."
1052+ )
10391053 return (path , {}, {}, {})
10401054
10411055 def getFoldersNotebookACL (self , rootPath = "/" ) -> list :
@@ -1044,7 +1058,8 @@ def getFoldersNotebookACL(self, rootPath="/") -> list:
10441058 # Get folder list
10451059 self .getRecursiveFolderList (rootPath )
10461060
1047- # Collect folder IDs, ignoring suffix /Trash to avoid useless errors. /Repos and /Shared are ignored at the folder list level
1061+ # Collect folder IDs, ignoring suffix /Trash to avoid useless errors.
1062+ # /Repos and /Shared are ignored at the folder list level
10481063 folder_ids = [
10491064 folder_id for folder_id in self .folderList .keys () if not self .folderList [folder_id ].endswith ("/Trash" )
10501065 ]
@@ -1288,7 +1303,9 @@ def getSecretScoppeACL(self) -> dict:
12881303
12891304 resSSPermJson = resSSPerm .json ()
12901305 if "items" not in resSSPermJson :
1291- # logger.info(f'ACL for Secret Scope {scopeName} missing "items" key. Contents:\n{resSSPermJson}\nSkipping...')
1306+ # logger.info(
1307+ # f'ACL for Secret Scope {scopeName} missing "items" key. Contents:\n{resSSPermJson}\nSkipping...'
1308+ # )
12921309 # This seems to be expected behaviour if there are no ACLs, silently ignore
12931310 continue
12941311
@@ -1487,7 +1504,10 @@ def getDBACL(self, db: str):
14871504 userList = [p .Principal for p in userListCollect ]
14881505 userList = list (set (userList ))
14891506 if not self .checkPrincipalInGroupOrMember (userList , db ):
1490- # logger.info(f'selected groups or members of the groups have no USAGE or OWN permission on database level. Skipping object level permission check for database {db}.')
1507+ # logger.info(
1508+ # f'selected groups or members of the groups have no USAGE or OWN permission on database level.'
1509+ # 'Skipping object level permission check for database {db}.'
1510+ # )
14911511 return []
14921512
14931513 tables = self .runVerboseSql ("show tables in spark_catalog.{}" .format (db )).filter (
@@ -1556,7 +1576,8 @@ def getTableACLs(self) -> list:
15561576 userList = list (set (userList ))
15571577 if self .checkPrincipalInGroupOrMember (userList , "CATALOG" ):
15581578 logger .info (
1559- "some groups or members of the group given permission at catalog level, running permission for all databases"
1579+ "some groups or members of the group given "
1580+ "permission at catalog level, running permission for all databases"
15601581 )
15611582 self .checkAllDB = True
15621583 database_names = []
0 commit comments