@@ -44,11 +44,16 @@ class AWSRoleAction:
4444 privilege : str
4545 resource_path : str
4646
@property
def role_name(self):
    """Return the role name parsed from ``self.role_arn``.

    The ARN is matched against ``AWSInstanceProfile.ROLE_NAME_REGEX`` and the
    pattern's first capture group (the trailing name segment) is returned.

    Raises:
        ValueError: if ``self.role_arn`` does not match the pattern.
    """
    role_match = re.match(AWSInstanceProfile.ROLE_NAME_REGEX, self.role_arn)
    if role_match is None:
        # re.match yields None on a non-match; fail with the offending ARN instead of
        # an opaque "'NoneType' object has no attribute 'group'" error.
        raise ValueError(f"Cannot extract a role name from ARN: {self.role_arn!r}")
    return role_match.group(1)
51+
4752
4853@dataclass
4954class AWSInstanceProfile :
5055 instance_profile_arn : str
51- iam_role_arn : str | None = None
56+ iam_role_arn : str
5257
5358 ROLE_NAME_REGEX = r"arn:aws:iam::[0-9]+:(?:instance-profile|role)\/([a-zA-Z0-9+=,.@_-]*)$"
5459
@@ -219,8 +224,8 @@ def _s3_actions(self, actions):
219224 s3_actions = [actions ]
220225 return s3_actions
221226
222- def add_uc_role (self , role_name ):
223- aws_role_trust_doc = {
227+ def _aws_role_trust_doc (self , external_id = "0000" ):
228+ return {
224229 "Version" : "2012-10-17" ,
225230 "Statement" : [
226231 {
@@ -229,21 +234,42 @@ def add_uc_role(self, role_name):
229234 "AWS" : "arn:aws:iam::414351767826:role/unity-catalog-prod-UCMasterRole-14S5ZJVKOTYTL"
230235 },
231236 "Action" : "sts:AssumeRole" ,
232- "Condition" : {"StringEquals" : {"sts:ExternalId" : "0000" }},
237+ "Condition" : {"StringEquals" : {"sts:ExternalId" : external_id }},
233238 }
234239 ],
235240 }
236- # the AssumeRole condition will be modified with the external ID captured from the UC credential.
237- # https://docs.databricks.com/en/connect/unity-catalog/storage-credentials.html
238- assume_role_json = self ._get_json_for_cli (aws_role_trust_doc )
241+
def add_uc_role(self, role_name: str) -> bool:
    """
    Create an IAM role that Unity Catalog can assume to access the S3 buckets.

    The trust document is generated with the default external ID; the AssumeRole
    condition is meant to be updated later with the external ID captured from the
    UC credential.
    https://docs.databricks.com/en/connect/unity-catalog/storage-credentials.html

    Returns True when the ``iam create-role`` command produced a truthy result,
    False otherwise.
    """
    trust_policy_arg = self._get_json_for_cli(self._aws_role_trust_doc())
    created_role = self._run_json_command(
        f"iam create-role --role-name {role_name} --assume-role-policy-document {trust_policy_arg}"
    )
    return bool(created_role)
245255
246- def add_uc_role_policy (self , role_name , policy_name , s3_prefixes : set [str ], account_id : str , kms_key = None ):
def update_uc_trust_role(self, role_name: str, external_id: str = "0000") -> bool:
    """
    Replace the trust policy of an existing Unity Catalog IAM role.

    The new trust document carries ``external_id`` in its ``sts:ExternalId``
    condition, i.e. the external ID captured from the UC credential.
    https://docs.databricks.com/en/connect/unity-catalog/storage-credentials.html

    Returns True when the ``iam update-assume-role-policy`` command produced a
    truthy result, False otherwise.
    """
    trust_policy_arg = self._get_json_for_cli(self._aws_role_trust_doc(external_id))
    updated_role = self._run_json_command(
        f"iam update-assume-role-policy --role-name {role_name} --policy-document {trust_policy_arg}"
    )
    return bool(updated_role)
269+
270+ def add_uc_role_policy (
271+ self , role_name : str , policy_name : str , s3_prefixes : set [str ], account_id : str , kms_key = None
272+ ) -> bool :
247273 s3_prefixes_enriched = sorted ([self .S3_PREFIX + s3_prefix for s3_prefix in s3_prefixes ])
248274 statement = [
249275 {
@@ -278,7 +304,8 @@ def add_uc_role_policy(self, role_name, policy_name, s3_prefixes: set[str], acco
278304
279305 policy_document_json = self ._get_json_for_cli (policy_document )
280306 if not self ._run_command (
281- f"iam put-role-policy --role-name { role_name } --policy-name { policy_name } --policy-document { policy_document_json } "
307+ f"iam put-role-policy --role-name { role_name } "
308+ f"--policy-name { policy_name } --policy-document { policy_document_json } "
282309 ):
283310 return False
284311 return True
@@ -325,6 +352,7 @@ def __init__(
325352 self ._schema = schema
326353 self ._aws_account_id = aws_account_id
327354 self ._kms_key = kms_key
355+ self ._filename = self .INSTANCE_PROFILES_FILE_NAMES
328356
329357 @classmethod
330358 def for_cli (cls , ws : WorkspaceClient , backend , aws_profile , schema , kms_key = None , product = 'ucx' ):
@@ -338,26 +366,11 @@ def for_cli(cls, ws: WorkspaceClient, backend, aws_profile, schema, kms_key=None
338366 ws ,
339367 backend ,
340368 aws ,
341- schema = schema ,
342- aws_account_id = caller_identity .get ("Account" ),
343- kms_key = kms_key ,
369+ schema ,
370+ caller_identity .get ("Account" ),
371+ kms_key ,
344372 )
345373
346- def save_uc_compatible_roles (self ):
347- uc_role_access = list (self ._get_role_access ())
348- if len (uc_role_access ) == 0 :
349- logger .warning ("No Mapping Was Generated." )
350- return None
351- return self ._installation .save (uc_role_access , filename = self .UC_ROLES_FILE_NAMES )
352-
353- def get_uc_compatible_roles (self ):
354- try :
355- role_actions = self ._installation .load (list [AWSRoleAction ], filename = self .UC_ROLES_FILE_NAMES )
356- except ResourceDoesNotExist :
357- self .save_uc_compatible_roles ()
358- role_actions = self ._installation .load (list [AWSRoleAction ], filename = self .UC_ROLES_FILE_NAMES )
359- return role_actions
360-
361374 def create_uc_roles_cli (self , * , single_role = True , role_name = "UC_ROLE" , policy_name = "UC_POLICY" ):
362375 # Get the missing paths
363376 # Identify the S3 prefixes
@@ -373,7 +386,7 @@ def create_uc_roles_cli(self, *, single_role=True, role_name="UC_ROLE", policy_n
373386 if single_role :
374387 if self ._aws_resources .add_uc_role (role_name ):
375388 self ._aws_resources .add_uc_role_policy (
376- role_name , policy_name , s3_prefixes , account_id = self ._aws_account_id , kms_key = self ._kms_key
389+ role_name , policy_name , s3_prefixes , self ._aws_account_id , self ._kms_key
377390 )
378391 else :
379392 role_id = 1
@@ -383,15 +396,44 @@ def create_uc_roles_cli(self, *, single_role=True, role_name="UC_ROLE", policy_n
383396 f"{ role_name } -{ role_id } " ,
384397 f"{ policy_name } -{ role_id } " ,
385398 {s3_prefix },
386- account_id = self ._aws_account_id ,
387- kms_key = self ._kms_key ,
399+ self ._aws_account_id ,
400+ self ._kms_key ,
388401 )
389402 role_id += 1
390403
def update_uc_role_trust_policy(self, role_name, external_id="0000"):
    """Delegate the trust-policy update for ``role_name`` to the AWS resources helper.

    Returns whatever ``update_uc_trust_role`` returns for the given role name and
    external ID.
    """
    aws_resources = self._aws_resources
    return aws_resources.update_uc_trust_role(role_name, external_id)
406+
def save_uc_compatible_roles(self):
    """Save the records produced by ``_get_role_access`` to the installation.

    Returns None (after logging a warning) when no records were produced;
    otherwise returns the result of ``self._installation.save``.
    """
    role_records = list(self._get_role_access())
    if not role_records:
        # Nothing to persist: warn and skip the write.
        logger.warning("No mapping was generated.")
        return None
    return self._installation.save(role_records, filename=self.UC_ROLES_FILE_NAMES)
413+
def load_uc_compatible_roles(self):
    """Load the saved ``AWSRoleAction`` list, generating the file first if it is missing.

    When the first load raises ``ResourceDoesNotExist``, the roles are saved via
    ``save_uc_compatible_roles`` and the load is attempted once more.
    """
    try:
        return self._installation.load(list[AWSRoleAction], filename=self.UC_ROLES_FILE_NAMES)
    except ResourceDoesNotExist:
        # File not written yet: build it, then fall through to the second load.
        # NOTE(review): if the save step produced no mapping, the retry below
        # presumably raises again - confirm this is intended.
        self.save_uc_compatible_roles()
    return self._installation.load(list[AWSRoleAction], filename=self.UC_ROLES_FILE_NAMES)
421+
def save_instance_profile_permissions(self) -> str | None:
    """Save the records produced by ``_get_instance_profiles_access`` to the installation.

    Returns None (after logging a warning) when no records were produced;
    otherwise returns the result of ``self._installation.save``.
    """
    profile_records = list(self._get_instance_profiles_access())
    if not profile_records:
        # Nothing to persist: warn and skip the write.
        logger.warning("No mapping was generated.")
        return None
    return self._installation.save(profile_records, filename=self.INSTANCE_PROFILES_FILE_NAMES)
428+
391429 def _get_instance_profiles (self ) -> Iterable [AWSInstanceProfile ]:
392430 instance_profiles = self ._ws .instance_profiles .list ()
393431 result_instance_profiles = []
394432 for instance_profile in instance_profiles :
433+ if not instance_profile .iam_role_arn :
434+ instance_profile .iam_role_arn = instance_profile .instance_profile_arn .replace (
435+ "instance-profile" , "role"
436+ )
395437 result_instance_profiles .append (
396438 AWSInstanceProfile (instance_profile .instance_profile_arn , instance_profile .iam_role_arn )
397439 )
@@ -446,7 +488,7 @@ def _get_role_access_task(self, arn: str, role_name: str):
446488
447489 def _identify_missing_paths (self ):
448490 external_locations = ExternalLocations (self ._ws , self ._backend , self ._schema ).snapshot ()
449- compatible_roles = self .get_uc_compatible_roles ()
491+ compatible_roles = self .load_uc_compatible_roles ()
450492 missing_paths = set ()
451493 for external_location in external_locations :
452494 path = PurePath (external_location .location )
@@ -460,13 +502,6 @@ def _identify_missing_paths(self):
460502 missing_paths .add (external_location .location )
461503 return missing_paths
462504
463- def save_instance_profile_permissions (self ) -> str | None :
464- instance_profile_access = list (self ._get_instance_profiles_access ())
465- if len (instance_profile_access ) == 0 :
466- logger .warning ("No Mapping Was Generated." )
467- return None
468- return self ._installation .save (instance_profile_access , filename = self .INSTANCE_PROFILES_FILE_NAMES )
469-
470505 def _identify_missing_external_locations (
471506 self ,
472507 external_locations : Iterable [ExternalLocation ],
@@ -512,7 +547,7 @@ def create_external_locations(self, location_init="UCX_location"):
512547 external_locations = ExternalLocations (self ._ws , self ._backend , self ._schema ).snapshot ()
513548 existing_external_locations = self ._ws .external_locations .list ()
514549 existing_paths = [external_location .url for external_location in existing_external_locations ]
515- compatible_roles = self .get_uc_compatible_roles ()
550+ compatible_roles = self .load_uc_compatible_roles ()
516551 missing_paths = self ._identify_missing_external_locations (external_locations , existing_paths , compatible_roles )
517552 external_location_names = [external_location .name for external_location in existing_external_locations ]
518553 external_location_num = 1
0 commit comments