Skip to content

Commit dbdebb8

Browse files
nkvuong and qziyuan authored
Added AWS IAM roles support to databricks labs ucx migrate-credentials command (#973)
## Changes
<!-- Summary of your changes that are easy to understand. Add screenshots when necessary -->
A few more things to be done
- [x] Added `load` function to `AWSResourcePermissions` to return identified instance profiles
- [x] Added `IamRoleMigration` class under `aws/credentials.py` to migrate AWS instance profiles identified

### Linked issues
<!-- DOC: Link issue with a keyword: close, closes, closed, fix, fixes, fixed, resolve, resolves, resolved. See https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword -->
Resolves #862

Related PR:
- #874

### Functionality
- [x] added relevant user documentation
- [x] added new CLI command `databricks labs ucx migrate-credentials`

### Tests
<!-- How is this tested? Please see the checklist below and also describe any other relevant tests -->
- [x] manually tested
- [x] added unit tests
- [x] added integration tests

---------

Co-authored-by: qziyuan <[email protected]>
1 parent 40568e5 commit dbdebb8

File tree

16 files changed

+669
-84
lines changed

16 files changed

+669
-84
lines changed

src/databricks/labs/ucx/assessment/aws.py

Lines changed: 74 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -44,11 +44,16 @@ class AWSRoleAction:
4444
privilege: str
4545
resource_path: str
4646

47+
@property
48+
def role_name(self):
49+
role_match = re.match(AWSInstanceProfile.ROLE_NAME_REGEX, self.role_arn)
50+
return role_match.group(1)
51+
4752

4853
@dataclass
4954
class AWSInstanceProfile:
5055
instance_profile_arn: str
51-
iam_role_arn: str | None = None
56+
iam_role_arn: str
5257

5358
ROLE_NAME_REGEX = r"arn:aws:iam::[0-9]+:(?:instance-profile|role)\/([a-zA-Z0-9+=,.@_-]*)$"
5459

@@ -219,8 +224,8 @@ def _s3_actions(self, actions):
219224
s3_actions = [actions]
220225
return s3_actions
221226

222-
def add_uc_role(self, role_name):
223-
aws_role_trust_doc = {
227+
def _aws_role_trust_doc(self, external_id="0000"):
228+
return {
224229
"Version": "2012-10-17",
225230
"Statement": [
226231
{
@@ -229,21 +234,42 @@ def add_uc_role(self, role_name):
229234
"AWS": "arn:aws:iam::414351767826:role/unity-catalog-prod-UCMasterRole-14S5ZJVKOTYTL"
230235
},
231236
"Action": "sts:AssumeRole",
232-
"Condition": {"StringEquals": {"sts:ExternalId": "0000"}},
237+
"Condition": {"StringEquals": {"sts:ExternalId": external_id}},
233238
}
234239
],
235240
}
236-
# the AssumeRole condition will be modified with the external ID captured from the UC credential.
237-
# https://docs.databricks.com/en/connect/unity-catalog/storage-credentials.html
238-
assume_role_json = self._get_json_for_cli(aws_role_trust_doc)
241+
242+
def add_uc_role(self, role_name: str) -> bool:
243+
"""
244+
Create an IAM role for Unity Catalog to access the S3 buckets.
245+
the AssumeRole condition will be modified later with the external ID captured from the UC credential.
246+
https://docs.databricks.com/en/connect/unity-catalog/storage-credentials.html
247+
"""
248+
assume_role_json = self._get_json_for_cli(self._aws_role_trust_doc())
239249
add_role = self._run_json_command(
240250
f"iam create-role --role-name {role_name} --assume-role-policy-document {assume_role_json}"
241251
)
242252
if not add_role:
243253
return False
244254
return True
245255

246-
def add_uc_role_policy(self, role_name, policy_name, s3_prefixes: set[str], account_id: str, kms_key=None):
256+
def update_uc_trust_role(self, role_name: str, external_id: str = "0000") -> bool:
257+
"""
258+
Modify an existing IAM role for Unity Catalog to access the S3 buckets with the external ID
259+
captured from the UC credential.
260+
https://docs.databricks.com/en/connect/unity-catalog/storage-credentials.html
261+
"""
262+
assume_role_json = self._get_json_for_cli(self._aws_role_trust_doc(external_id))
263+
update_role = self._run_json_command(
264+
f"iam update-assume-role-policy --role-name {role_name} --policy-document {assume_role_json}"
265+
)
266+
if not update_role:
267+
return False
268+
return True
269+
270+
def add_uc_role_policy(
271+
self, role_name: str, policy_name: str, s3_prefixes: set[str], account_id: str, kms_key=None
272+
) -> bool:
247273
s3_prefixes_enriched = sorted([self.S3_PREFIX + s3_prefix for s3_prefix in s3_prefixes])
248274
statement = [
249275
{
@@ -278,7 +304,8 @@ def add_uc_role_policy(self, role_name, policy_name, s3_prefixes: set[str], acco
278304

279305
policy_document_json = self._get_json_for_cli(policy_document)
280306
if not self._run_command(
281-
f"iam put-role-policy --role-name {role_name} --policy-name {policy_name} --policy-document {policy_document_json}"
307+
f"iam put-role-policy --role-name {role_name} "
308+
f"--policy-name {policy_name} --policy-document {policy_document_json}"
282309
):
283310
return False
284311
return True
@@ -325,6 +352,7 @@ def __init__(
325352
self._schema = schema
326353
self._aws_account_id = aws_account_id
327354
self._kms_key = kms_key
355+
self._filename = self.INSTANCE_PROFILES_FILE_NAMES
328356

329357
@classmethod
330358
def for_cli(cls, ws: WorkspaceClient, backend, aws_profile, schema, kms_key=None, product='ucx'):
@@ -338,26 +366,11 @@ def for_cli(cls, ws: WorkspaceClient, backend, aws_profile, schema, kms_key=None
338366
ws,
339367
backend,
340368
aws,
341-
schema=schema,
342-
aws_account_id=caller_identity.get("Account"),
343-
kms_key=kms_key,
369+
schema,
370+
caller_identity.get("Account"),
371+
kms_key,
344372
)
345373

346-
def save_uc_compatible_roles(self):
347-
uc_role_access = list(self._get_role_access())
348-
if len(uc_role_access) == 0:
349-
logger.warning("No Mapping Was Generated.")
350-
return None
351-
return self._installation.save(uc_role_access, filename=self.UC_ROLES_FILE_NAMES)
352-
353-
def get_uc_compatible_roles(self):
354-
try:
355-
role_actions = self._installation.load(list[AWSRoleAction], filename=self.UC_ROLES_FILE_NAMES)
356-
except ResourceDoesNotExist:
357-
self.save_uc_compatible_roles()
358-
role_actions = self._installation.load(list[AWSRoleAction], filename=self.UC_ROLES_FILE_NAMES)
359-
return role_actions
360-
361374
def create_uc_roles_cli(self, *, single_role=True, role_name="UC_ROLE", policy_name="UC_POLICY"):
362375
# Get the missing paths
363376
# Identify the S3 prefixes
@@ -373,7 +386,7 @@ def create_uc_roles_cli(self, *, single_role=True, role_name="UC_ROLE", policy_n
373386
if single_role:
374387
if self._aws_resources.add_uc_role(role_name):
375388
self._aws_resources.add_uc_role_policy(
376-
role_name, policy_name, s3_prefixes, account_id=self._aws_account_id, kms_key=self._kms_key
389+
role_name, policy_name, s3_prefixes, self._aws_account_id, self._kms_key
377390
)
378391
else:
379392
role_id = 1
@@ -383,15 +396,44 @@ def create_uc_roles_cli(self, *, single_role=True, role_name="UC_ROLE", policy_n
383396
f"{role_name}-{role_id}",
384397
f"{policy_name}-{role_id}",
385398
{s3_prefix},
386-
account_id=self._aws_account_id,
387-
kms_key=self._kms_key,
399+
self._aws_account_id,
400+
self._kms_key,
388401
)
389402
role_id += 1
390403

404+
def update_uc_role_trust_policy(self, role_name, external_id="0000"):
405+
return self._aws_resources.update_uc_trust_role(role_name, external_id)
406+
407+
def save_uc_compatible_roles(self):
408+
uc_role_access = list(self._get_role_access())
409+
if len(uc_role_access) == 0:
410+
logger.warning("No mapping was generated.")
411+
return None
412+
return self._installation.save(uc_role_access, filename=self.UC_ROLES_FILE_NAMES)
413+
414+
def load_uc_compatible_roles(self):
415+
try:
416+
role_actions = self._installation.load(list[AWSRoleAction], filename=self.UC_ROLES_FILE_NAMES)
417+
except ResourceDoesNotExist:
418+
self.save_uc_compatible_roles()
419+
role_actions = self._installation.load(list[AWSRoleAction], filename=self.UC_ROLES_FILE_NAMES)
420+
return role_actions
421+
422+
def save_instance_profile_permissions(self) -> str | None:
423+
instance_profile_access = list(self._get_instance_profiles_access())
424+
if len(instance_profile_access) == 0:
425+
logger.warning("No mapping was generated.")
426+
return None
427+
return self._installation.save(instance_profile_access, filename=self.INSTANCE_PROFILES_FILE_NAMES)
428+
391429
def _get_instance_profiles(self) -> Iterable[AWSInstanceProfile]:
392430
instance_profiles = self._ws.instance_profiles.list()
393431
result_instance_profiles = []
394432
for instance_profile in instance_profiles:
433+
if not instance_profile.iam_role_arn:
434+
instance_profile.iam_role_arn = instance_profile.instance_profile_arn.replace(
435+
"instance-profile", "role"
436+
)
395437
result_instance_profiles.append(
396438
AWSInstanceProfile(instance_profile.instance_profile_arn, instance_profile.iam_role_arn)
397439
)
@@ -446,7 +488,7 @@ def _get_role_access_task(self, arn: str, role_name: str):
446488

447489
def _identify_missing_paths(self):
448490
external_locations = ExternalLocations(self._ws, self._backend, self._schema).snapshot()
449-
compatible_roles = self.get_uc_compatible_roles()
491+
compatible_roles = self.load_uc_compatible_roles()
450492
missing_paths = set()
451493
for external_location in external_locations:
452494
path = PurePath(external_location.location)
@@ -460,13 +502,6 @@ def _identify_missing_paths(self):
460502
missing_paths.add(external_location.location)
461503
return missing_paths
462504

463-
def save_instance_profile_permissions(self) -> str | None:
464-
instance_profile_access = list(self._get_instance_profiles_access())
465-
if len(instance_profile_access) == 0:
466-
logger.warning("No Mapping Was Generated.")
467-
return None
468-
return self._installation.save(instance_profile_access, filename=self.INSTANCE_PROFILES_FILE_NAMES)
469-
470505
def _identify_missing_external_locations(
471506
self,
472507
external_locations: Iterable[ExternalLocation],
@@ -512,7 +547,7 @@ def create_external_locations(self, location_init="UCX_location"):
512547
external_locations = ExternalLocations(self._ws, self._backend, self._schema).snapshot()
513548
existing_external_locations = self._ws.external_locations.list()
514549
existing_paths = [external_location.url for external_location in existing_external_locations]
515-
compatible_roles = self.get_uc_compatible_roles()
550+
compatible_roles = self.load_uc_compatible_roles()
516551
missing_paths = self._identify_missing_external_locations(external_locations, existing_paths, compatible_roles)
517552
external_location_names = [external_location.name for external_location in existing_external_locations]
518553
external_location_num = 1

src/databricks/labs/ucx/aws/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)