Skip to content

Commit c99708c

Browse files
authored
Added CLI Command databricks labs ucx save-uc-compatible-roles (#863)
1 parent a908704 commit c99708c

File tree

5 files changed

+527
-39
lines changed

5 files changed

+527
-39
lines changed

labs.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,3 +118,11 @@ commands:
118118
flags:
119119
- name: aws-profile
120120
description: AWS Profile to use for authentication
121+
122+
- name: save-uc-compatible-roles
123+
description: |
124+
Scan all the AWS roles that are set for UC access and produce a mapping to the S3 resources.
125+
Requires a working setup of AWS CLI.
126+
flags:
127+
- name: aws-profile
128+
description: AWS Profile to use for authentication

src/databricks/labs/ucx/assessment/aws.py

Lines changed: 81 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,11 @@ class AWSPolicyAction:
3232

3333

3434
@dataclass
35-
class AWSInstanceProfileAction:
36-
instance_profile_arn: str
35+
class AWSRoleAction:
36+
role_arn: str
3737
resource_type: str
3838
privilege: str
3939
resource_path: str
40-
iam_role_arn: str | None = None
4140

4241

4342
@dataclass
@@ -72,6 +71,10 @@ class AWSResources:
7271
S3_ACTIONS: typing.ClassVar[set[str]] = {"s3:PutObject", "s3:GetObject", "s3:DeleteObject", "s3:PutObjectAcl"}
7372
S3_READONLY: typing.ClassVar[str] = "s3:GetObject"
7473
S3_REGEX: typing.ClassVar[str] = r"arn:aws:s3:::([a-zA-Z0-9+=,.@_-]*)\/\*$"
74+
UC_MASTER_ROLES_ARN: typing.ClassVar[list[str]] = [
75+
"arn:aws:iam::414351767826:role/unity-catalog-prod-UCMasterRole-14S5ZJVKOTYTL",
76+
"arn:aws:iam::707343435239:role/unity-catalog-dev-UCMasterRole-G3MMN8SP21FO",
77+
]
7578

7679
def __init__(self, profile: str, command_runner: Callable[[str], tuple[int, str, str]] = run_command):
7780
self._profile = profile
@@ -104,11 +107,55 @@ def list_attached_policies_in_role(self, role_name: str):
104107
attached_policies.append(policy.get("PolicyArn"))
105108
return attached_policies
106109

110+
def list_all_uc_roles(self):
    """Return the IAM roles whose trust policy lets a UC master role assume them.

    Runs ``aws iam list-roles`` for the configured profile and inspects the
    ``AssumeRolePolicyDocument`` of every role returned. A role qualifies when
    at least one of its trust statements is accepted by
    ``_is_uc_trust_statement``.

    Returns:
        A list of ``AWSRole`` entries, one per qualifying role. The list is
        empty when the CLI call fails or returns no roles.
    """
    response = self._run_json_command(f"iam list-roles --profile {self._profile}")
    # _run_json_command returns None when the CLI exits with a non-zero code.
    roles = response.get("Roles") if response else None
    if not roles:
        logger.warning("list-roles couldn't find any roles")
        return []
    uc_roles = []
    for role in roles:
        policy_document = role.get("AssumeRolePolicyDocument")
        if not policy_document:
            continue
        statements = policy_document.get("Statement") or []
        # IAM allows "Statement" to be a single object instead of an array.
        if isinstance(statements, dict):
            statements = [statements]
        # any() stops at the first match, so a role is reported only once
        # even when several statements trust a UC master role.
        if any(self._is_uc_trust_statement(statement) for statement in statements):
            uc_roles.append(
                AWSRole(
                    role_id=role.get("RoleId"),
                    role_name=role.get("RoleName"),
                    arn=role.get("Arn"),
                    path=role.get("Path"),
                )
            )
    return uc_roles

def _is_uc_trust_statement(self, statement):
    """Tell whether one trust-policy statement lets a UC master role assume the role.

    The statement must have ``Effect`` "Allow", an ``Action`` that is or
    contains "sts:AssumeRole", and an ``AWS`` principal (string or list)
    found in ``UC_MASTER_ROLES_ARN``.
    """
    if statement.get("Effect") != "Allow":
        return False
    actions = statement.get("Action")
    if isinstance(actions, str):
        actions = [actions]
    if not actions or "sts:AssumeRole" not in actions:
        return False
    principal = statement.get("Principal")
    # "Principal" may be the bare string "*"; only the mapping form can name an AWS ARN.
    if not isinstance(principal, dict):
        return False
    aws_principals = principal.get("AWS")
    if isinstance(aws_principals, str):
        aws_principals = [aws_principals]
    if not aws_principals:
        return False
    return any(arn in self.UC_MASTER_ROLES_ARN for arn in aws_principals)
154+
107155
def get_role_policy(self, role_name, policy_name: str | None = None, attached_policy_arn: str | None = None):
108156
if policy_name:
109157
get_policy = (
110-
f"iam get-role-policy --profile {self._profile} --role-name {role_name} "
111-
f"--policy-name {policy_name} --no-paginate"
158+
f"iam get-role-policy --profile {self._profile} --role-name {role_name} " f"--policy-name {policy_name}"
112159
)
113160
elif attached_policy_arn:
114161
get_attached_policy = f"iam get-policy --profile {self._profile} --policy-arn {attached_policy_arn}"
@@ -118,7 +165,7 @@ def get_role_policy(self, role_name, policy_name: str | None = None, attached_po
118165
policy_version = attached_policy["Policy"]["DefaultVersionId"]
119166
get_policy = (
120167
f"iam get-policy-version --profile {self._profile} --policy-arn {attached_policy_arn} "
121-
f"--version-id {policy_version} --no-paginate"
168+
f"--version-id {policy_version}"
122169
)
123170
else:
124171
logger.error("Failed to retrieve role. No role name or attached role ARN specified.")
@@ -161,7 +208,7 @@ def get_role_policy(self, role_name, policy_name: str | None = None, attached_po
161208

162209
def _run_json_command(self, command: str):
163210
aws_cmd = shutil.which("aws")
164-
code, output, error = self._command_runner(f"{aws_cmd} {command} --output json --no-paginate")
211+
code, output, error = self._command_runner(f"{aws_cmd} {command} --output json")
165212
if code != 0:
166213
logger.error(error)
167214
return None
@@ -182,6 +229,13 @@ def for_cli(cls, ws: WorkspaceClient, aws_profile, product='ucx'):
182229
raise ResourceWarning("AWS CLI is not configured properly.")
183230
return cls(installation, ws, aws)
184231

232+
def save_uc_compatible_roles(self):
    """Save the access mapping of UC-compatible roles to the installation.

    Collects the entries produced by ``_get_role_access`` and stores them as
    ``uc_roles_access.csv`` through ``self._installation.save``.

    Returns:
        Whatever ``self._installation.save`` returns, or ``None`` (after a
        warning) when no entries were produced.
    """
    role_access_rows = list(self._get_role_access())
    if role_access_rows:
        return self._installation.save(role_access_rows, filename='uc_roles_access.csv')
    # Nothing to persist: warn instead of writing an empty file.
    logger.warning("No Mapping Was Generated.")
    return None
238+
185239
def _get_instance_profiles(self) -> Iterable[AWSInstanceProfile]:
186240
instance_profiles = self._ws.instance_profiles.list()
187241
result_instance_profiles = []
@@ -196,38 +250,44 @@ def _get_instance_profiles_access(self):
196250
instance_profiles = list(self._get_instance_profiles())
197251
tasks = []
198252
for instance_profile in instance_profiles:
199-
tasks.append(partial(self._get_instance_profile_access_task, instance_profile))
253+
tasks.append(
254+
partial(self._get_role_access_task, instance_profile.instance_profile_arn, instance_profile.role_name)
255+
)
200256
# Aggregating the outputs from all the tasks
201257
return sum(Threads.strict("Scanning Instance Profiles", tasks), [])
202258

203-
def _get_instance_profile_access_task(self, instance_profile: AWSInstanceProfile):
259+
def _get_role_access(self):
    """Collect the access entries of every UC role found by ``list_all_uc_roles``.

    Builds one ``_get_role_access_task`` call per role (bound to the role's
    ARN and name), runs them through ``Threads.strict`` and concatenates the
    per-task results into a single list.
    """
    scan_tasks = [
        partial(self._get_role_access_task, uc_role.arn, uc_role.role_name)
        for uc_role in self._aws_resources.list_all_uc_roles()
    ]
    # Flatten the per-task outputs into one list.
    return sum(Threads.strict("Scanning Roles", scan_tasks), [])
266+
267+
def _get_role_access_task(self, arn: str, role_name: str):
204268
policy_actions = []
205-
policies = list(self._aws_resources.list_role_policies(instance_profile.role_name))
269+
policies = list(self._aws_resources.list_role_policies(role_name))
206270
for policy in policies:
207-
actions = self._aws_resources.get_role_policy(instance_profile.role_name, policy_name=policy)
271+
actions = self._aws_resources.get_role_policy(role_name, policy_name=policy)
208272
for action in actions:
209273
policy_actions.append(
210-
AWSInstanceProfileAction(
211-
instance_profile.instance_profile_arn,
274+
AWSRoleAction(
275+
arn,
212276
action.resource_type,
213277
action.privilege,
214278
action.resource_path,
215-
instance_profile.iam_role_arn,
216279
)
217280
)
218-
attached_policies = self._aws_resources.list_attached_policies_in_role(instance_profile.role_name)
281+
attached_policies = self._aws_resources.list_attached_policies_in_role(role_name)
219282
for attached_policy in attached_policies:
220-
actions = list(
221-
self._aws_resources.get_role_policy(instance_profile.role_name, attached_policy_arn=attached_policy)
222-
)
283+
actions = list(self._aws_resources.get_role_policy(role_name, attached_policy_arn=attached_policy))
223284
for action in actions:
224285
policy_actions.append(
225-
AWSInstanceProfileAction(
226-
instance_profile.instance_profile_arn,
286+
AWSRoleAction(
287+
arn,
227288
action.resource_type,
228289
action.privilege,
229290
action.resource_path,
230-
instance_profile.iam_role_arn,
231291
)
232292
)
233293
return policy_actions

src/databricks/labs/ucx/cli.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,5 +273,33 @@ def save_aws_iam_profiles(w: WorkspaceClient, aws_profile: str | None = None):
273273
return None
274274

275275

276+
@ucx.command
def save_uc_compatible_roles(w: WorkspaceClient, *, aws_profile: str | None = None):
    """Extract all the IAM roles with trust relationships to the UC master role.

    Maps these roles to the S3 buckets they have access to.
    Requires a working setup of AWS CLI.
    https://aws.amazon.com/cli/
    The command saves a CSV to the UCX installation folder with the mapping.

    The user has to be authenticated with AWS and have the permissions to browse the resources and IAM services.
    More information can be found here:
    https://docs.aws.amazon.com/IAM/latest/UserGuide/access_permissions-required.html

    When ``aws_profile`` is not given, the ``AWS_DEFAULT_PROFILE`` environment
    variable is used instead. The command logs an error and returns without
    doing anything when the AWS CLI is not on the path or no profile can be
    determined.
    """
    if not shutil.which("aws"):
        logger.error(
            "Couldn't find AWS CLI in path. Please obtain and install the CLI from https://aws.amazon.com/cli/"
        )
        return None
    if not aws_profile:
        # Fall back to the profile configured in the environment.
        aws_profile = os.getenv("AWS_DEFAULT_PROFILE")
    if not aws_profile:
        logger.error(
            "AWS Profile is not specified. Use the environment variable [AWS_DEFAULT_PROFILE] "
            "or use the '--aws-profile=[profile-name]' parameter."
        )
        return None
    aws_permissions = AWSResourcePermissions.for_cli(w, aws_profile)
    aws_permissions.save_uc_compatible_roles()
    return None
302+
303+
276304
if __name__ == "__main__":
277305
ucx()

0 commit comments

Comments
 (0)