Skip to content

Commit df6ccb2

Browse files
authored
Handle None directory_id if managed identity encountered during the crawling of StoragePermissionMapping (#986)
While creating StoragePermissionMapping, a principal could be managed identity which does not have directory_id. This PR will allow managed identity to be stored in StoragePermissionMapping, and allow None directory_id. ## Changes <!-- Summary of your changes that are easy to understand. Add screenshots when necessary --> - Add `type` field to dataclass `StoragePermissionMapping` and `Principal` to indicate if a principal is service principal or managed identity. - Allow None `directory_id` if the principal is not a service principal. - Ignore the managed identity while migrating to UC storage credentials for now. ### Linked issues <!-- DOC: Link issue with a keyword: close, closes, closed, fix, fixes, fixed, resolve, resolves, resolved. See https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword --> fix #339 ### Functionality - [ ] added relevant user documentation - [ ] added new CLI command - [ ] modified existing command: `databricks labs ucx ...` - [ ] added a new workflow - [ ] modified existing workflow: `...` - [ ] added a new table - [ ] modified existing table: `...` ### Tests <!-- How is this tested? Please see the checklist below and also describe any other relevant tests --> - [ ] manually tested - [ ] added unit tests - [ ] added integration tests - [ ] verified on staging environment (screenshot attached)
1 parent 0008467 commit df6ccb2

File tree

9 files changed

+81
-21
lines changed

9 files changed

+81
-21
lines changed

src/databricks/labs/ucx/azure/access.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,9 @@ class StoragePermissionMapping:
1717
client_id: str
1818
principal: str
1919
privilege: str
20+
type: str
2021
# Need this directory_id/tenant_id when create UC storage credentials using service principal
21-
directory_id: str
22+
directory_id: str | None = None
2223

2324

2425
class AzureResourcePermissions:
@@ -65,6 +66,7 @@ def _map_storage(self, storage: AzureResource) -> list[StoragePermissionMapping]
6566
client_id=role_assignment.principal.client_id,
6667
principal=role_assignment.principal.display_name,
6768
privilege=privilege,
69+
type=role_assignment.principal.type,
6870
directory_id=role_assignment.principal.directory_id,
6971
)
7072
)

src/databricks/labs/ucx/azure/credentials.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,19 +37,19 @@ class ServicePrincipalMigrationInfo:
3737
class StorageCredentialValidationResult:
3838
name: str
3939
application_id: str
40-
directory_id: str
4140
read_only: bool
4241
validated_on: str
42+
directory_id: str | None = None
4343
failures: list[str] | None = None
4444

4545
@classmethod
4646
def from_validation(cls, permission_mapping: StoragePermissionMapping, failures: list[str] | None):
4747
return cls(
4848
permission_mapping.principal,
4949
permission_mapping.client_id,
50-
permission_mapping.directory_id,
5150
permission_mapping.privilege == Privilege.READ_FILES.value,
5251
permission_mapping.prefix,
52+
permission_mapping.directory_id,
5353
failures,
5454
)
5555

@@ -85,6 +85,10 @@ def list(self, include_names: set[str] | None = None) -> set[str]:
8585
return application_ids
8686

8787
def create_with_client_secret(self, spn: ServicePrincipalMigrationInfo) -> StorageCredentialInfo:
88+
# this function should only be used to migrate service principal, fail the command here if
89+
# it's misused to create storage credential with managed identity
90+
assert spn.permission_mapping.directory_id is not None
91+
8892
# prepare the storage credential properties
8993
name = spn.permission_mapping.principal
9094
service_principal = AzureServicePrincipal(
@@ -228,7 +232,9 @@ def _generate_migration_list(self, include_names: set[str] | None = None) -> lis
228232
Create the list of SP that need to be migrated, output an action plan as a csv file for users to confirm
229233
"""
230234
# load sp list from azure_storage_account_info.csv
231-
sp_list = self._resource_permissions.load()
235+
permission_mappings = self._resource_permissions.load()
236+
# For now we only migrate Service Principal and ignore Managed Identity
237+
sp_list = [mapping for mapping in permission_mappings if mapping.type == "Application"]
232238
# list existed storage credentials
233239
sc_set = self._storage_credential_manager.list(include_names)
234240
# check if the sp is already used in UC storage credential

src/databricks/labs/ucx/azure/resources.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,12 @@ class Principal:
7070
client_id: str
7171
display_name: str
7272
object_id: str
73+
# service principal will have type: "Application"
74+
# managed identity will have type: "ManagedIdentity"
75+
type: str
7376
# Need this directory_id/tenant_id when create UC storage credentials using service principal
74-
directory_id: str
77+
# it will be None if type is managed identity
78+
directory_id: str | None = None
7579

7680

7781
@dataclass
@@ -173,13 +177,17 @@ def _get_principal(self, principal_id: str) -> Principal | None:
173177
client_id = raw.get("appId")
174178
display_name = raw.get("displayName")
175179
object_id = raw.get("id")
180+
principal_type = raw.get("servicePrincipalType")
176181
# Need this directory_id/tenant_id when create UC storage credentials using service principal
177182
directory_id = raw.get("appOwnerOrganizationId")
178183
assert client_id is not None
179184
assert display_name is not None
180185
assert object_id is not None
181-
assert directory_id is not None
182-
self._principals[principal_id] = Principal(client_id, display_name, object_id, directory_id)
186+
assert principal_type is not None
187+
if principal_type == "Application":
188+
# service principal must have directory_id
189+
assert directory_id is not None
190+
self._principals[principal_id] = Principal(client_id, display_name, object_id, principal_type, directory_id)
183191
return self._principals[principal_id]
184192

185193
def role_assignments(

tests/integration/azure/test_credentials.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
import base64
22
import re
33
from dataclasses import dataclass
4+
from datetime import timedelta
45

56
import pytest
67
from databricks.labs.blueprint.installation import MockInstallation
78
from databricks.labs.blueprint.tui import MockPrompts
9+
from databricks.sdk.errors import InternalError, NotFound
10+
from databricks.sdk.retries import retried
811

912
from databricks.labs.ucx.assessment.azure import AzureServicePrincipalInfo
1013
from databricks.labs.ucx.azure.access import AzureResourcePermissions
@@ -68,6 +71,7 @@ def inner(
6871
'client_id': test_info.application_id,
6972
'principal': test_info.credential_name,
7073
'privilege': "READ_FILES" if read_only else "WRITE_FILES",
74+
'type': "Application",
7175
'directory_id': test_info.directory_id,
7276
},
7377
]
@@ -97,6 +101,7 @@ def inner(
97101
return inner
98102

99103

104+
@retried(on=[InternalError], timeout=timedelta(minutes=2))
100105
def test_spn_migration_existed_storage_credential(extract_test_info, make_storage_credential_spn, run_migration):
101106
# create a storage credential for this test
102107
make_storage_credential_spn(
@@ -113,13 +118,24 @@ def test_spn_migration_existed_storage_credential(extract_test_info, make_storag
113118
assert not migration_result
114119

115120

121+
def save_delete_credential(ws, name):
122+
try:
123+
ws.storage_credentials.delete(name, force=True)
124+
except NotFound:
125+
# If test failed with exception threw before storage credential is created,
126+
# don't fail the test with storage credential cannot be deleted error,
127+
# instead let the original exception be reported.
128+
pass
129+
130+
131+
@retried(on=[InternalError], timeout=timedelta(minutes=2))
116132
@pytest.mark.parametrize("read_only", [False, True])
117133
def test_spn_migration(ws, extract_test_info, run_migration, read_only):
118134
try:
119135
migration_results = run_migration(extract_test_info, {"lets_migrate_the_spn"}, read_only)
120136
storage_credential = ws.storage_credentials.get(extract_test_info.credential_name)
121137
finally:
122-
ws.storage_credentials.delete(extract_test_info.credential_name, force=True)
138+
save_delete_credential(ws, extract_test_info.credential_name)
123139

124140
assert storage_credential is not None
125141
assert storage_credential.read_only is read_only

tests/unit/azure/azure/mappings.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,14 @@
99
"appId": "appIduser2",
1010
"displayName": "disNameuser2",
1111
"id": "Iduser2",
12+
"servicePrincipalType": "Application",
1213
"appOwnerOrganizationId": "0000-0000"
1314
},
1415
"/v1.0/directoryObjects/user3": {
1516
"appId": "appIduser3",
1617
"displayName": "disNameuser3",
1718
"id": "Iduser3",
18-
"appOwnerOrganizationId": "0000-0000"
19+
"servicePrincipalType": "ManagedIdentity"
1920
},
2021
"/subscriptions": {
2122
"value": [

tests/unit/azure/test_access.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -68,13 +68,13 @@ def test_save_spn_permissions_valid_azure_storage_account():
6868
AzureRoleAssignment(
6969
resource=AzureResource(f'{containers}/container1'),
7070
scope=AzureResource(f'{containers}/container1'),
71-
principal=Principal('a', 'b', 'c', '0000-0000'),
71+
principal=Principal('a', 'b', 'c', 'Application', '0000-0000'),
7272
role_name='Storage Blob Data Contributor',
7373
),
7474
AzureRoleAssignment(
7575
resource=AzureResource(f'{storage_accounts}/storage1'),
7676
scope=AzureResource(f'{storage_accounts}/storage1'),
77-
principal=Principal('d', 'e', 'f', '0000-0000'),
77+
principal=Principal('d', 'e', 'f', 'Application', '0000-0000'),
7878
role_name='Button Clicker',
7979
),
8080
]
@@ -88,13 +88,15 @@ def test_save_spn_permissions_valid_azure_storage_account():
8888
'prefix': 'abfss://[email protected]/',
8989
'principal': 'b',
9090
'privilege': 'WRITE_FILES',
91+
'type': 'Application',
9192
'directory_id': '0000-0000',
9293
},
9394
{
9495
'client_id': 'a',
9596
'prefix': 'abfss://[email protected]/',
9697
'principal': 'b',
9798
'privilege': 'WRITE_FILES',
99+
'type': 'Application',
98100
'directory_id': '0000-0000',
99101
},
100102
],
@@ -134,14 +136,14 @@ def test_save_spn_permissions_valid_storage_accounts(caplog, mocker, az_token):
134136
'prefix': 'abfss://[email protected]/',
135137
'principal': 'disNameuser3',
136138
'privilege': 'WRITE_FILES',
137-
'directory_id': '0000-0000',
139+
'type': 'ManagedIdentity',
138140
},
139141
{
140142
'client_id': 'appIduser3',
141143
'prefix': 'abfss://[email protected]/',
142144
'principal': 'disNameuser3',
143145
'privilege': 'WRITE_FILES',
144-
'directory_id': '0000-0000',
146+
'type': 'ManagedIdentity',
145147
},
146148
],
147149
)

tests/unit/azure/test_credentials.py

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -60,29 +60,40 @@ def installation():
6060
'client_id': 'app_secret1',
6161
'principal': 'principal_1',
6262
'privilege': 'WRITE_FILES',
63+
'type': 'Application',
6364
'directory_id': 'directory_id_1',
6465
},
6566
{
6667
'prefix': 'prefix2',
6768
'client_id': 'app_secret2',
6869
'principal': 'principal_read',
6970
'privilege': 'READ_FILES',
71+
'type': 'Application',
7072
'directory_id': 'directory_id_1',
7173
},
7274
{
7375
'prefix': 'prefix3',
7476
'client_id': 'app_secret3',
7577
'principal': 'principal_write',
7678
'privilege': 'WRITE_FILES',
79+
'type': 'Application',
7780
'directory_id': 'directory_id_2',
7881
},
7982
{
8083
'prefix': 'overlap_with_external_location',
8184
'client_id': 'app_secret4',
8285
'principal': 'principal_overlap',
8386
'privilege': 'WRITE_FILES',
87+
'type': 'Application',
8488
'directory_id': 'directory_id_2',
8589
},
90+
{
91+
'prefix': 'prefix5',
92+
'client_id': 'app_secret4',
93+
'principal': 'managed_identity',
94+
'privilege': 'WRITE_FILES',
95+
'type': 'ManagedIdentity',
96+
},
8697
],
8798
}
8899
)
@@ -160,6 +171,7 @@ def test_create_storage_credentials(credential_manager):
160171
"app_secret1",
161172
"principal_write",
162173
"WRITE_FILES",
174+
"Application",
163175
"directory_id_1",
164176
),
165177
"test",
@@ -170,6 +182,7 @@ def test_create_storage_credentials(credential_manager):
170182
"app_secret2",
171183
"principal_read",
172184
"READ_FILES",
185+
"Application",
173186
"directory_id_1",
174187
),
175188
"test",
@@ -185,7 +198,9 @@ def test_create_storage_credentials(credential_manager):
185198

186199

187200
def test_validate_storage_credentials(credential_manager):
188-
permission_mapping = StoragePermissionMapping("prefix", "client_id", "principal_1", "WRITE_FILES", "directory_id")
201+
permission_mapping = StoragePermissionMapping(
202+
"prefix", "client_id", "principal_1", "WRITE_FILES", "Application", "directory_id"
203+
)
189204

190205
# validate normal storage credential
191206
validation = credential_manager.validate(permission_mapping)
@@ -196,7 +211,7 @@ def test_validate_storage_credentials(credential_manager):
196211

197212
def test_validate_read_only_storage_credentials(credential_manager):
198213
permission_mapping = StoragePermissionMapping(
199-
"prefix", "client_id", "principal_read", "READ_FILES", "directory_id_1"
214+
"prefix", "client_id", "principal_read", "READ_FILES", "Application", "directory_id_1"
200215
)
201216

202217
# validate read-only storage credential
@@ -207,7 +222,9 @@ def test_validate_read_only_storage_credentials(credential_manager):
207222

208223

209224
def test_validate_storage_credentials_overlap_location(credential_manager):
210-
permission_mapping = StoragePermissionMapping("prefix", "client_id", "overlap", "WRITE_FILES", "directory_id_2")
225+
permission_mapping = StoragePermissionMapping(
226+
"prefix", "client_id", "overlap", "WRITE_FILES", "Application", "directory_id_2"
227+
)
211228

212229
# prefix used for validation overlaps with existing external location will raise InvalidParameterValue
213230
# assert InvalidParameterValue is handled
@@ -218,14 +235,18 @@ def test_validate_storage_credentials_overlap_location(credential_manager):
218235

219236

220237
def test_validate_storage_credentials_non_response(credential_manager):
221-
permission_mapping = StoragePermissionMapping("prefix", "client_id", "none", "WRITE_FILES", "directory_id")
238+
permission_mapping = StoragePermissionMapping(
239+
"prefix", "client_id", "none", "WRITE_FILES", "Application", "directory_id"
240+
)
222241

223242
validation = credential_manager.validate(permission_mapping)
224243
assert validation.failures == ["Validation returned none results."]
225244

226245

227246
def test_validate_storage_credentials_failed_operation(credential_manager):
228-
permission_mapping = StoragePermissionMapping("prefix", "client_id", "fail", "WRITE_FILES", "directory_id_2")
247+
permission_mapping = StoragePermissionMapping(
248+
"prefix", "client_id", "fail", "WRITE_FILES", "Application", "directory_id_2"
249+
)
229250

230251
validation = credential_manager.validate(permission_mapping)
231252
assert validation.failures == ["LIST validation failed with message: fail"]

tests/unit/azure/test_resources.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,9 @@ def test_role_assignments_storage(mocker, az_token):
6161
assert len(role_assignments) == 1
6262
for role_assignment in role_assignments:
6363
assert role_assignment.role_name == "Contributor"
64-
assert role_assignment.principal == Principal("appIduser2", "disNameuser2", "Iduser2", "0000-0000")
64+
assert role_assignment.principal == Principal(
65+
"appIduser2", "disNameuser2", "Iduser2", "Application", "0000-0000"
66+
)
6567
assert str(role_assignment.scope) == resource_id
6668
assert role_assignment.resource == AzureResource(resource_id)
6769

@@ -75,6 +77,8 @@ def test_role_assignments_container(mocker, az_token):
7577
assert len(role_assignments) == 1
7678
for role_assignment in role_assignments:
7779
assert role_assignment.role_name == "Contributor"
78-
assert role_assignment.principal == Principal("appIduser2", "disNameuser2", "Iduser2", "0000-0000")
80+
assert role_assignment.principal == Principal(
81+
"appIduser2", "disNameuser2", "Iduser2", "Application", "0000-0000"
82+
)
7983
assert str(role_assignment.scope) == resource_id
8084
assert role_assignment.resource == AzureResource(resource_id)

tests/unit/test_cli.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def ws():
4545
}
4646
),
4747
'/Users/foo/.ucx/state.json': json.dumps({'resources': {'jobs': {'assessment': '123'}}}),
48-
"/Users/foo/.ucx/azure_storage_account_info.csv": "prefix,client_id,principal,privilege,directory_id\ntest,test,test,test,test",
48+
"/Users/foo/.ucx/azure_storage_account_info.csv": "prefix,client_id,principal,privilege,type,directory_id\ntest,test,test,test,Application,test",
4949
}
5050

5151
def download(path: str) -> io.StringIO | io.BytesIO:

0 commit comments

Comments
 (0)