Skip to content

Commit e214477

Browse files
authored
Removed pydantic dependency (#138)
1 parent 999678a commit e214477

File tree

11 files changed

+109
-53
lines changed

11 files changed

+109
-53
lines changed

pyproject.toml

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -27,13 +27,12 @@ classifiers = [
2727
]
2828
dependencies = [
2929
"databricks-sdk~=0.7.0",
30-
"typer[all]>=0.9.0,<0.10.0",
31-
"pyhocon>=0.3.60,<0.4.0",
32-
"pydantic>=2.0.3, <3.0.0",
3330
"PyYAML>=6.0.0,<7.0.0",
34-
"ratelimit>=2.2.1,<3.0.0",
31+
32+
# TODO: remove later
33+
"typer[all]>=0.9.0,<0.10.0",
3534
"pandas>=2.0.3,<3.0.0",
36-
"python-dotenv>=1.0.0,<=2.0.0",
35+
"ratelimit>=2.2.1,<3.0.0",
3736
"tenacity>=8.2.2,<9.0.0",
3837
]
3938

src/databricks/labs/ucx/cli/app.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,10 +9,10 @@
99

1010
@app.command()
1111
def migrate_groups(config_file: Annotated[Path, typer.Argument(help="Path to config file")] = "migration_config.yml"):
12-
from databricks.labs.ucx.cli.utils import get_migration_config
12+
from databricks.labs.ucx.config import MigrationConfig
1313
from databricks.labs.ucx.toolkits.group_migration import GroupMigrationToolkit
1414

15-
config = get_migration_config(config_file)
15+
config = MigrationConfig.from_file(config_file)
1616
toolkit = GroupMigrationToolkit(config)
1717
toolkit.prepare_environment()
1818

src/databricks/labs/ucx/cli/utils.py

Lines changed: 0 additions & 11 deletions
This file was deleted.

src/databricks/labs/ucx/config.py

Lines changed: 55 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
1+
from dataclasses import dataclass
2+
from pathlib import Path
3+
14
from databricks.sdk.core import Config
2-
from pydantic import RootModel
3-
from pydantic.dataclasses import dataclass
45

56
from databricks.labs.ucx.__about__ import __version__
67

@@ -17,6 +18,10 @@ def __repr__(self):
1718
def to_spark(self):
1819
return self.__repr__()
1920

21+
@classmethod
def from_dict(cls, raw: dict):
    """Create an instance by passing every entry of ``raw`` as a keyword argument."""
    instance = cls(**raw)
    return instance
24+
2025

2126
@dataclass
2227
class GroupsConfig:
@@ -32,11 +37,19 @@ def __post_init__(self):
3237
msg = "No selected groups provided, but auto-collection is disabled"
3338
raise ValueError(msg)
3439

40+
@classmethod
def from_dict(cls, raw: dict):
    """Build the groups config by expanding ``raw`` into constructor keyword arguments."""
    built = cls(**raw)
    return built
43+
3544

3645
@dataclass
3746
class InventoryConfig:
3847
table: InventoryTable
3948

49+
@classmethod
def from_dict(cls, raw: dict):
    """Build the inventory config from a plain mapping.

    Args:
        raw: mapping expected to hold a ``table`` entry, which is itself
            handed to ``InventoryTable.from_dict``.

    Raises:
        ValueError: if ``raw`` is empty or has no ``table`` entry.
    """
    table = raw.get("table") if raw else None
    if table is None:
        # Fail early with a readable message instead of forwarding None
        # to InventoryTable.from_dict.
        msg = "Inventory table is not specified in the config"
        raise ValueError(msg)
    return cls(table=InventoryTable.from_dict(table))
52+
4053

4154
@dataclass
4255
class ConnectConfig:
@@ -72,6 +85,10 @@ def from_databricks_config(cfg: Config) -> "ConnectConfig":
7285
rate_limit=cfg.rate_limit,
7386
)
7487

88+
@classmethod
def from_dict(cls, raw: dict):
    """Build the connection config by expanding ``raw`` into constructor keyword arguments."""
    connect = cls(**raw)
    return connect
91+
7592

7693
@dataclass
7794
class MigrationConfig:
@@ -83,10 +100,46 @@ class MigrationConfig:
83100
log_level: str | None = "INFO"
84101

85102
def __post_init__(self):
103+
if self.connect is None:
104+
self.connect = ConnectConfig()
86105
if self.with_table_acls:
87106
msg = "Table ACLS are not yet implemented"
88107
raise NotImplementedError(msg)
89108

109+
def as_dict(self) -> dict:
    """Convert this config into a nested plain ``dict``.

    Dataclass instances are expanded recursively, field by field; any other
    value is returned unchanged. Fields whose value is ``None`` are omitted.
    Falsy but meaningful values (``False``, ``0``, ``""``, empty containers)
    are kept, so that loading the dict back does not replace them with
    defaults.

    Returns:
        A dict keyed by field name.
    """
    from dataclasses import fields, is_dataclass

    def inner(x):
        # is_dataclass() is also true for dataclass *types*; only instances
        # carry field values, so anything else is passed through untouched.
        if not is_dataclass(x) or isinstance(x, type):
            return x
        converted = ((f.name, inner(getattr(x, f.name))) for f in fields(x))
        return {name: value for name, value in converted if value is not None}

    return inner(self)
124+
125+
@classmethod
def from_dict(cls, raw: dict) -> "MigrationConfig":
    """Build a ``MigrationConfig`` from a plain mapping.

    The ``inventory``, ``groups`` and ``connect`` sections are delegated to
    the ``from_dict`` of their config classes. Scalar settings fall back to
    ``with_table_acls=False``, ``num_threads=4`` and ``log_level="INFO"``
    when their key is absent.

    Args:
        raw: mapping of top-level config keys.

    Returns:
        The constructed config.
    """

    def section(name: str) -> dict:
        # A key that is present but has no value yields None rather than
        # the .get() default, so normalise both cases to an empty mapping.
        return raw.get(name) or {}

    return cls(
        inventory=InventoryConfig.from_dict(section("inventory")),
        with_table_acls=raw.get("with_table_acls", False),
        groups=GroupsConfig.from_dict(section("groups")),
        connect=ConnectConfig.from_dict(section("connect")),
        num_threads=raw.get("num_threads", 4),
        log_level=raw.get("log_level", "INFO"),
    )
135+
136+
@classmethod
def from_file(cls, config_file: Path) -> "MigrationConfig":
    """Load a migration config from a YAML file.

    Args:
        config_file: path to the YAML document; a plain string path is
            accepted as well.

    Returns:
        The config built by ``from_dict``. An empty document is treated as
        an empty mapping.
    """
    from yaml import safe_load

    # Read as UTF-8 explicitly rather than relying on the platform's
    # locale encoding.
    raw = safe_load(Path(config_file).read_text(encoding="utf-8"))
    # Use cls instead of the hard-coded class name so subclasses get
    # their own type back.
    return cls.from_dict(raw or {})
142+
90143
def to_databricks_config(self) -> Config:
91144
connect = self.connect
92145
if connect is None:
@@ -109,6 +162,3 @@ def to_databricks_config(self) -> Config:
109162
product="ucx",
110163
product_version=__version__,
111164
)
112-
113-
def to_json(self) -> str:
114-
return RootModel[MigrationConfig](self).model_dump_json(indent=4)

src/databricks/labs/ucx/inventory/inventorizer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ def _prepare_permissions_inventory_item(self, scope: SecretScope) -> Permissions
208208
object_id=scope.name,
209209
logical_object_type=LogicalObjectType.SECRET_SCOPE,
210210
request_object_type=None,
211-
raw_object_permissions=json.dumps(acls_container.model_dump(mode="json")),
211+
raw_object_permissions=json.dumps(acls_container.as_dict()),
212212
)
213213

214214
def inventorize(self) -> list[PermissionsInventoryItem]:

src/databricks/labs/ucx/inventory/permissions.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -184,6 +184,7 @@ def _scope_permissions_applicator(self, request_payload: SecretsPermissionReques
184184
scope=request_payload.object_id, principal=_acl_item.principal, permission=_acl_item.permission
185185
)
186186
logger.debug(f"Applied new permissions for scope {request_payload.object_id}: {_acl_item}")
187+
# TODO: add mixin to SDK
187188
# in-flight check for the applied permissions
188189
# the api might be inconsistent, therefore we need to check that the permissions were applied
189190
for _ in range(3):

src/databricks/labs/ucx/inventory/table.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ def cleanup(self):
4747
def save(self, items: list[PermissionsInventoryItem]):
4848
# TODO: update instead of append
4949
logger.info(f"Saving {len(items)} items to inventory table {self.config.table}")
50-
serialized_items = pd.DataFrame([item.model_dump(mode="json") for item in items])
50+
serialized_items = pd.DataFrame([item.as_dict() for item in items])
5151
df = self.spark.createDataFrame(serialized_items, schema=self._table_schema)
5252
df.write.mode("append").format("delta").saveAsTable(self.config.table.to_spark())
5353
logger.info("Successfully saved the items to inventory table")

src/databricks/labs/ucx/inventory/types.py

Lines changed: 38 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,10 @@
11
import json
2+
from dataclasses import asdict, dataclass
23

34
import pandas as pd
45
from databricks.sdk.service.iam import ObjectPermissions
56
from databricks.sdk.service.workspace import AclItem as SdkAclItem
67
from databricks.sdk.service.workspace import AclPermission as SdkAclPermission
7-
from pydantic import BaseModel
8-
from pydantic.tools import parse_obj_as
98

109
from databricks.labs.ucx.generic import StrEnum
1110

@@ -30,16 +29,6 @@ def __repr__(self):
3029
return self.value
3130

3231

33-
class SqlRequestObjectType(StrEnum):
34-
ALERTS = "alerts"
35-
DASHBOARDS = "dashboards"
36-
DATA_SOURCES = "data-sources"
37-
QUERIES = "queries"
38-
39-
def __repr__(self):
40-
return self.value
41-
42-
4332
class LogicalObjectType(StrEnum):
4433
ENTITLEMENTS = "ENTITLEMENTS"
4534
ROLES = "ROLES"
@@ -69,12 +58,18 @@ class AclPermission(StrEnum):
6958
MANAGE = "MANAGE"
7059

7160

72-
class AclItem(BaseModel):
61+
@dataclass
62+
class AclItem:
7363
principal: str
7464
permission: AclPermission
7565

66+
@classmethod
def from_dict(cls, raw: dict):
    """Build an ACL item from a mapping with ``principal`` and ``permission`` entries.

    A missing ``principal`` becomes ``None``; the ``permission`` entry is
    converted with ``AclPermission``.
    """
    principal = raw.get("principal", None)
    permission = AclPermission(raw.get("permission"))
    return cls(principal=principal, permission=permission)
69+
7670

77-
class AclItemsContainer(BaseModel):
71+
@dataclass
72+
class AclItemsContainer:
7873
acls: list[AclItem]
7974

8075
@staticmethod
@@ -89,16 +84,25 @@ def to_sdk(self) -> list[SdkAclItem]:
8984
SdkAclItem(principal=acl.principal, permission=SdkAclPermission(acl.permission.value)) for acl in self.acls
9085
]
9186

87+
@classmethod
def from_dict(cls, raw: dict) -> "AclItemsContainer":
    """Build a container from a mapping whose ``acls`` entry lists ACL item mappings.

    Each element is converted with ``AclItem.from_dict``; a missing ``acls``
    entry yields an empty container.
    """
    items = []
    for entry in raw.get("acls", []):
        items.append(AclItem.from_dict(entry))
    return cls(acls=items)
90+
91+
def as_dict(self) -> dict:
    """Return this container as a plain dict, as produced by ``dataclasses.asdict``."""
    payload = asdict(self)
    return payload
93+
9294

93-
class RolesAndEntitlements(BaseModel):
95+
@dataclass
class RolesAndEntitlements:
    """Plain container holding a ``roles`` list and an ``entitlements`` list."""

    # Element types are not constrained by the annotations.
    roles: list
    entitlements: list
9699

97100

98-
class PermissionsInventoryItem(BaseModel):
101+
@dataclass
102+
class PermissionsInventoryItem:
99103
object_id: str
100104
logical_object_type: LogicalObjectType
101-
request_object_type: RequestObjectType | SqlRequestObjectType | None
105+
request_object_type: RequestObjectType | None
102106
raw_object_permissions: str
103107

104108
@property
@@ -108,13 +112,27 @@ def object_permissions(self) -> dict:
108112
@property
109113
def typed_object_permissions(self) -> ObjectPermissions | AclItemsContainer | RolesAndEntitlements:
110114
if self.logical_object_type == LogicalObjectType.SECRET_SCOPE:
111-
return parse_obj_as(AclItemsContainer, self.object_permissions)
115+
return AclItemsContainer.from_dict(self.object_permissions)
112116
elif self.logical_object_type in [LogicalObjectType.ROLES, LogicalObjectType.ENTITLEMENTS]:
113-
return parse_obj_as(RolesAndEntitlements, self.object_permissions)
117+
return RolesAndEntitlements(**self.object_permissions)
114118
else:
115119
return ObjectPermissions.from_dict(self.object_permissions)
116120

117121
@staticmethod
118122
def from_pandas(source: pd.DataFrame) -> list["PermissionsInventoryItem"]:
119123
items = source.to_dict(orient="records")
120-
return [PermissionsInventoryItem(**item) for item in items]
124+
return [PermissionsInventoryItem.from_dict(item) for item in items]
125+
126+
def as_dict(self) -> dict:
    """Return this inventory item as a plain dict, as produced by ``dataclasses.asdict``."""
    record = asdict(self)
    return record
128+
129+
@classmethod
def from_dict(cls, raw: dict) -> "PermissionsInventoryItem":
    """Build an inventory item from a plain mapping.

    ``object_id`` and ``logical_object_type`` are required keys.
    ``request_object_type`` is converted with ``RequestObjectType`` unless it
    is missing or ``None``, and ``raw_object_permissions`` defaults to
    ``None`` when absent.
    """
    # Required keys are read first so a missing one raises KeyError before
    # any optional value is converted.
    object_id = raw["object_id"]
    logical_type = LogicalObjectType(raw["logical_object_type"])

    request_type = None
    if raw.get("request_object_type", None) is not None:
        request_type = RequestObjectType(raw["request_object_type"])

    permissions = raw.get("raw_object_permissions", None)
    return cls(
        object_id=object_id,
        logical_object_type=logical_type,
        request_object_type=request_type,
        raw_object_permissions=permissions,
    )

tests/integration/test_e2e.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -176,7 +176,6 @@ def test_e2e(
176176
with_table_acls=False,
177177
inventory=InventoryConfig(table=inventory_table),
178178
groups=GroupsConfig(selected=[g[0].display_name for g in env.groups]),
179-
auth=None,
180179
log_level="DEBUG",
181180
)
182181
toolkit = GroupMigrationToolkit(config)

tests/unit/test_config.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -5,9 +5,7 @@
55

66
import pytest
77
import yaml
8-
from pydantic import RootModel
98

10-
from databricks.labs.ucx.cli.utils import get_migration_config
119
from databricks.labs.ucx.config import (
1210
GroupsConfig,
1311
InventoryConfig,
@@ -61,8 +59,9 @@ def test_reader(tmp_path: Path):
6159
config: MigrationConfig = mc(with_table_acls=False)
6260
config_file = tmp_path / "config.yml"
6361

62+
as_dict = config.as_dict()
6463
with config_file.open("w") as writable:
65-
yaml.safe_dump(RootModel[MigrationConfig](config).model_dump(), writable)
64+
yaml.safe_dump(as_dict, writable)
6665

67-
loaded = get_migration_config(config_file)
66+
loaded = MigrationConfig.from_file(config_file)
6867
assert loaded == config

0 commit comments

Comments
 (0)