|
1 | 1 | import logging |
2 | 2 |
|
3 | | -from databricks.sdk import WorkspaceClient |
4 | | - |
5 | 3 | from databricks.labs.ucx.inventory.types import PermissionsInventoryItem |
6 | | -from databricks.labs.ucx.providers.spark import SparkMixin |
| 4 | +from databricks.labs.ucx.tacl._internal import CrawlerBase, SqlBackend |
7 | 5 |
|
8 | 6 | logger = logging.getLogger(__name__) |
9 | 7 |
|
10 | 8 |
|
class PermissionsInventoryTable(CrawlerBase):
    """Persists captured permission items in an inventory table via the SQL backend.

    The table is addressed as ``hive_metastore.<inventory_database>.permissions``
    and is managed through the ``CrawlerBase`` plumbing (``_exec``, ``_fetch``,
    ``_append_records``).
    """

    def __init__(self, backend: SqlBackend, inventory_database: str):
        # CrawlerBase builds self._full_name from catalog/database/table parts.
        super().__init__(backend, "hive_metastore", inventory_database, "permissions")

    def cleanup(self):
        """Drop the inventory table if it exists."""
        logger.info(f"Cleaning up inventory table {self._full_name}")
        drop_statement = f"DROP TABLE IF EXISTS {self._full_name}"
        self._exec(drop_statement)
        logger.info("Inventory table cleanup complete")

    def save(self, items: list[PermissionsInventoryItem]):
        """Append the given permission items to the inventory table."""
        # TODO: update instead of append
        logger.info(f"Saving {len(items)} items to inventory table {self._full_name}")
        self._append_records(PermissionsInventoryItem, items)
        logger.info("Successfully saved the items to inventory table")

    def load_all(self) -> list[PermissionsInventoryItem]:
        """Read every row back as a ``PermissionsInventoryItem``."""
        logger.info(f"Loading inventory table {self._full_name}")
        query = f"SELECT object_id, support, raw_object_permissions FROM {self._full_name}"
        loaded = []
        for object_id, support, raw_object_permissions in self._fetch(query):
            loaded.append(PermissionsInventoryItem(object_id, support, raw_object_permissions))
        return loaded
0 commit comments