
Commit 0bc27e5

Avoid errors in corner cases where Azure Service Principal Credentials are not available in Spark context (#254)
This PR simplifies Table ACL crawling by removing the ability to configure which databases are iterated: `crawl_grants` now crawls all databases consistently. Fixes #247
1 parent 6d30d12 commit 0bc27e5
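For orientation, here is a minimal sketch of the crawl path after this change, using the names introduced in the hunks below. `"ucx"` is a placeholder inventory schema, and `RuntimeBackend` only works inside a Databricks job or notebook:

```python
from databricks.labs.ucx.framework.crawlers import RuntimeBackend
from databricks.labs.ucx.hive_metastore import GrantsCrawler, TablesCrawler

inventory_database = "ucx"  # placeholder: the schema where inventory tables are persisted

backend = RuntimeBackend()  # Spark-backed SQL execution, available only on a Databricks cluster
tables = TablesCrawler(backend, inventory_database)  # inventory always lives under hive_metastore now
grants = GrantsCrawler(tables)
grants.snapshot()  # crawls grants for every database, table and view in hive_metastore
```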

File tree

19 files changed: +213 additions, -266 deletions

examples/migration_config.yml

Lines changed: 0 additions & 3 deletions
@@ -1,8 +1,5 @@
 inventory_database: unity_catalog_migration
 
-tacl:
-  databases: [ "default" ]
-
 warehouse_id: None
 
 groups:

notebooks/toolkit.py

Lines changed: 6 additions & 15 deletions
@@ -15,10 +15,10 @@
 from databricks.labs.ucx.config import (
     GroupsConfig,
     MigrationConfig,
-    TaclConfig,
 )
+from databricks.labs.ucx.framework.crawlers import RuntimeBackend
 from databricks.labs.ucx.workspace_access import GroupMigrationToolkit
-from databricks.labs.ucx.hive_metastore import TaclToolkit
+from databricks.labs.ucx.hive_metastore import TablesCrawler, GrantsCrawler
 
 # COMMAND ----------
 
@@ -40,22 +40,13 @@
         # use this option to select all groups automatically
         # auto=True
     ),
-    tacl=TaclConfig(
-        # use this option to select specific databases manually
-        databases=databases,
-        # use this option to select all databases automatically
-        # auto=True
-    ),
     log_level="DEBUG",
 )
 
 toolkit = GroupMigrationToolkit(config)
-tacltoolkit = TaclToolkit(
-    toolkit._ws,
-    inventory_catalog="hive_metastore",
-    inventory_schema=config.inventory_database,
-    databases=config.tacl.databases,
-)
+backend = RuntimeBackend()
+tables = TablesCrawler(backend, config.inventory_database)
+grants = GrantsCrawler(tables)
 
 # COMMAND ----------
 
@@ -100,7 +91,7 @@
 
 # COMMAND ----------
 
-tacltoolkit.grants_snapshot()
+grants.snapshot()
 
 # COMMAND ----------
 

src/databricks/labs/ucx/assessment/crawlers.py

Lines changed: 1 addition & 2 deletions
@@ -4,8 +4,7 @@
 from databricks.sdk import WorkspaceClient
 from databricks.sdk.service.jobs import BaseJob
 
-from databricks.labs.ucx.framework.crawlers import CrawlerBase
-from databricks.labs.ucx.hive_metastore.table_acls import SqlBackend
+from databricks.labs.ucx.framework.crawlers import CrawlerBase, SqlBackend
 
 INCOMPATIBLE_SPARK_CONFIG_KEYS = [
     "spark.databricks.passthrough.enabled",

src/databricks/labs/ucx/config.py

Lines changed: 2 additions & 22 deletions
@@ -68,37 +68,18 @@ def from_dict(cls, raw: dict):
         return cls(**raw)
 
 
-@dataclass
-class TaclConfig:
-    databases: list[str] | None = None
-    auto: bool | None = None
-
-    def __post_init__(self):
-        if not self.databases and self.auto is None:
-            msg = "Either selected or auto must be set"
-            raise ValueError(msg)
-        if self.databases and self.auto is False:
-            msg = "No selected groups provided, but auto-collection is disabled"
-            raise ValueError(msg)
-
-    @classmethod
-    def from_dict(cls, raw: dict):
-        return cls(**raw)
-
-
 # Used to set the right expectation about configuration file schema
 _CONFIG_VERSION = 1
 
 
 @dataclass
 class MigrationConfig:
     inventory_database: str
-    tacl: TaclConfig
     groups: GroupsConfig
     instance_pool_id: str = None
     warehouse_id: str = None
     connect: ConnectConfig | None = None
-    num_threads: int | None = 4
+    num_threads: int | None = 10
     log_level: str | None = "INFO"
 
     # Starting path for notebooks and directories crawler
@@ -137,12 +118,11 @@ def from_dict(cls, raw: dict) -> "MigrationConfig":
             raise ValueError(msg)
         return cls(
             inventory_database=raw.get("inventory_database"),
-            tacl=TaclConfig.from_dict(raw.get("tacl", {})),
             groups=GroupsConfig.from_dict(raw.get("groups", {})),
             connect=ConnectConfig.from_dict(raw.get("connect", {})),
             instance_pool_id=raw.get("instance_pool_id", None),
             warehouse_id=raw.get("warehouse_id", None),
-            num_threads=raw.get("num_threads", 8),
+            num_threads=raw.get("num_threads", 10),
             log_level=raw.get("log_level", "INFO"),
         )
 
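With `TaclConfig` gone, constructing a `MigrationConfig` gets simpler, and a stale `tacl:` section in an existing config file is simply never read by `from_dict` above. A minimal sketch with placeholder values, assuming `GroupsConfig(auto=True)` is a valid groups selection (as suggested by the install.py hunk below):

```python
from databricks.labs.ucx.config import GroupsConfig, MigrationConfig

cfg = MigrationConfig(
    inventory_database="unity_catalog_migration",
    groups=GroupsConfig(auto=True),  # assumption: auto-collect all groups, as in install.py
    warehouse_id="abc123",           # placeholder warehouse id
)
assert cfg.num_threads == 10  # new default, previously 4 in the dataclass and 8 in from_dict
```
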

src/databricks/labs/ucx/hive_metastore/__init__.py

Lines changed: 4 additions & 2 deletions

@@ -1,3 +1,5 @@
-from databricks.labs.ucx.hive_metastore.table_acls import TaclToolkit
+from databricks.labs.ucx.hive_metastore.grants import GrantsCrawler
+from databricks.labs.ucx.hive_metastore.list_mounts import Mounts
+from databricks.labs.ucx.hive_metastore.tables import TablesCrawler
 
-__all__ = ["TaclToolkit"]
+__all__ = ["TablesCrawler", "GrantsCrawler", "Mounts"]

src/databricks/labs/ucx/hive_metastore/grants.py

Lines changed: 16 additions & 22 deletions
@@ -122,28 +122,21 @@ def __init__(self, tc: TablesCrawler):
         super().__init__(tc._backend, tc._catalog, tc._schema, "grants")
         self._tc = tc
 
-    def snapshot(self, catalog: str, database: str) -> list[Grant]:
-        return self._snapshot(partial(self._try_load, catalog, database), partial(self._crawl, catalog, database))
+    def snapshot(self) -> list[Grant]:
+        return self._snapshot(partial(self._try_load), partial(self._crawl))
 
-    def _try_load(self, catalog: str, database: str):
-        for row in self._fetch(
-            f'SELECT * FROM {self._full_name} WHERE catalog = "{catalog}" AND database = "{database}"'
-        ):
+    def _try_load(self):
+        for row in self._fetch(f"SELECT * FROM {self._full_name}"):
             yield Grant(*row)
 
-    def _crawl(self, catalog: str, database: str) -> list[Grant]:
+    def _crawl(self) -> list[Grant]:
         """
-        Crawls and lists grants for tables and views within the specified catalog and database.
-
-        Args:
-            catalog (str): The catalog name.
-            database (str): The database name.
+        Crawls and lists grants for all databases, tables, and views within hive_metastore.
 
         Returns:
             list[Grant]: A list of Grant objects representing the listed grants.
 
         Behavior:
-        - Validates and prepares the provided catalog and database names.
         - Constructs a list of tasks to fetch grants using the `_grants` method, including both database-wide and
           table/view-specific grants.
        - Iterates through tables in the specified database using the `_tc.snapshot` method.
@@ -156,21 +149,22 @@ def _crawl(self, catalog: str, database: str) -> list[Grant]:
          database, table, view).
 
         Returns:
-            list[Grant]: A list of Grant objects representing the grants found in the specified catalog and database.
+            list[Grant]: A list of Grant objects representing the grants found in hive_metastore.
         """
-        catalog = self._valid(catalog)
-        database = self._valid(database)
-        tasks = [partial(self._grants, catalog=catalog), partial(self._grants, catalog=catalog, database=database)]
-        for table in self._tc.snapshot(catalog, database):
-            fn = partial(self._grants, catalog=catalog, database=database)
+        seen_databases = set()
+        catalog = "hive_metastore"
+        tasks = [partial(self._grants, catalog=catalog)]
+        for table in self._tc.snapshot():
+            if table.database not in seen_databases:
+                tasks.append(partial(self._grants, catalog=catalog, database=table.database))
+                seen_databases.add(table.database)
+            fn = partial(self._grants, catalog=catalog, database=table.database)
             if table.kind == "VIEW":
                 tasks.append(partial(fn, view=table.name))
             else:
                 tasks.append(partial(fn, table=table.name))
         return [
-            grant
-            for grants in ThreadedExecution.gather(f"listing grants for {catalog}.{database}", tasks)
-            for grant in grants
+            grant for grants in ThreadedExecution.gather(f"listing grants for {catalog}", tasks) for grant in grants
         ]
 
     def _grants(
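The interesting part of the new `_crawl` is the de-duplication: one catalog-level task, one task per distinct database seen in the table snapshot, and one task per table or view. A self-contained sketch of that task-building logic; `FakeTable` and `build_grant_tasks` are illustrative stand-ins, not ucx APIs:

```python
from dataclasses import dataclass
from functools import partial


@dataclass
class FakeTable:
    # Hypothetical stand-in for ucx's Table record: only the fields used below.
    database: str
    name: str
    kind: str  # "TABLE" or "VIEW"


def build_grant_tasks(tables, list_grants):
    """One catalog-level task, one per distinct database, and one per table/view."""
    catalog = "hive_metastore"
    tasks = [partial(list_grants, catalog=catalog)]
    seen_databases = set()
    for table in tables:
        if table.database not in seen_databases:
            tasks.append(partial(list_grants, catalog=catalog, database=table.database))
            seen_databases.add(table.database)
        fn = partial(list_grants, catalog=catalog, database=table.database)
        if table.kind == "VIEW":
            tasks.append(partial(fn, view=table.name))
        else:
            tasks.append(partial(fn, table=table.name))
    return tasks


if __name__ == "__main__":
    snapshot = [
        FakeTable("sales", "orders", "TABLE"),
        FakeTable("sales", "orders_v", "VIEW"),
        FakeTable("hr", "people", "TABLE"),
    ]
    tasks = build_grant_tasks(snapshot, list_grants=lambda **kw: kw)
    # 1 catalog task + 2 database tasks + 3 object tasks = 6
    print(len(tasks), [t() for t in tasks])
```
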

src/databricks/labs/ucx/hive_metastore/table_acls.py

Lines changed: 0 additions & 48 deletions
This file was deleted.

src/databricks/labs/ucx/hive_metastore/tables.py

Lines changed: 21 additions & 21 deletions
@@ -70,54 +70,54 @@ def uc_create_sql(self, catalog):
 
 
 class TablesCrawler(CrawlerBase):
-    def __init__(self, backend: SqlBackend, catalog, schema):
+    def __init__(self, backend: SqlBackend, schema):
         """
         Initializes a TablesCrawler instance.
 
         Args:
             backend (SqlBackend): The SQL Execution Backend abstraction (either REST API or Spark)
-            catalog (str): The catalog name for the inventory persistence.
             schema: The schema name for the inventory persistence.
         """
-        super().__init__(backend, catalog, schema, "tables")
+        super().__init__(backend, "hive_metastore", schema, "tables")
 
     def _all_databases(self) -> Iterator[Row]:
         yield from self._fetch("SHOW DATABASES")
 
-    def snapshot(self, catalog: str, database: str) -> list[Table]:
+    def snapshot(self) -> list[Table]:
         """
         Takes a snapshot of tables in the specified catalog and database.
 
-        Args:
-            catalog (str): The catalog name.
-            database (str): The database name.
-
         Returns:
             list[Table]: A list of Table objects representing the snapshot of tables.
         """
-        return self._snapshot(partial(self._try_load, catalog, database), partial(self._crawl, catalog, database))
+        return self._snapshot(partial(self._try_load), partial(self._crawl))
 
-    def _try_load(self, catalog: str, database: str):
+    def _try_load(self):
         """Tries to load table information from the database or throws TABLE_OR_VIEW_NOT_FOUND error"""
-        for row in self._fetch(
-            f'SELECT * FROM {self._full_name} WHERE catalog = "{catalog}" AND database = "{database}"'
-        ):
+        for row in self._fetch(f"SELECT * FROM {self._full_name}"):
             yield Table(*row)
 
-    def _crawl(self, catalog: str, database: str) -> list[Table]:
+    def _crawl(self) -> list[Table]:
         """Crawls and lists tables within the specified catalog and database.
 
         After performing initial scan of all tables, starts making parallel
         DESCRIBE TABLE EXTENDED queries for every table.
+
+        Production tasks would most likely be executed through `tables.scala`
+        within `crawl_tables` task due to `spark.sharedState.externalCatalog`
+        lower-level APIs not requiring a roundtrip to storage, which is not
+        possible for Azure storage with credentials supplied through Spark
+        conf (see https://github.com/databrickslabs/ucx/issues/249).
+
+        See also https://github.com/databrickslabs/ucx/issues/247
         """
-        catalog = self._valid(catalog)
-        database = self._valid(database)
-        logger.debug(f"[{catalog}.{database}] listing tables")
         tasks = []
-        for _, table, _is_tmp in self._fetch(f"SHOW TABLES FROM {catalog}.{database}"):
-            tasks.append(partial(self._describe, catalog, database, table))
-        results = ThreadedExecution.gather(f"listing tables in {catalog}.{database}", tasks)
-
+        catalog = "hive_metastore"
+        for (database,) in self._all_databases():
+            logger.debug(f"[{catalog}.{database}] listing tables")
+            for _, table, _is_tmp in self._fetch(f"SHOW TABLES FROM {catalog}.{database}"):
+                tasks.append(partial(self._describe, catalog, database, table))
+        results = ThreadedExecution.gather(f"listing tables in {catalog}", tasks)
         return [x for x in results if x is not None]
 
     def _describe(self, catalog: str, database: str, table: str) -> Table | None:
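Both crawlers funnel through `CrawlerBase._snapshot`, which, as the `_try_load`/`_crawl` split above suggests, first tries to read previously persisted rows from the inventory table and only falls back to a live crawl otherwise. A simplified, self-contained illustration of that control flow, assuming the base class also persists freshly crawled rows; this is not the actual `CrawlerBase` code:

```python
from collections.abc import Callable, Iterable
from typing import Any


def snapshot(
    try_load: Callable[[], Iterable[Any]],
    crawl: Callable[[], Iterable[Any]],
    persist: Callable[[list[Any]], None],
) -> list[Any]:
    """Load-or-crawl: reuse cached inventory rows when present, otherwise crawl and persist."""
    cached = list(try_load())
    if cached:
        return cached
    fresh = list(crawl())
    persist(fresh)
    return fresh


# Toy callables standing in for TablesCrawler._try_load / _crawl:
rows = snapshot(
    try_load=lambda: [],  # nothing persisted yet, e.g. on the first run
    crawl=lambda: [("hive_metastore", "sales", "orders", "TABLE")],
    persist=lambda batch: print(f"persisting {len(batch)} rows"),
)
print(rows)
```
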

src/databricks/labs/ucx/install.py

Lines changed: 1 addition & 2 deletions
@@ -17,7 +17,7 @@
 from databricks.sdk.service.workspace import ImportFormat
 
 from databricks.labs.ucx.__about__ import __version__
-from databricks.labs.ucx.config import GroupsConfig, MigrationConfig, TaclConfig
+from databricks.labs.ucx.config import GroupsConfig, MigrationConfig
 from databricks.labs.ucx.framework.dashboards import DashboardFromFiles
 from databricks.labs.ucx.framework.tasks import _TASKS, Task
 from databricks.labs.ucx.runtime import main
@@ -199,7 +199,6 @@ def _configure(self):
         self._config = MigrationConfig(
             inventory_database=inventory_database,
             groups=GroupsConfig(**groups_config_args),
-            tacl=TaclConfig(auto=True),
             warehouse_id=warehouse_id,
             log_level=log_level,
             num_threads=num_threads,

src/databricks/labs/ucx/runtime.py

Lines changed: 5 additions & 6 deletions
@@ -8,7 +8,7 @@
 from databricks.labs.ucx.config import MigrationConfig
 from databricks.labs.ucx.framework.crawlers import RuntimeBackend
 from databricks.labs.ucx.framework.tasks import task, trigger
-from databricks.labs.ucx.hive_metastore import TaclToolkit
+from databricks.labs.ucx.hive_metastore import GrantsCrawler, TablesCrawler
 from databricks.labs.ucx.hive_metastore.data_objects import ExternalLocationCrawler
 from databricks.labs.ucx.hive_metastore.list_mounts import Mounts
 from databricks.labs.ucx.workspace_access import GroupMigrationToolkit
@@ -48,11 +48,10 @@ def crawl_grants(cfg: MigrationConfig):
     setup. This approach not only safeguards data integrity and access control but also ensures a smooth and
     secure transition for our data assets. It reinforces our commitment to data security and compliance throughout the
     migration process and beyond"""
-    ws = WorkspaceClient(config=cfg.to_databricks_config())
-    tacls = TaclToolkit(
-        ws, inventory_catalog="hive_metastore", inventory_schema=cfg.inventory_database, databases=cfg.tacl.databases
-    )
-    tacls.grants_snapshot()
+    backend = RuntimeBackend()
+    tables = TablesCrawler(backend, cfg.inventory_database)
+    grants = GrantsCrawler(tables)
+    grants.snapshot()
 
 
 @task("assessment", depends_on=[setup_schema])
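Once the `crawl_grants` task has run, results are persisted as the `grants` table in the inventory schema under the `hive_metastore` catalog (see the `GrantsCrawler.__init__` hunk above). A quick, hedged way to eyeball them from a Databricks notebook, where `spark` is predefined and `ucx` stands in for your configured `inventory_database`:

```python
# Placeholder inventory schema name; substitute your configured inventory_database.
spark.sql("SELECT * FROM hive_metastore.ucx.grants").show(truncate=False)
```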
