Skip to content

Commit c5e20d9

Browse files
FastLeenfx and Serge Smertin authored
Fixed guess_external_locations failure with AttributeError: as_dict and added an integration test (#259)
Created unit testing. Still seeing weird behavior of the Row class. Possibly related to Python version. Fix #256 --------- Co-authored-by: Serge Smertin <[email protected]>
1 parent e62bedd commit c5e20d9

File tree

2 files changed

+33
-3
lines changed

2 files changed

+33
-3
lines changed

src/databricks/labs/ucx/hive_metastore/data_objects.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66

77
from databricks.labs.ucx.framework.crawlers import CrawlerBase, SqlBackend
88
from databricks.labs.ucx.hive_metastore.list_mounts import Mounts
9+
from databricks.labs.ucx.mixins.sql import Row
910

1011

1112
@dataclass
@@ -20,9 +21,9 @@ def __init__(self, ws: WorkspaceClient, sbe: SqlBackend, schema):
2021
super().__init__(sbe, "hive_metastore", schema, "external_locations")
2122
self._ws = ws
2223

23-
def _external_locations(self, tables, mounts):
24+
def _external_locations(self, tables: list[Row], mounts) -> list[ExternalLocation]:
2425
min_slash = 2
25-
external_locations = []
26+
external_locations: list[ExternalLocation] = []
2627
for table in tables:
2728
location = table.as_dict()["location"]
2829
if location is not None and len(location) > 0:
@@ -52,7 +53,7 @@ def _external_locations(self, tables, mounts):
5253
return external_locations
5354

5455
def _external_location_list(self):
55-
tables = self._backend.fetch(f"SELECT location FROM {self._schema}.tables WHERE location IS NOT NULL")
56+
tables = list(self._backend.fetch(f"SELECT location FROM {self._schema}.tables WHERE location IS NOT NULL"))
5657
mounts = Mounts(self._backend, self._ws, self._schema).snapshot()
5758
return self._external_locations(list(tables), list(mounts))
5859

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,29 @@
1+
import logging
2+
import os
3+
4+
from databricks.labs.ucx.framework.crawlers import StatementExecutionBackend
5+
from databricks.labs.ucx.hive_metastore.data_objects import ExternalLocationCrawler
6+
from databricks.labs.ucx.hive_metastore.list_mounts import Mount
7+
from databricks.labs.ucx.hive_metastore.tables import Table
8+
9+
logger = logging.getLogger(__name__)
10+
11+
12+
def test_table_inventory(ws, make_warehouse, make_schema):
13+
warehouse_id = os.environ["TEST_DEFAULT_WAREHOUSE_ID"]
14+
15+
logger.info("setting up fixtures")
16+
sbe = StatementExecutionBackend(ws, warehouse_id)
17+
tables = [
18+
Table("hive_metastore", "foo", "bar", "MANAGED", "delta", location="s3://test_location/test1/table1"),
19+
Table("hive_metastore", "foo", "bar", "EXTERNAL", "delta", location="s3://test_location/test2/table2"),
20+
Table("hive_metastore", "foo", "bar", "EXTERNAL", "delta", location="dbfs:/mnt/foo/test3/table3"),
21+
]
22+
schema = make_schema()
23+
sbe.save_table(f"{schema}.tables", tables)
24+
sbe.save_table(f"{schema}.mounts", [Mount("/mnt/foo", "s3://bar")])
25+
26+
crawler = ExternalLocationCrawler(ws, sbe, schema.split(".")[1])
27+
results = crawler.snapshot()
28+
assert len(results) == 2
29+
assert results[1].location == "s3://bar/test3/"

0 commit comments

Comments (0)