Skip to content

Commit a3ba965

Browse files
authored
Bug: Generate custom warning when doing table size check and encountering DELTA_INVALID_FORMAT exception (#2426)
## Changes

When computing table sizes, an exception was raised for tables that fail with `DELTA_INVALID_FORMAT`. This PR converts that error into a warning and proceeds with the rest of the tables.

Resolves #1913

### Functionality

- [ ] added relevant user documentation
- [ ] added new CLI command
- [ ] modified existing command: `databricks labs ucx ...`
- [ ] added a new workflow
- [ ] modified existing workflow: `...`
- [ ] added a new table
- [ ] modified existing table: `...`

### Tests

<!-- How is this tested? Please see the checklist below and also describe any other relevant tests -->

- [ ] manually tested
- [X] added unit tests
- [ ] added integration tests
- [ ] verified on staging environment (screenshot attached)
1 parent ac779ef commit a3ba965

File tree

2 files changed

+28
-0
lines changed

2 files changed

+28
-0
lines changed

src/databricks/labs/ucx/hive_metastore/table_size.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,11 @@ def _safe_get_table_size(self, table_full_name: str) -> int | None:
7979
if "[TABLE_OR_VIEW_NOT_FOUND]" in str(e) or "[DELTA_TABLE_NOT_FOUND]" in str(e):
8080
logger.warning(f"Failed to evaluate {table_full_name} table size. Table not found.")
8181
return None
82+
if "[DELTA_INVALID_FORMAT]" in str(e):
83+
logger.warning(
84+
f"Unable to read Delta table {table_full_name}, please check table structure and try again."
85+
)
86+
return None
8287
if "[DELTA_MISSING_TRANSACTION_LOG]" in str(e):
8388
logger.warning(f"Delta table {table_full_name} is corrupted: missing transaction log.")
8489
return None

tests/unit/hive_metastore/test_table_size.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,3 +130,26 @@ def test_table_size_when_table_corrupted(mocker):
130130
results = tsc.snapshot()
131131

132132
assert len(results) == 0
133+
134+
135+
def test_table_size_when_delta_invalid_format_error(mocker):
    """Check that a ``[DELTA_INVALID_FORMAT]`` failure yields no size records.

    The size lookup on the mocked Spark session is made to raise an exception
    whose message contains ``[DELTA_INVALID_FORMAT]``; the crawler snapshot is
    then expected to come back empty instead of propagating the error.
    """
    errors = {}
    rows = {
        "table_size": [],
        "hive_metastore.inventory_database.tables": [
            ("hive_metastore", "db1", "table1", "MANAGED", "DELTA", "dbfs:/location/table", None),
        ],
        "SHOW DATABASES": [("db1",)],
    }
    backend = MockBackend(fails_on_first=errors, rows=rows)
    pyspark_sql_session = mocker.Mock()
    # Use patch.dict rather than assigning into sys.modules directly: the fake
    # module is removed again at test teardown, so it cannot leak into tests
    # that run later in the same process.
    mocker.patch.dict(sys.modules, {"pyspark.sql.session": pyspark_sql_session})
    tsc = TableSizeCrawler(backend, "inventory_database")

    # Simulate Spark failing to read the table while computing its size.
    tsc._spark._jsparkSession.table().queryExecution().analyzed().stats().sizeInBytes.side_effect = Exception(
        "[DELTA_INVALID_FORMAT]"
    )

    results = tsc.snapshot()

    assert len(results) == 0

0 commit comments

Comments
 (0)