Skip to content

Commit 0bf43e7

Browse files
authored
Added a column to $inventory.tables to specify if a table might have been synchronised to Unity Catalog already or not (#306)
Closes #303
1 parent e516998 commit 0bf43e7

File tree

4 files changed

+44
-16
lines changed

4 files changed

+44
-16
lines changed

docs/table_persistence.md

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,17 @@ Table Utilization:
2424

2525
Holds Inventory of all tables in all databases and their relevant metadata.
2626

27-
| Column | Datatype | Description | Comments |
28-
|-----------|----------|-------------|----------|
29-
| catalog | string | Original catalog of the table. _hive_metastore_ by default |
30-
| database | string | Original schema of the table |
31-
| name |string|Name of the table|
32-
|object_type|string|MANAGED, EXTERNAL, or VIEW|
33-
|table_format|string|Table provider. Like delta or json or parquet.|
34-
|location|string|Location of the data for table|
35-
|view_text|nullable string|If the table is the view, then this column holds the definition of the view|
27+
| Column | Datatype | Description | Comments |
28+
|--------------|----------|-------------|----------|
29+
| catalog | string | Original catalog of the table. _hive_metastore_ by default |
30+
| database | string | Original schema of the table |
31+
| name |string|Name of the table|
32+
| object_type |string|MANAGED, EXTERNAL, or VIEW|
33+
| table_format |string|Table provider. Like delta or json or parquet.|
34+
| location |string|Location of the data for table|
35+
| view_text |nullable string|If the table is a view, this column holds the definition of the view|
36+
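| upgraded_to |string|Upgrade target: the three-level name (catalog.schema.table) read from the table's `upgraded_to` property; null if the property is not set|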
37+
3638

3739
<br/>
3840

@@ -78,11 +80,12 @@ List of DBFS mount points.
7880
#### _$inventory_.permissions
7981
Workspace object level permissions
8082

81-
| Column | Datatype | Description | Comments |
82-
|-----------|----------|-------------|----------|
83-
|object_id|string|Either:<br/>Group ID<br/>Workspace Object ID<br/>Redash Object ID<br/>Scope name
84-
|supports|string|One of:<br/>AUTHORIZATION<br/><br/>CLUSTERS<br/>CLUSTER_POLICIES<br/>DIRECTORIES<br/>EXPERIMENTS<br/>FILES<br/>INSTANCE_POOLS<br/>JOBS<br/>NOTEBOOKS<br/>PIPELINES<br/>REGISTERED_MODELS<br/>REPOS<br/>SERVING_ENDPOINTS<br/>SQL_WAREHOUSES
85-
|raw_object_permissions|JSON|JSON-serialized response of:<br/>Generic Permissions<br/>Secret ACL<br/>Group roles and entitlements<br/>Redash permissions|
83+
| Column | Datatype | Description | Comments |
84+
|------------------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|
85+
| object_id | string | Either:<br/>Group ID<br/>Workspace Object ID<br/>Redash Object ID<br/>Scope name | |
86+
| supports | string | One of:<br/>AUTHORIZATION<br/><br/>CLUSTERS<br/>CLUSTER_POLICIES<br/>DIRECTORIES<br/>EXPERIMENTS<br/>FILES<br/>INSTANCE_POOLS<br/>JOBS<br/>NOTEBOOKS<br/>PIPELINES<br/>REGISTERED_MODELS<br/>REPOS<br/>SERVING_ENDPOINTS<br/>SQL_WAREHOUSES | |
87+
| raw_object_permissions | JSON | JSON-serialized response of:<br/>Generic Permissions<br/>Secret ACL<br/>Group roles and entitlements<br/>Redash permissions | |
88+
8689

8790
<br/>
8891

src/databricks/labs/ucx/hive_metastore/tables.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
import logging
2+
import re
3+
import string
24
from collections.abc import Iterator
35
from dataclasses import dataclass
46
from functools import partial
@@ -22,6 +24,7 @@ class Table:
2224

2325
location: str = None
2426
view_text: str = None
27+
upgraded_to: str = None
2528

2629
@property
2730
def is_delta(self) -> bool:
@@ -93,6 +96,13 @@ def snapshot(self) -> list[Table]:
9396
"""
9497
return self._snapshot(partial(self._try_load), partial(self._crawl))
9598

99+
@staticmethod
def _parse_table_props(tbl_props: str) -> dict[str, str]:
    """Parse a bracketed ``key=value`` list into a dictionary.

    The expected input looks like ``[k1=v1,k2=v2]``, such as the
    "Table Properties" value that ``_describe`` passes in.

    Args:
        tbl_props: Raw properties string; may be empty or None.

    Returns:
        Mapping of property name to property value. Empty when the input is
        empty or contains no ``key=value`` pair.

    Note:
        The flat format cannot represent values containing ``,``, ``[`` or
        ``]``; such a value is cut off at that character.
    """
    if not tbl_props:
        return {}
    # The key group excludes "=", so only the first "=" of a pair splits it and
    # a value that itself contains "=" is kept whole.
    pattern = r"([^,\[\]=]+)=([^,\[\]]+)"
    # Strip so that a ", " separator does not leak a leading space into the key.
    return {key.strip(): value.strip() for key, value in re.findall(pattern, tbl_props)}
105+
96106
def _try_load(self):
97107
"""Tries to load table information from the database or throws TABLE_OR_VIEW_NOT_FOUND error"""
98108
for row in self._fetch(f"SELECT * FROM {self._full_name}"):
@@ -140,6 +150,7 @@ def _describe(self, catalog: str, database: str, table: str) -> Table | None:
140150
table_format=describe.get("Provider", "").upper(),
141151
location=describe.get("Location", None),
142152
view_text=describe.get("View Text", None),
153+
upgraded_to=self._parse_table_props(describe.get("Table Properties", "")).get("upgraded_to", None),
143154
)
144155
except Exception as e:
145156
logger.error(f"Couldn't fetch information for table {full_name} : {e}")

src/databricks/labs/ucx/hive_metastore/tables.scala

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ import org.apache.spark.sql.DataFrame
77

88
// must follow the same structure as databricks.labs.ucx.hive_metastore.tables.Table
99
case class TableDetails(catalog: String, database: String, name: String, object_type: String,
10-
table_format: String, location: String, view_text: String)
10+
table_format: String, location: String, view_text: String, upgraded_to: String)
1111

1212
// recording error log in the database
1313
case class TableError(catalog: String, database: String, name: String, error: String)
@@ -36,8 +36,10 @@ def metadataForAllTables(databases: Seq[String], queue: ConcurrentLinkedQueue[Ta
3636
failures.add(TableError("hive_metastore", databaseName, tableName, s"result is null"))
3737
None
3838
} else {
39+
val upgraded_to=table.properties.get("upgraded_to")
3940
Some(TableDetails("hive_metastore", databaseName, tableName, table.tableType.name, table.provider.orNull,
40-
table.storage.locationUri.map(_.toString).orNull, table.viewText.orNull))
41+
table.storage.locationUri.map(_.toString).orNull, table.viewText.orNull,
42+
upgraded_to match {case Some(target) => target case None => null}))
4143
}
4244
} catch {
4345
case err: Throwable =>

tests/unit/hive_metastore/test_tables.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,18 @@ def test_tables_crawler_inventory_table():
8484
assert tc._table == "tables"
8585

8686

87+
def test_tables_crawler_parse_tp():
    """Table properties parse into a dict, with and without an ``upgraded_to`` entry."""
    crawler = TablesCrawler(MockBackend(), "default")

    # Properties that include an upgrade target.
    with_target = crawler._parse_table_props(
        "[delta.minReaderVersion=1,delta.minWriterVersion=2,upgraded_to=fake_cat.fake_ext.fake_delta]"
    )
    # Properties without an upgrade target.
    without_target = crawler._parse_table_props("[delta.minReaderVersion=1,delta.minWriterVersion=2]")

    assert len(with_target) == 3
    assert with_target.get("upgraded_to") == "fake_cat.fake_ext.fake_delta"
    assert len(without_target) == 2
    assert without_target.get("upgraded_to") is None
97+
98+
8799
def test_tables_returning_error_when_describing():
88100
errors = {"DESCRIBE TABLE EXTENDED hive_metastore.database.table1": "error"}
89101
rows = {

0 commit comments

Comments
 (0)