
Commit bf75b50

Fixed escaping SQL object names (#836)

1 parent 7cc5c8f commit bf75b50

File tree: 8 files changed (+98, −33 lines)
src/databricks/labs/ucx/framework/utils.py

Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@
+import string
+
+_allowed_object_chars = set(string.ascii_letters + string.digits + '_')
+
+
+def escape_sql_identifier(path: str, optional: bool | None = True) -> str:
+    """
+    Escapes the path components to make them SQL safe.
+
+    Args:
+        path (str): The dot-separated path of a catalog object.
+        optional (bool): If `True` then do not escape if no special characters are present.
+
+    Returns:
+        str: The path with all parts escaped in backticks.
+    """
+    parts = path.split(".", maxsplit=2)
+    escaped = []
+    for part in parts:
+        if not part.startswith("`") and not part.endswith("`"):
+            part = part.strip("`")
+        if not optional or not set(part) <= _allowed_object_chars:
+            part = f"`{part}`"
+        escaped.append(part)
+    return ".".join(escaped)

src/databricks/labs/ucx/hive_metastore/grants.py

Lines changed: 7 additions & 6 deletions
@@ -8,6 +8,7 @@
 from databricks.sdk.service.catalog import SchemaInfo, TableInfo
 
 from databricks.labs.ucx.framework.crawlers import CrawlerBase
+from databricks.labs.ucx.framework.utils import escape_sql_identifier
 from databricks.labs.ucx.hive_metastore.tables import TablesCrawler
 from databricks.labs.ucx.hive_metastore.udfs import UdfsCrawler
 

@@ -95,13 +96,13 @@ def hive_grant_sql(self) -> list[str]:
 
     def hive_revoke_sql(self) -> str:
         object_type, object_key = self.this_type_and_key()
-        return f"REVOKE {self.action_type} ON {object_type} {object_key} FROM `{self.principal}`"
+        return f"REVOKE {self.action_type} ON {object_type} {escape_sql_identifier(object_key)} FROM `{self.principal}`"
 
     def _set_owner_sql(self, object_type, object_key):
-        return f"ALTER {object_type} {object_key} OWNER TO `{self.principal}`"
+        return f"ALTER {object_type} {escape_sql_identifier(object_key)} OWNER TO `{self.principal}`"
 
     def _apply_grant_sql(self, action_type, object_type, object_key):
-        return f"GRANT {action_type} ON {object_type} {object_key} TO `{self.principal}`"
+        return f"GRANT {action_type} ON {object_type} {escape_sql_identifier(object_key)} TO `{self.principal}`"
 
     def _uc_action(self, action_type):
         def inner(object_type, object_key):

@@ -155,7 +156,7 @@ def snapshot(self) -> Iterable[Grant]:
         return self._snapshot(partial(self._try_load), partial(self._crawl))
 
     def _try_load(self):
-        for row in self._fetch(f"SELECT * FROM {self._full_name}"):
+        for row in self._fetch(f"SELECT * FROM {escape_sql_identifier(self._full_name)}"):
             yield Grant(*row)
 
     def _crawl(self) -> Iterable[Grant]:

@@ -189,7 +190,7 @@ def _crawl(self) -> Iterable[Grant]:
         tasks.append(partial(self._grants, catalog=catalog, any_file=True))
         tasks.append(partial(self._grants, catalog=catalog, anonymous_function=True))
         # scan all databases, even empty ones
-        for row in self._fetch(f"SHOW DATABASES FROM {catalog}"):
+        for row in self._fetch(f"SHOW DATABASES FROM {escape_sql_identifier(catalog)}"):
             tasks.append(partial(self._grants, catalog=catalog, database=row.databaseName))
         for table in self._tc.snapshot():
             fn = partial(self._grants, catalog=catalog, database=table.database)

@@ -273,7 +274,7 @@ def _grants(
             "ANY_FILE": "ANY FILE",
             "ANONYMOUS_FUNCTION": "ANONYMOUS FUNCTION",
         }
-        for row in self._fetch(f"SHOW GRANTS ON {on_type} {key}"):
+        for row in self._fetch(f"SHOW GRANTS ON {on_type} {escape_sql_identifier(key)}"):
             (principal, action_type, object_type, _) = row
             object_type = object_type_normalization.get(object_type, object_type)
             if on_type != object_type:
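
For illustration only, a minimal sketch of how the escaping changes the SQL these call sites emit; the object key and principal below are hypothetical, not taken from the commit:

from databricks.labs.ucx.framework.utils import escape_sql_identifier

object_key = "hive_metastore.my-db.my_table"  # hypothetical dot-separated object key
principal = "data_engineers"  # hypothetical principal name
print(f"GRANT SELECT ON TABLE {escape_sql_identifier(object_key)} TO `{principal}`")
# GRANT SELECT ON TABLE hive_metastore.`my-db`.my_table TO `data_engineers`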

src/databricks/labs/ucx/hive_metastore/locations.py

Lines changed: 8 additions & 3 deletions
@@ -10,6 +10,7 @@
 from databricks.sdk.service.workspace import ImportFormat
 
 from databricks.labs.ucx.framework.crawlers import CrawlerBase, SqlBackend
+from databricks.labs.ucx.framework.utils import escape_sql_identifier
 from databricks.labs.ucx.mixins.sql import Row
 
 logger = logging.getLogger(__name__)

@@ -110,7 +111,7 @@ def _add_jdbc_location(self, external_locations, location, table):
     def _external_location_list(self) -> Iterable[ExternalLocation]:
         tables = list(
             self._backend.fetch(
-                f"SELECT location, storage_properties FROM {self._schema}.tables WHERE location IS NOT NULL"
+                f"SELECT location, storage_properties FROM {escape_sql_identifier(self._schema)}.tables WHERE location IS NOT NULL"
             )
         )
         mounts = Mounts(self._backend, self._ws, self._schema).snapshot()

@@ -120,7 +121,9 @@ def snapshot(self) -> Iterable[ExternalLocation]:
         return self._snapshot(self._try_fetch, self._external_location_list)
 
     def _try_fetch(self) -> Iterable[ExternalLocation]:
-        for row in self._fetch(f"SELECT * FROM {self._schema}.{self._table}"):
+        for row in self._fetch(
+            f"SELECT * FROM {escape_sql_identifier(self._schema)}.{escape_sql_identifier(self._table)}"
+        ):
             yield ExternalLocation(*row)
 
     def _get_ext_location_definitions(self, missing_locations: list[ExternalLocation]) -> list:

@@ -227,5 +230,7 @@ def snapshot(self) -> Iterable[Mount]:
         return self._snapshot(self._try_fetch, self._list_mounts)
 
     def _try_fetch(self) -> Iterable[Mount]:
-        for row in self._fetch(f"SELECT * FROM {self._schema}.{self._table}"):
+        for row in self._fetch(
+            f"SELECT * FROM {escape_sql_identifier(self._schema)}.{escape_sql_identifier(self._table)}"
+        ):
             yield Mount(*row)

src/databricks/labs/ucx/hive_metastore/mapping.py

Lines changed: 9 additions & 4 deletions
@@ -13,6 +13,7 @@
 
 from databricks.labs.ucx.account import WorkspaceInfo
 from databricks.labs.ucx.framework.crawlers import SqlBackend
+from databricks.labs.ucx.framework.utils import escape_sql_identifier
 from databricks.labs.ucx.hive_metastore import TablesCrawler
 from databricks.labs.ucx.hive_metastore.tables import Table
 

@@ -99,7 +100,7 @@ def skip_table(self, schema: str, table: str):
         # Marks a table to be skipped in the migration process by applying a table property
         try:
             self._backend.execute(
-                f"ALTER TABLE `{schema}`.`{table}` SET TBLPROPERTIES('{self.UCX_SKIP_PROPERTY}' = true)"
+                f"ALTER TABLE {escape_sql_identifier(schema)}.{escape_sql_identifier(table)} SET TBLPROPERTIES('{self.UCX_SKIP_PROPERTY}' = true)"
             )
         except NotFound as nf:
             if "[TABLE_OR_VIEW_NOT_FOUND]" in str(nf) or "[DELTA_TABLE_NOT_FOUND]" in str(nf):

@@ -112,7 +113,9 @@ def skip_table(self, schema: str, table: str):
     def skip_schema(self, schema: str):
         # Marks a schema to be skipped in the migration process by applying a table property
         try:
-            self._backend.execute(f"ALTER SCHEMA `{schema}` SET DBPROPERTIES('{self.UCX_SKIP_PROPERTY}' = true)")
+            self._backend.execute(
+                f"ALTER SCHEMA {escape_sql_identifier(schema)} SET DBPROPERTIES('{self.UCX_SKIP_PROPERTY}' = true)"
+            )
         except NotFound as nf:
             if "[SCHEMA_NOT_FOUND]" in str(nf):
                 logger.error(f"Failed to apply skip marker for Schema {schema}. Schema not found.")

@@ -156,7 +159,7 @@ def _get_databases_in_scope(self, databases: set[str]):
 
     def _get_database_in_scope_task(self, database: str) -> str | None:
         describe = {}
-        for value in self._backend.fetch(f"DESCRIBE SCHEMA EXTENDED {database}"):
+        for value in self._backend.fetch(f"DESCRIBE SCHEMA EXTENDED {escape_sql_identifier(database)}"):
             describe[value["database_description_item"]] = value["database_description_value"]
         if self.UCX_SKIP_PROPERTY in TablesCrawler.parse_database_props(describe.get("Properties", "").lower()):
             logger.info(f"Database {database} is marked to be skipped")

@@ -170,7 +173,9 @@ def _get_table_in_scope_task(self, table_to_migrate: TableToMigrate) -> TableToM
         if self._exists_in_uc(table, rule.as_uc_table_key):
             logger.info(f"The intended target for {table.key}, {rule.as_uc_table_key}, already exists.")
             return None
-        result = self._backend.fetch(f"SHOW TBLPROPERTIES `{table.database}`.`{table.name}`")
+        result = self._backend.fetch(
+            f"SHOW TBLPROPERTIES {escape_sql_identifier(table.database)}.{escape_sql_identifier(table.name)}"
+        )
         for value in result:
             if value["key"] == self.UCX_SKIP_PROPERTY:
                 logger.info(f"{table.key} is marked to be skipped")

src/databricks/labs/ucx/hive_metastore/tables.py

Lines changed: 9 additions & 6 deletions
@@ -8,6 +8,7 @@
 from databricks.labs.blueprint.parallel import Threads
 
 from databricks.labs.ucx.framework.crawlers import CrawlerBase, SqlBackend
+from databricks.labs.ucx.framework.utils import escape_sql_identifier
 from databricks.labs.ucx.mixins.sql import Row
 
 logger = logging.getLogger(__name__)

@@ -96,16 +97,16 @@ def is_databricks_dataset(self) -> bool:
         return False
 
     def sql_migrate_external(self, target_table_key):
-        return f"SYNC TABLE {target_table_key} FROM {self.key};"
+        return f"SYNC TABLE {escape_sql_identifier(target_table_key)} FROM {escape_sql_identifier(self.key)};"
 
     def sql_migrate_dbfs(self, target_table_key):
         if not self.is_delta:
             msg = f"{self.key} is not DELTA: {self.table_format}"
             raise ValueError(msg)
-        return f"CREATE TABLE IF NOT EXISTS {target_table_key} DEEP CLONE {self.key};"
+        return f"CREATE TABLE IF NOT EXISTS {escape_sql_identifier(target_table_key)} DEEP CLONE {escape_sql_identifier(self.key)};"
 
     def sql_migrate_view(self, target_table_key):
-        return f"CREATE VIEW IF NOT EXISTS {target_table_key} AS {self.view_text};"
+        return f"CREATE VIEW IF NOT EXISTS {escape_sql_identifier(target_table_key)} AS {self.view_text};"
 
 
 @dataclass

@@ -163,7 +164,7 @@ def parse_database_props(tbl_props: str) -> dict:
 
     def _try_load(self) -> Iterable[Table]:
         """Tries to load table information from the database or throws TABLE_OR_VIEW_NOT_FOUND error"""
-        for row in self._fetch(f"SELECT * FROM {self._full_name}"):
+        for row in self._fetch(f"SELECT * FROM {escape_sql_identifier(self._full_name)}"):
             yield Table(*row)
 
     def _crawl(self) -> Iterable[Table]:

@@ -184,7 +185,9 @@ def _crawl(self) -> Iterable[Table]:
         catalog = "hive_metastore"
         for (database,) in self._all_databases():
             logger.debug(f"[{catalog}.{database}] listing tables")
-            for _, table, _is_tmp in self._fetch(f"SHOW TABLES FROM {catalog}.{database}"):
+            for _, table, _is_tmp in self._fetch(
+                f"SHOW TABLES FROM {escape_sql_identifier(catalog)}.{escape_sql_identifier(database)}"
+            ):
                 tasks.append(partial(self._describe, catalog, database, table))
         catalog_tables, errors = Threads.gather(f"listing tables in {catalog}", tasks)
         if len(errors) > 0:

@@ -209,7 +212,7 @@ def _describe(self, catalog: str, database: str, table: str) -> Table | None:
         try:
             logger.debug(f"[{full_name}] fetching table metadata")
             describe = {}
-            for key, value, _ in self._fetch(f"DESCRIBE TABLE EXTENDED {full_name}"):
+            for key, value, _ in self._fetch(f"DESCRIBE TABLE EXTENDED {escape_sql_identifier(full_name)}"):
                 describe[key] = value
             return Table(
                 catalog=catalog.lower(),
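
Similarly, a sketch of the effect on the migration DDL produced by sql_migrate_dbfs; both table keys below are made up for the example:

from databricks.labs.ucx.framework.utils import escape_sql_identifier

source_key = "hive_metastore.sales-db.orders"  # hypothetical HMS table key
target_key = "main.sales.orders"  # hypothetical Unity Catalog target key
print(f"CREATE TABLE IF NOT EXISTS {escape_sql_identifier(target_key)} DEEP CLONE {escape_sql_identifier(source_key)};")
# CREATE TABLE IF NOT EXISTS main.sales.orders DEEP CLONE hive_metastore.`sales-db`.orders;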

src/databricks/labs/ucx/hive_metastore/udfs.py

Lines changed: 7 additions & 4 deletions
@@ -6,6 +6,7 @@
 from databricks.labs.blueprint.parallel import Threads
 
 from databricks.labs.ucx.framework.crawlers import CrawlerBase, SqlBackend
+from databricks.labs.ucx.framework.utils import escape_sql_identifier
 from databricks.labs.ucx.mixins.sql import Row
 
 logger = logging.getLogger(__name__)

@@ -54,7 +55,7 @@ def snapshot(self) -> list[Udf]:
 
     def _try_load(self) -> Iterable[Udf]:
         """Tries to load udf information from the database or throws TABLE_OR_VIEW_NOT_FOUND error"""
-        for row in self._fetch(f"SELECT * FROM {self._full_name}"):
+        for row in self._fetch(f"SELECT * FROM {escape_sql_identifier(self._full_name)}"):
             yield Udf(*row)
 
     def _crawl(self) -> Iterable[Udf]:

@@ -63,10 +64,12 @@ def _crawl(self) -> Iterable[Udf]:
         catalog = "hive_metastore"
         # need to set the current catalog otherwise "SHOW USER FUNCTIONS FROM" is raising error:
         # "target schema <database> is not in the current catalog"
-        self._exec(f"USE CATALOG {catalog};")
+        self._exec(f"USE CATALOG {escape_sql_identifier(catalog)};")
         for (database,) in self._all_databases():
             logger.debug(f"[{catalog}.{database}] listing udfs")
-            for (udf,) in self._fetch(f"SHOW USER FUNCTIONS FROM {catalog}.{database};"):
+            for (udf,) in self._fetch(
+                f"SHOW USER FUNCTIONS FROM {escape_sql_identifier(catalog)}.{escape_sql_identifier(database)};"
+            ):
                 if udf.startswith(f"{catalog}.{database}"):
                     udf_name = udf[udf.rfind(".") + 1 :]  # remove catalog and database info from the name
                     tasks.append(partial(self._describe, catalog, database, udf_name))

@@ -83,7 +86,7 @@ def _describe(self, catalog: str, database: str, udf: str) -> Udf | None:
         try:
             logger.debug(f"[{full_name}] fetching udf metadata")
             describe = {}
-            for key_value in self._fetch(f"DESCRIBE FUNCTION EXTENDED {full_name}"):
+            for key_value in self._fetch(f"DESCRIBE FUNCTION EXTENDED {escape_sql_identifier(full_name)}"):
                 if ":" in key_value:  # skip free text configs that don't have a key
                     key, value = key_value.split(":")
                     describe[key] = value.strip()

tests/unit/framework/test_utils.py

Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@
+from databricks.labs.ucx.framework.utils import escape_sql_identifier
+
+
+def test_escaped_path():
+    # test with optional escaping
+    assert "a" == escape_sql_identifier("a")
+    assert "a.b" == escape_sql_identifier("a.b")
+    assert "a.b.c" == escape_sql_identifier("a.b.c")
+    assert "a" == escape_sql_identifier("a")
+    assert "a.b" == escape_sql_identifier("a.b")
+    assert "`a`.`b`.`c`" == escape_sql_identifier("`a`.`b`.`c`")
+    assert "`a.b`.`c`" == escape_sql_identifier("`a.b`.`c`")
+    assert "`a-b`.c.d" == escape_sql_identifier("a-b.c.d")
+    assert "a.`b-c`.d" == escape_sql_identifier("a.b-c.d")
+    assert "a.b.`c-d`" == escape_sql_identifier("a.b.c-d")
+    assert "`✨`.`🍰`.`✨`" == escape_sql_identifier("✨.🍰.✨")
+    assert "a.b.`c.d`" == escape_sql_identifier("a.b.c.d")
+    # test with escaping enforced
+    assert "`a`" == escape_sql_identifier("a", False)
+    assert "`a`.`b`" == escape_sql_identifier("a.b", False)
+    assert "`a`.`b`.`c`" == escape_sql_identifier("a.b.c", False)
+    assert "`a-b`.`c`.`d`" == escape_sql_identifier("a-b.c.d", False)
+    assert "`a`.`b-c`.`d`" == escape_sql_identifier("a.b-c.d", False)
+    assert "`a`.`b`.`c-d`" == escape_sql_identifier("a.b.c-d", False)
+    assert "`a`.`b`.`c.d`" == escape_sql_identifier("a.b.c.d", False)

tests/unit/hive_metastore/test_mapping.py

Lines changed: 8 additions & 10 deletions
@@ -131,12 +131,10 @@ def test_skip_happy_path(mocker, caplog):
     sbe = mocker.patch("databricks.labs.ucx.framework.crawlers.StatementExecutionBackend.__init__")
     mapping = TableMapping(ws, sbe)
     mapping.skip_table(schema="schema", table="table")
-    sbe.execute.assert_called_with(
-        f"ALTER TABLE `schema`.`table` SET TBLPROPERTIES('{mapping.UCX_SKIP_PROPERTY}' = true)"
-    )
+    sbe.execute.assert_called_with(f"ALTER TABLE schema.table SET TBLPROPERTIES('{mapping.UCX_SKIP_PROPERTY}' = true)")
     assert len(caplog.records) == 0
     mapping.skip_schema(schema="schema")
-    sbe.execute.assert_called_with(f"ALTER SCHEMA `schema` SET DBPROPERTIES('{mapping.UCX_SKIP_PROPERTY}' = true)")
+    sbe.execute.assert_called_with(f"ALTER SCHEMA schema SET DBPROPERTIES('{mapping.UCX_SKIP_PROPERTY}' = true)")
     assert len(caplog.records) == 0
 
 

@@ -174,13 +172,13 @@ def test_skip_tables_marked_for_skipping_or_upgraded():
             ["test_schema2"],
             ["test_schema3"],
         ],
-        "SHOW TBLPROPERTIES `test_schema1`.`test_table1`": [
+        "SHOW TBLPROPERTIES test_schema1.test_table1": [
             {"key": "upgraded_to", "value": "fake_dest"},
         ],
-        "SHOW TBLPROPERTIES `test_schema1`.`test_view1`": [
+        "SHOW TBLPROPERTIES test_schema1.test_view1": [
             {"key": "databricks.labs.ucx.skip", "value": "true"},
         ],
-        "SHOW TBLPROPERTIES `test_schema1`.`test_table2`": [
+        "SHOW TBLPROPERTIES test_schema1.test_table2": [
             {"key": "upgraded_to", "value": "fake_dest"},
         ],
         "DESCRIBE SCHEMA EXTENDED test_schema1": [],

@@ -280,7 +278,7 @@ def test_table_with_no_target_reverted():
 def test_table_with_no_target_reverted():
     errors = {}
     rows = {
-        "SHOW TBLPROPERTIES `schema1`.`table1`": [
+        "SHOW TBLPROPERTIES schema1.table1": [
             {"key": "upgraded_to", "value": "non.existing.table"},
         ],
     }

@@ -429,8 +427,8 @@ def test_skipping_rules_database_skipped():
     ]
     table_mapping.get_tables_to_migrate(tables_crawler)
 
-    assert "SHOW TBLPROPERTIES `schema1`.`table1`" in backend.queries
-    assert "SHOW TBLPROPERTIES `schema2`.`table2`" not in backend.queries
+    assert "SHOW TBLPROPERTIES schema1.table1" in backend.queries
+    assert "SHOW TBLPROPERTIES schema2.table2" not in backend.queries
 
 
 def test_skip_missing_table_in_snapshot():
