Skip to content

Commit 22e37d2

Browse files
authored
fix: unicode in model name databricks (#5465)
1 parent c67a2fd commit 22e37d2

File tree

3 files changed

+40
-1
lines changed

sqlmesh/core/engine_adapter/databricks.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ class DatabricksEngineAdapter(SparkEngineAdapter):
3434
SUPPORTS_CLONING = True
3535
SUPPORTS_MATERIALIZED_VIEWS = True
3636
SUPPORTS_MATERIALIZED_VIEW_SCHEMA = True
37+
# Spark sets this to False for compatibility when mixing with Trino, but that isn't a concern with Databricks.
38+
QUOTE_IDENTIFIERS_IN_VIEWS = True
3739
SCHEMA_DIFFER_KWARGS = {
3840
"support_positional_add": True,
3941
"nested_support": NestedSupport.ALL,

tests/core/engine_adapter/integration/test_integration.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3990,3 +3990,40 @@ def _set_config(gateway: str, config: Config) -> None:
39903990
was_evaluated=True,
39913991
day_delta=4,
39923992
)
3993+
3994+
3995+
def test_unicode_characters(ctx: TestContext, tmp_path: Path):
3996+
# Engines that don't quote identifiers in views are incompatible with unicode characters in model names.
3997+
# At the time of writing, these engines are Spark and Trino, which do this for compatibility reasons.
3998+
# Spark may also not support unicode in general, but that would need to be verified.
3999+
if not ctx.engine_adapter.QUOTE_IDENTIFIERS_IN_VIEWS:
4000+
pytest.skip("Skipping as these engines have issues with unicode characters in model names")
4001+
4002+
model_name = "客户数据"
4003+
table = ctx.table(model_name).sql(dialect=ctx.dialect)
4004+
(tmp_path / "models").mkdir(exist_ok=True)
4005+
4006+
model_def = f"""
4007+
MODEL (
4008+
name {table},
4009+
kind FULL,
4010+
dialect '{ctx.dialect}'
4011+
);
4012+
SELECT 1 as id
4013+
"""
4014+
4015+
(tmp_path / "models" / "客户数据.sql").write_text(model_def)
4016+
4017+
context = ctx.create_context(path=tmp_path)
4018+
context.plan(auto_apply=True, no_prompts=True)
4019+
4020+
results = ctx.get_metadata_results()
4021+
assert len(results.views) == 1
4022+
assert results.views[0].lower() == model_name
4023+
4024+
schema = d.to_schema(ctx.schema(), dialect=ctx.dialect)
4025+
schema_name = schema.args["db"].this
4026+
schema.args["db"].set("this", "sqlmesh__" + schema_name)
4027+
table_results = ctx.get_metadata_results(schema)
4028+
assert len(table_results.tables) == 1
4029+
assert table_results.tables[0].lower().startswith(schema_name.lower() + "________")

tests/core/engine_adapter/test_databricks.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ def test_materialized_view_properties(mocker: MockFixture, make_mocked_engine_ad
195195
sql_calls = to_sql_calls(adapter)
196196
# https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-create-materialized-view.html#syntax
197197
assert sql_calls == [
198-
"CREATE OR REPLACE MATERIALIZED VIEW test_table PARTITIONED BY (ds) AS SELECT 1",
198+
"CREATE OR REPLACE MATERIALIZED VIEW `test_table` PARTITIONED BY (`ds`) AS SELECT 1",
199199
]
200200

201201

0 commit comments

Comments
 (0)