fix: unicode in model name databricks (#5465)

eakmanrq · web-flow · commit 22e37d25d888 · 2025-10-02T08:16:28.000-07:00
diff --git a/sqlmesh/core/engine_adapter/databricks.py b/sqlmesh/core/engine_adapter/databricks.py
@@ -34,6 +34,8 @@ class DatabricksEngineAdapter(SparkEngineAdapter):
     SUPPORTS_CLONING = True
     SUPPORTS_MATERIALIZED_VIEWS = True
     SUPPORTS_MATERIALIZED_VIEW_SCHEMA = True
+    # Spark has this set to false for compatibility when mixing with Trino but that isn't a concern with Databricks
+    QUOTE_IDENTIFIERS_IN_VIEWS = True
     SCHEMA_DIFFER_KWARGS = {
         "support_positional_add": True,
         "nested_support": NestedSupport.ALL,
diff --git a/tests/core/engine_adapter/integration/test_integration.py b/tests/core/engine_adapter/integration/test_integration.py
@@ -3990,3 +3990,40 @@ def _set_config(gateway: str, config: Config) -> None:
         was_evaluated=True,
         day_delta=4,
     )
+
+
+def test_unicode_characters(ctx: TestContext, tmp_path: Path):
+    # Engines that don't quote identifiers in views are incompatible with unicode characters in model names.
+    # At the time of writing these are Spark and Trino, which leave identifiers unquoted for compatibility reasons.
+    # Spark may not support unicode in general, but that would need to be verified.
+    if not ctx.engine_adapter.QUOTE_IDENTIFIERS_IN_VIEWS:
+        pytest.skip("Skipping as these engines have issues with unicode characters in model names")
+
+    model_name = "客户数据"
+    table = ctx.table(model_name).sql(dialect=ctx.dialect)
+    (tmp_path / "models").mkdir(exist_ok=True)
+
+    model_def = f"""
+    MODEL (
+        name {table},
+        kind FULL,
+        dialect '{ctx.dialect}'
+    );
+    SELECT 1 as id
+    """
+    # Encode explicitly: the platform default encoding may be unable to represent the unicode model name.
+    (tmp_path / "models" / f"{model_name}.sql").write_text(model_def, encoding="utf-8")
+
+    context = ctx.create_context(path=tmp_path)
+    context.plan(auto_apply=True, no_prompts=True)
+
+    results = ctx.get_metadata_results()
+    assert len(results.views) == 1
+    assert results.views[0].lower() == model_name
+
+    schema = d.to_schema(ctx.schema(), dialect=ctx.dialect)
+    schema_name = schema.args["db"].this
+    schema.args["db"].set("this", "sqlmesh__" + schema_name)
+    table_results = ctx.get_metadata_results(schema)
+    assert len(table_results.tables) == 1
+    assert table_results.tables[0].lower().startswith(schema_name.lower() + "________")
diff --git a/tests/core/engine_adapter/test_databricks.py b/tests/core/engine_adapter/test_databricks.py
@@ -195,7 +195,7 @@ def test_materialized_view_properties(mocker: MockFixture, make_mocked_engine_ad
     sql_calls = to_sql_calls(adapter)
     # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-create-materialized-view.html#syntax
     assert sql_calls == [
-        "CREATE OR REPLACE MATERIALIZED VIEW test_table PARTITIONED BY (ds) AS SELECT 1",
+        "CREATE OR REPLACE MATERIALIZED VIEW `test_table` PARTITIONED BY (`ds`) AS SELECT 1",
     ]
 
 

Original file line number	Diff line number	Diff line change
`@@ -195,7 +195,7 @@ def test_materialized_view_properties(mocker: MockFixture, make_mocked_engine_ad`
`195`	`195`	`sql_calls = to_sql_calls(adapter)`
`196`	`196`	`# https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-create-materialized-view.html#syntax`
`197`	`197`	`assert sql_calls == [`
`198`		`- "CREATE OR REPLACE MATERIALIZED VIEW test_table PARTITIONED BY (ds) AS SELECT 1",`
	`198`	`` + "CREATE OR REPLACE MATERIALIZED VIEW `test_table` PARTITIONED BY (`ds`) AS SELECT 1", ``
`199`	`199`	`]`
`200`	`200`
`201`	`201`