@@ -3990,3 +3990,40 @@ def _set_config(gateway: str, config: Config) -> None:
39903990 was_evaluated = True ,
39913991 day_delta = 4 ,
39923992 )
3993+
3994+
def test_unicode_characters(ctx: TestContext, tmp_path: Path):
    """Check that a model with non-ASCII characters in its name can be planned and applied.

    A FULL model named ``客户数据`` is written to ``tmp_path / "models"``, a context is
    created on that path and a plan is auto-applied. The test then asserts that exactly
    one view with the model's name exists in the test schema, and that exactly one table
    exists in the ``sqlmesh__``-prefixed schema with the expected name prefix.

    Args:
        ctx: Integration test context providing the engine adapter, dialect, schema
            and metadata helpers used below.
        tmp_path: Temporary project directory (pytest fixture) the model is written to.
    """
    # Engines that don't quote identifiers in views are incompatible with unicode characters
    # in model names. At the time of writing this is Spark/Trino, and they do this for
    # compatibility reasons. Spark may not support unicode at all, but that is unverified.
    if not ctx.engine_adapter.QUOTE_IDENTIFIERS_IN_VIEWS:
        pytest.skip("Skipping as these engines have issues with unicode characters in model names")

    model_name = "客户数据"
    table = ctx.table(model_name).sql(dialect=ctx.dialect)

    models_dir = tmp_path / "models"
    models_dir.mkdir(exist_ok=True)

    model_def = f"""
    MODEL (
        name {table},
        kind FULL,
        dialect '{ctx.dialect}'
    );
    SELECT 1 as id
    """

    # Write with an explicit encoding: the default is locale-dependent, and a non-UTF-8
    # locale would either fail to encode the model name or produce a file that is decoded
    # incorrectly when read back.
    (models_dir / f"{model_name}.sql").write_text(model_def, encoding="utf-8")

    context = ctx.create_context(path=tmp_path)
    context.plan(auto_apply=True, no_prompts=True)

    # The view in the test schema must carry the unicode model name.
    results = ctx.get_metadata_results()
    assert len(results.views) == 1
    assert results.views[0].lower() == model_name

    # Inspect the schema whose name is the test schema prefixed with "sqlmesh__".
    schema = d.to_schema(ctx.schema(), dialect=ctx.dialect)
    schema_name = schema.args["db"].this
    schema.args["db"].set("this", "sqlmesh__" + schema_name)
    table_results = ctx.get_metadata_results(schema)
    assert len(table_results.tables) == 1
    # NOTE(review): the eight underscores presumably come from "__" separators around the
    # four non-ASCII characters, each replaced by "_" -- confirm against the table naming logic.
    assert table_results.tables[0].lower().startswith(schema_name.lower() + "________")
0 commit comments