update partition handling in integration tests

xinge-ji · xinge-ji · commit fee745c9400f · 2025-10-15T02:39:01.000+08:00
diff --git a/sqlmesh/core/engine_adapter/doris.py b/sqlmesh/core/engine_adapter/doris.py
@@ -41,7 +41,7 @@ class DorisEngineAdapter(
     LogicalMergeMixin, PandasNativeFetchDFSupportMixin, NonTransactionalTruncateMixin
 ):
     DIALECT = "doris"
-    DEFAULT_BATCH_SIZE = 200
+    DEFAULT_BATCH_SIZE = 5000
     SUPPORTS_TRANSACTIONS = False
     COMMENT_CREATION_TABLE = CommentCreationTable.IN_SCHEMA_DEF_NO_CTAS
     COMMENT_CREATION_VIEW = CommentCreationView.IN_SCHEMA_DEF_NO_COMMANDS
@@ -54,8 +54,6 @@ class DorisEngineAdapter(
     SUPPORTS_MATERIALIZED_VIEW_SCHEMA = True
     SUPPORTS_CREATE_DROP_CATALOG = False
     INSERT_OVERWRITE_STRATEGY = InsertOverwriteStrategy.DELETE_INSERT
-    # default setting `enable_unicode_name_support=false` so it is incompatible with unicode characters in model names
-    QUOTE_IDENTIFIERS_IN_VIEWS = False
 
     def create_schema(
         self,
@@ -937,33 +935,6 @@ def _parse_trigger_string(
             if add_partition:
                 partitions = table_properties_copy.pop("partitions", None)
 
-                # If partitioned_by is provided but partitions is not, add dynamic partition properties
-                # Skip dynamic partitions for materialized views as they use different partitioning
-                if partitioned_by and not partitions and not is_materialized_view:
-                    # Define the required dynamic partition properties
-                    dynamic_partition_props = {
-                        "dynamic_partition.enable": "true",
-                        "dynamic_partition.time_unit": "DAY",
-                        "dynamic_partition.start": "-490",
-                        "dynamic_partition.end": "10",
-                        "dynamic_partition.prefix": "p",
-                        "dynamic_partition.buckets": "32",
-                        "dynamic_partition.create_history_partition": "true",
-                    }
-
-                    # Use partition_interval_unit if provided to set the time_unit
-                    if partition_interval_unit:
-                        if hasattr(partition_interval_unit, "value"):
-                            time_unit = partition_interval_unit.value.upper()
-                        else:
-                            time_unit = str(partition_interval_unit).upper()
-                        dynamic_partition_props["dynamic_partition.time_unit"] = time_unit
-
-                    # Add missing dynamic partition properties to table_properties_copy
-                    for key, value in dynamic_partition_props.items():
-                        if key not in table_properties_copy:
-                            table_properties_copy[key] = value
-
                 # Build partition expression - different for materialized views vs tables
                 if is_materialized_view:
                     # For materialized views, use PartitionedByProperty
diff --git a/tests/core/engine_adapter/integration/__init__.py b/tests/core/engine_adapter/integration/__init__.py
@@ -266,6 +266,7 @@ def timestamp_columns(self) -> t.List[str]:
             for k, v in self.columns_to_types.items()
             if v.sql().lower().startswith("timestamp")
             or (v.sql().lower() == "datetime" and self.dialect == "bigquery")
+            or (v.sql().lower() == "datetime" and self.dialect == "doris")
         ]
 
     @property
diff --git a/tests/core/engine_adapter/integration/test_integration.py b/tests/core/engine_adapter/integration/test_integration.py
@@ -21,7 +21,6 @@
 import pytest
 import pytz
 import time_machine
-from tenacity import Retrying, stop_after_delay, wait_fixed, retry_if_exception_type
 from sqlglot import exp, parse_one
 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers
 from sqlglot.optimizer.qualify_columns import quote_identifiers
@@ -459,18 +458,18 @@ def test_materialized_view(ctx_query_and_df: TestContext):
     # Make sure that dropping a materialized view also works
     if ctx.engine_adapter.dialect == "doris":
         # Wait for the materialized view to be created by retrying drop until it succeeds
-        for attempt in Retrying(
-            stop=stop_after_delay(5),
-            wait=wait_fixed(1),
-            retry=retry_if_exception_type(Exception),
-            reraise=True,
-        ):
-            with attempt:
+        def drop_view_success():
+            try:
                 ctx.engine_adapter.drop_view(
                     view,
                     materialized=True,
                     view_properties={"materialized_type": "SYNC", "source_table": source_table},
                 )
+                return True
+            except Exception:
+                return False
+
+        wait_until(drop_view_success, attempts=5, wait=1)
     else:
         ctx.engine_adapter.drop_view(view, materialized=True)
         results = ctx.get_metadata_results()
@@ -813,37 +812,37 @@ def test_insert_overwrite_by_time_partition(ctx_query_and_df: TestContext):
     if ctx.dialect == "tsql":
         ds_type = "varchar(max)"
     if ctx.dialect == "doris":
-        ds_type = "date"
+        ds_type = "datetime"
 
-    # Get current year and create dates for testing. Doris cannot have more than 500 history partitions.
-    current_year = datetime.now().year
-    current_date = datetime(current_year, 1, 1)
-    if ctx.dialect == "doris":
-        # For Doris with DATE type, use pandas date objects
-        date_1 = current_date.date()
-        date_2 = (current_date + timedelta(days=1)).date()
-        date_3 = (current_date + timedelta(days=2)).date()
-        date_4 = (current_date + timedelta(days=3)).date()
-        date_5 = (current_date + timedelta(days=4)).date()
-    else:
-        date_1 = current_date.strftime("%Y-%m-%d")
-        date_2 = (current_date + timedelta(days=1)).strftime("%Y-%m-%d")
-        date_3 = (current_date + timedelta(days=2)).strftime("%Y-%m-%d")
-        date_4 = (current_date + timedelta(days=3)).strftime("%Y-%m-%d")
-        date_5 = (current_date + timedelta(days=4)).strftime("%Y-%m-%d")
+    # Use the current date as the base for the test dates.
+    current_date = datetime.now()
+    date_1 = current_date.strftime("%Y-%m-%d")
+    date_2 = (current_date + timedelta(days=1)).strftime("%Y-%m-%d")
+    date_3 = (current_date + timedelta(days=2)).strftime("%Y-%m-%d")
+    date_4 = (current_date + timedelta(days=3)).strftime("%Y-%m-%d")
+    date_5 = (current_date + timedelta(days=4)).strftime("%Y-%m-%d")
+    date_6 = (current_date + timedelta(days=5)).strftime("%Y-%m-%d")
 
     ctx.columns_to_types = {"id": "int", "ds": ds_type}
     table = ctx.table("test_table")
     if ctx.dialect == "bigquery":
         partitioned_by = ["DATE(ds)"]
     else:
         partitioned_by = ctx.partitioned_by  # type: ignore
+    if ctx.dialect == "doris":
+        table_properties = {
+            "partitions": f"FROM ('{date_1}') TO ('{date_6}') INTERVAL 1 DAY",
+        }
+    else:
+        table_properties = {}
+
     ctx.engine_adapter.create_table(
         table,
         ctx.columns_to_types,
         partitioned_by=partitioned_by,
         partition_interval_unit="DAY",
         table_format=ctx.default_table_format,
+        table_properties=table_properties,
     )
     input_data = pd.DataFrame(
         [
@@ -922,24 +921,16 @@ def test_insert_overwrite_by_time_partition_source_columns(ctx_query_and_df: Tes
     if ctx.dialect == "tsql":
         ds_type = "varchar(max)"
     if ctx.dialect == "doris":
-        ds_type = "date"
+        ds_type = "datetime"
 
-    # Get current year and create dates for testing. Doris cannot have more than 500 history partitions.
-    current_year = datetime.now().year
-    current_date = datetime(current_year, 1, 1)
-    if ctx.dialect == "doris":
-        # For Doris with DATE type, use pandas date objects
-        date_1 = current_date.date()
-        date_2 = (current_date + timedelta(days=1)).date()
-        date_3 = (current_date + timedelta(days=2)).date()
-        date_4 = (current_date + timedelta(days=3)).date()
-        date_5 = (current_date + timedelta(days=4)).date()
-    else:
-        date_1 = current_date.strftime("%Y-%m-%d")
-        date_2 = (current_date + timedelta(days=1)).strftime("%Y-%m-%d")
-        date_3 = (current_date + timedelta(days=2)).strftime("%Y-%m-%d")
-        date_4 = (current_date + timedelta(days=3)).strftime("%Y-%m-%d")
-        date_5 = (current_date + timedelta(days=4)).strftime("%Y-%m-%d")
+    # Use the current date as the base for the test dates.
+    current_date = datetime.now()
+    date_1 = current_date.strftime("%Y-%m-%d")
+    date_2 = (current_date + timedelta(days=1)).strftime("%Y-%m-%d")
+    date_3 = (current_date + timedelta(days=2)).strftime("%Y-%m-%d")
+    date_4 = (current_date + timedelta(days=3)).strftime("%Y-%m-%d")
+    date_5 = (current_date + timedelta(days=4)).strftime("%Y-%m-%d")
+    date_6 = (current_date + timedelta(days=5)).strftime("%Y-%m-%d")
 
     ctx.columns_to_types = {"id": "int", "ds": ds_type}
     columns_to_types = {
@@ -952,12 +943,20 @@ def test_insert_overwrite_by_time_partition_source_columns(ctx_query_and_df: Tes
         partitioned_by = ["DATE(ds)"]
     else:
         partitioned_by = ctx.partitioned_by  # type: ignore
+    if ctx.dialect == "doris":
+        table_properties = {
+            "partitions": f"FROM ('{date_1}') TO ('{date_6}') INTERVAL 1 DAY",
+        }
+    else:
+        table_properties = {}
+
     ctx.engine_adapter.create_table(
         table,
         columns_to_types,
         partitioned_by=partitioned_by,
         partition_interval_unit="DAY",
         table_format=ctx.default_table_format,
+        table_properties=table_properties,
     )
     input_data = pd.DataFrame(
         [
@@ -2181,6 +2180,18 @@ def _mutate_config(gateway: str, config: Config) -> None:
             )
             context._models.update({model_key: model})
 
+    # Doris requires partitions to be set in physical_properties for INCREMENTAL_BY_TIME_RANGE models
+    if ctx.dialect == "doris":
+        for model_key, model in context._models.items():
+            if model.kind.name == "INCREMENTAL_BY_TIME_RANGE":
+                end_plus_1day = to_date(end + timedelta(days=1))
+                partitions = f"FROM ('{start.strftime('%Y-%m-%d')}') TO ('{end_plus_1day.strftime('%Y-%m-%d')}') INTERVAL 1 DAY"
+
+                model_physical_props = model.copy(
+                    update={"physical_properties": {"partitions": partitions}}
+                )
+                context._models.update({model_key: model_physical_props})
+
     plan: Plan = context.plan(
         environment="test_prod",
         start=start,
@@ -4086,6 +4097,11 @@ def test_unicode_characters(ctx: TestContext, tmp_path: Path):
     # I also think Spark may not support unicode in general but that would need to be verified.
     if not ctx.engine_adapter.QUOTE_IDENTIFIERS_IN_VIEWS:
         pytest.skip("Skipping as these engines have issues with unicode characters in model names")
+    # Doris defaults to `enable_unicode_name_support=false`, so it is incompatible with unicode characters in model names
+    if ctx.dialect == "doris":
+        pytest.skip(
+            "Skipping as Doris default setting has issues with unicode characters in model names"
+        )
 
     model_name = "客户数据"
     table = ctx.table(model_name).sql(dialect=ctx.dialect)

Original file line number	Diff line number	Diff line change
`@@ -266,6 +266,7 @@ def timestamp_columns(self) -> t.List[str]:`
`266`	`266`	`for k, v in self.columns_to_types.items()`
`267`	`267`	`if v.sql().lower().startswith("timestamp")`
`268`	`268`	`or (v.sql().lower() == "datetime" and self.dialect == "bigquery")`
	`269`	`+ or (v.sql().lower() == "datetime" and self.dialect == "doris")`
`269`	`270`	`]`
`270`	`271`
`271`	`272`	`@property`