Feat: Add the ability to opt-out of the time_column being included in partitioned_by (#8)

erindru · web-flow · commit a3222d8febf1 · 2025-03-13T10:28:35.000+13:00
diff --git a/.github/actions/setup-base/action.yaml b/.github/actions/setup-base/action.yaml
@@ -16,4 +16,4 @@ runs:
         cache: 'pip'
     - name: Install dependencies
       shell: bash
-      run: make install
+      run: make install-dev
diff --git a/Makefile b/Makefile
@@ -1,4 +1,4 @@
-install:
+install-dev:
 	pip install -e ".[dev]"
 
 test:
diff --git a/README.md b/README.md
@@ -65,12 +65,31 @@ The properties are as follows:
 
 #### time_column
 
-This is the column in the dataset that contains the timestamp. It follows the [same syntax](https://sqlmesh.readthedocs.io/en/latest/concepts/models/model_kinds/#time-column) as upstream `INCREMENTAL_BY_TIME_RANGE`.
+This is the column in the dataset that contains the timestamp. It follows the [same syntax](https://sqlmesh.readthedocs.io/en/latest/concepts/models/model_kinds/#time-column) as upstream `INCREMENTAL_BY_TIME_RANGE`, and the same rules with regard to respecting the project [time_column_format](https://sqlmesh.readthedocs.io/en/stable/reference/configuration/#environments) property and being automatically added to the model `partitioned_by` field list.
 
 #### primary_key
 
 This is the column or combination of columns that uniquely identifies a record.
 
 The columns listed here are used in the `ON` clause of the SQL Merge to join the source and target datasets.
 
-Note that the `time_column` is **not** automatically injected into this list (to allow timestamps on records to be updated), so if the `time_column` does actually form part of the primary key in your dataset then it needs to be added here.
+Note that the `time_column` is **not** automatically injected into this list (to allow timestamps on records to be updated), so if the `time_column` does actually form part of the primary key in your dataset then it needs to be added here.
+
+#### partition_by_time_column
+
+By default, the `time_column` will get added to the list of fields in the model `partitioned_by` property, causing it to be included in the table partition key. This may be undesirable in some circumstances.
+
+To opt out of this behaviour, you can set `partition_by_time_column = false` like so:
+
+```
+MODEL (
+    name my_db.my_model,
+    kind CUSTOM (
+        materialization 'non_idempotent_incremental_by_time_range',
+        materialization_properties (
+            ...,
+            partition_by_time_column = false
+        )
+    )
+);
+```
diff --git a/pyproject.toml b/pyproject.toml
@@ -5,7 +5,7 @@ description = "Utilities for SQLMesh"
 readme = "README.md"
 requires-python = ">= 3.9"
 dependencies = [
-    "sqlmesh>=0.160.0"
+    "sqlmesh>=0.163.0"
 ]
 
 [project.optional-dependencies]
diff --git a/sqlmesh_utils/materializations/non_idempotent_incremental_by_time_range.py b/sqlmesh_utils/materializations/non_idempotent_incremental_by_time_range.py
@@ -7,7 +7,7 @@
 from sqlmesh.utils.date import make_inclusive
 from sqlmesh.utils.errors import ConfigError, SQLMeshError
 from pydantic import model_validator
-from sqlmesh.utils.pydantic import list_of_fields_validator
+from sqlmesh.utils.pydantic import list_of_fields_validator, bool_validator
 from sqlmesh.utils.date import TimeLike
 from sqlmesh.core.engine_adapter.base import MERGE_SOURCE_ALIAS, MERGE_TARGET_ALIAS
 from sqlmesh import CustomKind
@@ -22,6 +22,8 @@ class NonIdempotentIncrementalByTimeRangeKind(CustomKind):
     # this is deliberately primary_key instead of unique_key to direct away from INCREMENTAL_BY_UNIQUE_KEY
     _primary_key: t.List[exp.Expression]
 
+    _partition_by_time_column: bool
+
     @model_validator(mode="after")
     def _validate_model(self):
         self._time_column = TimeColumn.create(
@@ -44,6 +46,10 @@ def _validate_model(self):
                 "`primary_key` cannot be just the time_column. Please list the columns that when combined, uniquely identify a row"
             )
 
+        self._partition_by_time_column = bool_validator(
+            self.materialization_properties.get("partition_by_time_column", True)
+        )
+
         return self
 
     @property
@@ -54,6 +60,10 @@ def time_column(self) -> TimeColumn:
     def primary_key(self) -> t.List[exp.Expression]:
         return self._primary_key
 
+    @property
+    def partition_by_time_column(self) -> bool:
+        return self._partition_by_time_column
+
 
 class NonIdempotentIncrementalByTimeRangeMaterialization(
     CustomMaterialization[NonIdempotentIncrementalByTimeRangeKind]
diff --git a/tests/materializations/test_non_idempotent_incremental_by_time_range.py b/tests/materializations/test_non_idempotent_incremental_by_time_range.py
@@ -51,6 +51,9 @@ def test_kind(make_model: ModelMaker):
     model = make_model(["time_column = ds", "primary_key = (id, ds)"])
     assert isinstance(model.kind, NonIdempotentIncrementalByTimeRangeKind)
 
+    assert model.partitioned_by == [exp.to_column("ds", quoted=True)]
+    assert model.kind.partition_by_time_column
+
     assert model.kind.time_column.column == exp.to_column("ds", quoted=True)
     assert model.kind.primary_key == [
         exp.to_column("id", quoted=True),
@@ -157,3 +160,13 @@ def test_append(make_model: ModelMaker, make_mocked_engine_adapter: MockedEngine
             dialect=adapter.dialect,
         ).sql(dialect=adapter.dialect)
     ]
+
+
+def test_partition_by_time_column_opt_out(make_model: ModelMaker):
+    model = make_model(
+        ["time_column = ds", "primary_key = name", "partition_by_time_column = false"]
+    )
+
+    assert isinstance(model.kind, NonIdempotentIncrementalByTimeRangeKind)
+    assert not model.kind.partition_by_time_column
+    assert model.partitioned_by == []

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-install:`
	`1`	`+install-dev:`
`2`	`2`	`pip install -e ".[dev]"`
`3`	`3`
`4`	`4`	`test:`
Original file line number	Diff line number	Diff line change
`@@ -5,7 +5,7 @@ description = "Utilities for SQLMesh"`
`5`	`5`	`readme = "README.md"`
`6`	`6`	`requires-python = ">= 3.9"`
`7`	`7`	`dependencies = [`
`8`		`- "sqlmesh>=0.160.0"`
	`8`	`+ "sqlmesh>=0.163.0"`
`9`	`9`	`]`
`10`	`10`
`11`	`11`	`[project.optional-dependencies]`