Skip to content

Commit a3222d8

Browse files
authored
Feat: Add the ability to opt-out of the time_column being included in partitioned_by (#8)
1 parent 6b8099c commit a3222d8

File tree

6 files changed

+48
-6
lines changed

6 files changed

+48
-6
lines changed

.github/actions/setup-base/action.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,4 @@ runs:
1616
cache: 'pip'
1717
- name: Install dependencies
1818
shell: bash
19-
run: make install
19+
run: make install-dev

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
install:
1+
install-dev:
22
pip install -e ".[dev]"
33

44
test:

README.md

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,12 +65,31 @@ The properties are as follows:
6565

6666
#### time_column
6767

68-
This is the column in the dataset that contains the timestamp. It follows the [same syntax](https://sqlmesh.readthedocs.io/en/latest/concepts/models/model_kinds/#time-column) as upstream `INCREMENTAL_BY_TIME_RANGE`.
68+
This is the column in the dataset that contains the timestamp. It follows the [same syntax](https://sqlmesh.readthedocs.io/en/latest/concepts/models/model_kinds/#time-column) as upstream `INCREMENTAL_BY_TIME_RANGE`. It also follows the same rules: it respects the project [time_column_format](https://sqlmesh.readthedocs.io/en/stable/reference/configuration/#environments) property and, by default, is automatically added to the model `partitioned_by` field list.
6969

7070
#### primary_key
7171

7272
This is the column or combination of columns that uniquely identifies a record.
7373

7474
The columns listed here are used in the `ON` clause of the SQL Merge to join the source and target datasets.
7575

76-
Note that the `time_column` is **not** automatically injected into this list (to allow timestamps on records to be updated), so if the `time_column` does actually form part of the primary key in your dataset then it needs to be added here.
76+
Note that the `time_column` is **not** automatically injected into this list (to allow timestamps on records to be updated), so if the `time_column` does actually form part of the primary key in your dataset then it needs to be added here.
77+
78+
#### partition_by_time_column
79+
80+
By default, the `time_column` is added to the list of fields in the model `partitioned_by` property, which causes it to be included in the table partition key. This may be undesirable in some circumstances.
81+
82+
To opt out of this behaviour, you can set `partition_by_time_column = false` like so:
83+
84+
```
85+
MODEL (
86+
name my_db.my_model,
87+
kind CUSTOM (
88+
materialization 'non_idempotent_incremental_by_time_range',
89+
materialization_properties (
90+
...,
91+
partition_by_time_column = false
92+
)
93+
)
94+
);
95+
```

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ description = "Utilities for SQLMesh"
55
readme = "README.md"
66
requires-python = ">= 3.9"
77
dependencies = [
8-
"sqlmesh>=0.160.0"
8+
"sqlmesh>=0.163.0"
99
]
1010

1111
[project.optional-dependencies]

sqlmesh_utils/materializations/non_idempotent_incremental_by_time_range.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from sqlmesh.utils.date import make_inclusive
88
from sqlmesh.utils.errors import ConfigError, SQLMeshError
99
from pydantic import model_validator
10-
from sqlmesh.utils.pydantic import list_of_fields_validator
10+
from sqlmesh.utils.pydantic import list_of_fields_validator, bool_validator
1111
from sqlmesh.utils.date import TimeLike
1212
from sqlmesh.core.engine_adapter.base import MERGE_SOURCE_ALIAS, MERGE_TARGET_ALIAS
1313
from sqlmesh import CustomKind
@@ -22,6 +22,8 @@ class NonIdempotentIncrementalByTimeRangeKind(CustomKind):
2222
# this is deliberately primary_key instead of unique_key to direct away from INCREMENTAL_BY_UNIQUE_KEY
2323
_primary_key: t.List[exp.Expression]
2424

25+
_partition_by_time_column: bool
26+
2527
@model_validator(mode="after")
2628
def _validate_model(self):
2729
self._time_column = TimeColumn.create(
@@ -44,6 +46,10 @@ def _validate_model(self):
4446
"`primary_key` cannot be just the time_column. Please list the columns that when combined, uniquely identify a row"
4547
)
4648

49+
self._partition_by_time_column = bool_validator(
50+
self.materialization_properties.get("partition_by_time_column", True)
51+
)
52+
4753
return self
4854

4955
@property
@@ -54,6 +60,10 @@ def time_column(self) -> TimeColumn:
5460
def primary_key(self) -> t.List[exp.Expression]:
5561
return self._primary_key
5662

63+
@property
64+
def partition_by_time_column(self) -> bool:
65+
return self._partition_by_time_column
66+
5767

5868
class NonIdempotentIncrementalByTimeRangeMaterialization(
5969
CustomMaterialization[NonIdempotentIncrementalByTimeRangeKind]

tests/materializations/test_non_idempotent_incremental_by_time_range.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@ def test_kind(make_model: ModelMaker):
5151
model = make_model(["time_column = ds", "primary_key = (id, ds)"])
5252
assert isinstance(model.kind, NonIdempotentIncrementalByTimeRangeKind)
5353

54+
assert model.partitioned_by == [exp.to_column("ds", quoted=True)]
55+
assert model.kind.partition_by_time_column
56+
5457
assert model.kind.time_column.column == exp.to_column("ds", quoted=True)
5558
assert model.kind.primary_key == [
5659
exp.to_column("id", quoted=True),
@@ -157,3 +160,13 @@ def test_append(make_model: ModelMaker, make_mocked_engine_adapter: MockedEngine
157160
dialect=adapter.dialect,
158161
).sql(dialect=adapter.dialect)
159162
]
163+
164+
165+
def test_partition_by_time_column_opt_out(make_model: ModelMaker):
166+
model = make_model(
167+
["time_column = ds", "primary_key = name", "partition_by_time_column = false"]
168+
)
169+
170+
assert isinstance(model.kind, NonIdempotentIncrementalByTimeRangeKind)
171+
assert not model.kind.partition_by_time_column
172+
assert model.partitioned_by == []

0 commit comments

Comments
 (0)