Skip to content

Commit 3fc74df

Browse files
authored
MERGE EXTENSION: when not matched by source - update (#866)
Signed-off-by: Dmitry Volodin <[email protected]>
1 parent 40925aa commit 3fc74df

File tree

5 files changed

+100
-11
lines changed

5 files changed

+100
-11
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
## dbt-databricks 1.9.1 (TBD)
22

3+
### Features
4+
5+
- The merge strategy now supports an `update set ...` action with an explicit list of updates in the `when not matched by source` clause ([866](https://github.com/databricks/dbt-databricks/pull/866)) (thanks @mi-volodin).
6+
37
### Under the Hood
48

59
- Removed pins for pandas and pydantic to ease user burdens ([874](https://github.com/databricks/dbt-databricks/pull/874))

dbt/include/databricks/macros/materializations/incremental/strategies.sql

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,15 @@ select {{source_cols_csv}} from {{ source_relation }}
9191

9292
{%- set not_matched_by_source_action = config.get('not_matched_by_source_action') -%}
9393
{%- set not_matched_by_source_condition = config.get('not_matched_by_source_condition') -%}
94-
9594

95+
{%- set not_matched_by_source_action_trimmed = not_matched_by_source_action | lower | trim(' \n\t') %}
96+
{%- set not_matched_by_source_action_is_set = (
97+
not_matched_by_source_action_trimmed == 'delete'
98+
or not_matched_by_source_action_trimmed.startswith('update')
99+
)
100+
%}
101+
102+
96103
{% if unique_key %}
97104
{% if unique_key is sequence and unique_key is not mapping and unique_key is not string %}
98105
{% for key in unique_key %}
@@ -137,12 +144,12 @@ select {{source_cols_csv}} from {{ source_relation }}
137144
then insert
138145
{{ get_merge_insert(on_schema_change, source_columns, source_alias) }}
139146
{%- endif %}
140-
{%- if not_matched_by_source_action == 'delete' %}
147+
{%- if not_matched_by_source_action_is_set %}
141148
when not matched by source
142149
{%- if not_matched_by_source_condition %}
143150
and ({{ not_matched_by_source_condition }})
144151
{%- endif %}
145-
then delete
152+
then {{ not_matched_by_source_action }}
146153
{%- endif %}
147154
{% endmacro %}
148155

docs/databricks-merge.md

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,11 @@ From v.1.9 onwards `merge` behavior can be tuned by setting the additional param
1818

1919
- `skip_matched_step`: if set to `true`, dbt will completely skip the `matched` clause of the merge statement.
2020
- `skip_not_matched_step`: similarly if `true` the `not matched` clause will be skipped.
21-
- `not_matched_by_source_action`: if set to `delete` the corresponding `when not matched by source ... then delete` clause will be added to the merge statement.
21+
- `not_matched_by_source_action`: the action to take when a target record has no matching record in the source dataset.
22+
- if set to `delete` the corresponding `when not matched by source ... then delete` clause will be added to the merge statement.
23+
- if the action starts with `update`, the format `update set <actions>` is assumed and the action text is inserted into the merge statement exactly as provided.
24+
The action may be formatted across multiple lines.
25+
- in all other cases no action is taken and no error is raised.
2226
- `merge_with_schema_evolution`: when set to `true` dbt generates the merge statement with `WITH SCHEMA EVOLUTION` clause.
2327

2428
- Step conditions, expressed as explicit SQL predicates, make the corresponding action run only when the condition is met in addition to the match on the `unique_key`.
@@ -40,7 +44,11 @@ Example below illustrates how these parameters affect the merge statement genera
4044
matched_condition='t.tech_change_ts < s.tech_change_ts',
4145
not_matched_condition='s.attr1 IS NOT NULL',
4246
not_matched_by_source_condition='t.tech_change_ts < current_timestamp()',
43-
not_matched_by_source_action='delete',
47+
not_matched_by_source_action='''
48+
update set
49+
t.attr1 = 'deleted',
50+
t.tech_change_ts = current_timestamp()
51+
''',
4452
merge_with_schema_evolution=true
4553
) }}
4654

@@ -93,5 +101,7 @@ when not matched
93101

94102
when not matched by source
95103
and t.tech_change_ts < current_timestamp()
96-
then delete
104+
then update set
105+
t.attr1 = 'deleted',
106+
t.tech_change_ts = current_timestamp()
97107
```

tests/functional/adapter/incremental/fixtures.py

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -264,7 +264,14 @@
264264
4,Baron,Harkonnen,1
265265
"""
266266

267-
not_matched_by_source_expected = """id,first,second,V
267+
not_matched_by_source_then_del_expected = """id,first,second,V
268+
2,Paul,Atreides,0
269+
3,Dunkan,Aidaho,1
270+
4,Baron,Harkonnen,1
271+
"""
272+
273+
not_matched_by_source_then_upd_expected = """id,first,second,V
274+
1,--,--,-1
268275
2,Paul,Atreides,0
269276
3,Dunkan,Aidaho,1
270277
4,Baron,Harkonnen,1
@@ -411,7 +418,7 @@
411418
{% endif %}
412419
"""
413420

414-
not_matched_by_source_model = """
421+
not_matched_by_source_then_delete_model = """
415422
{{ config(
416423
materialized = 'incremental',
417424
unique_key = 'id',
@@ -446,6 +453,46 @@
446453
{% endif %}
447454
"""
448455

456+
not_matched_by_source_then_update_model = """
457+
{{ config(
458+
materialized = 'incremental',
459+
unique_key = 'id',
460+
incremental_strategy='merge',
461+
target_alias='t',
462+
source_alias='s',
463+
skip_matched_step=true,
464+
not_matched_by_source_condition='t.V > 0',
465+
not_matched_by_source_action='''
466+
update set
467+
t.first = \\\'--\\\',
468+
t.second = \\\'--\\\',
469+
t.V = -1
470+
''',
471+
) }}
472+
473+
{% if not is_incremental() %}
474+
475+
-- data for first invocation of model
476+
477+
select 1 as id, 'Vasya' as first, 'Pupkin' as second, 1 as V
478+
union all
479+
select 2 as id, 'Paul' as first, 'Atreides' as second, 0 as V
480+
union all
481+
select 3 as id, 'Dunkan' as first, 'Aidaho' as second, 1 as V
482+
483+
{% else %}
484+
485+
-- data for subsequent incremental update
486+
487+
-- id = 1 should be updated with
488+
-- id = 2 should be kept as condition doesn't hold (t.V = 0)
489+
select 3 as id, 'Dunkan' as first, 'Aidaho' as second, 2 as V -- No update, skipped
490+
union all
491+
select 4 as id, 'Baron' as first, 'Harkonnen' as second, 1 as V -- should append
492+
493+
{% endif %}
494+
"""
495+
449496
merge_schema_evolution_model = """
450497
{{ config(
451498
materialized = 'incremental',

tests/functional/adapter/incremental/test_incremental_strategies.py

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -288,17 +288,38 @@ def test_merge(self, project):
288288
)
289289

290290

291-
class TestNotMatchedBySourceAndCondition(IncrementalBase):
291+
class TestNotMatchedBySourceAndConditionThenDelete(IncrementalBase):
292292
@pytest.fixture(scope="class")
293293
def seeds(self):
294294
return {
295-
"not_matched_by_source_expected.csv": fixtures.not_matched_by_source_expected,
295+
"not_matched_by_source_expected.csv": fixtures.not_matched_by_source_then_del_expected,
296296
}
297297

298298
@pytest.fixture(scope="class")
299299
def models(self):
300300
return {
301-
"not_matched_by_source.sql": fixtures.not_matched_by_source_model,
301+
"not_matched_by_source.sql": fixtures.not_matched_by_source_then_delete_model,
302+
}
303+
304+
def test_merge(self, project):
305+
self.seed_and_run_twice()
306+
util.check_relations_equal(
307+
project.adapter,
308+
["not_matched_by_source", "not_matched_by_source_expected"],
309+
)
310+
311+
312+
class TestNotMatchedBySourceAndConditionThenUpdate(IncrementalBase):
313+
@pytest.fixture(scope="class")
314+
def seeds(self):
315+
return {
316+
"not_matched_by_source_expected.csv": fixtures.not_matched_by_source_then_upd_expected,
317+
}
318+
319+
@pytest.fixture(scope="class")
320+
def models(self):
321+
return {
322+
"not_matched_by_source.sql": fixtures.not_matched_by_source_then_update_model,
302323
}
303324

304325
def test_merge(self, project):

0 commit comments

Comments
 (0)