Skip to content

Commit cffd655

Browse files
fix: extract test owner from primary model only (CORE-196) (#905)
* fix: extract test owner from primary model only (CORE-196) Previously, the flatten_test macro collected owners from ALL parent models that a test references. This was incorrect for multi-parent tests like relationship tests, where only the primary model's owner should be attributed. Changes: - Remove owner collection from the parent models loop - Add owner extraction from the primary model inside the tested_model_node block - Keep tag collection from all parent models unchanged This ensures that: - Single parent tests: Owner attribution unchanged (backward compatible) - Multi-parent tests: Only primary model's owner shown - Relationship tests: Only the 'tested' model's owner attributed - Override scenarios: Respect override_primary_test_model_id configuration Co-Authored-By: Yosef Arbiv <[email protected]> * fix: add deduplication for test_models_owners Address CodeRabbit review comment: restore the unique filter for test_models_owners to ensure no duplicate owners appear in model_owners, maintaining backward compatibility with the original behavior. Co-Authored-By: Yosef Arbiv <[email protected]> * test: add integration tests for test owner attribution (CORE-196) Co-Authored-By: Yosef Arbiv <[email protected]> * fix: use new dbt test arguments format for fusion compatibility Co-Authored-By: Yosef Arbiv <[email protected]> * fix: use robust query for relationship tests across dbt versions Co-Authored-By: Yosef Arbiv <[email protected]> * fix: use explicit test names for reliable querying across dbt versions Co-Authored-By: Yosef Arbiv <[email protected]> * fix: address CodeRabbit review comments - Fix JSON parsing logic in _parse_model_owners to return [parsed] instead of [model_owners_value] when json.loads succeeds with a non-list value - Remove unused tmp_path parameter from all 4 test functions Co-Authored-By: Yosef Arbiv <[email protected]> * refactor: add type hints and create model SQL utility function Co-Authored-By: Yosef Arbiv <[email protected]> * fix: use arguments format for relationship tests in dbt fusion Co-Authored-By: Yosef Arbiv <[email protected]> * test: skip relationship tests on dbt fusion (primary model detection differs) Co-Authored-By: Yosef Arbiv <[email protected]> --------- Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Co-authored-by: Yosef Arbiv <[email protected]>
1 parent 7b5fec6 commit cffd655

File tree

2 files changed

+363
-12
lines changed

2 files changed

+363
-12
lines changed
Lines changed: 350 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,350 @@
1+
"""
2+
Integration tests for test owner attribution in dbt_tests artifact table.
3+
Tests that test ownership is correctly extracted from the primary model only,
4+
not aggregated from all parent models (CORE-196).
5+
"""
6+
import contextlib
7+
import json
8+
import uuid
9+
from pathlib import Path
10+
from typing import Generator, List, Optional, Union
11+
12+
import pytest
13+
from dbt_project import DbtProject
14+
15+
16+
@contextlib.contextmanager
17+
def cleanup_file(path: Path) -> Generator[None, None, None]:
18+
"""Context manager to clean up a file after the test."""
19+
try:
20+
yield
21+
finally:
22+
if path.exists():
23+
path.unlink()
24+
25+
26+
def _parse_model_owners(
27+
model_owners_value: Optional[Union[str, List[str]]]
28+
) -> List[str]:
29+
"""
30+
Parse the model_owners column value which may be a JSON string or list.
31+
Returns a list of owner strings.
32+
"""
33+
if model_owners_value is None:
34+
return []
35+
if isinstance(model_owners_value, list):
36+
return model_owners_value
37+
if isinstance(model_owners_value, str):
38+
if not model_owners_value or model_owners_value == "[]":
39+
return []
40+
try:
41+
parsed = json.loads(model_owners_value)
42+
return parsed if isinstance(parsed, list) else [parsed]
43+
except json.JSONDecodeError:
44+
return [model_owners_value]
45+
return []
46+
47+
48+
def _create_model_sql(owner: Optional[str] = None, columns: str = "1 as id") -> str:
49+
"""
50+
Create a dbt model SQL string with optional owner configuration.
51+
52+
Args:
53+
owner: The owner to set in the model's meta config. If None, no config is added.
54+
columns: The SELECT clause columns. Defaults to "1 as id".
55+
56+
Returns:
57+
A dbt model SQL string.
58+
"""
59+
if owner is not None:
60+
return f"""
61+
{{{{ config(meta={{'owner': '{owner}'}}) }}}}
62+
select {columns}
63+
"""
64+
return f"""
65+
select {columns}
66+
"""
67+
68+
69+
def test_single_parent_test_owner_attribution(dbt_project: DbtProject) -> None:
70+
"""
71+
Test that a test on a single model correctly inherits the owner from that model.
72+
This is the baseline case - single parent tests should have the parent's owner.
73+
"""
74+
unique_id = str(uuid.uuid4()).replace("-", "_")
75+
model_name = f"model_single_owner_{unique_id}"
76+
owner_name = "Alice"
77+
78+
model_sql = _create_model_sql(owner=owner_name)
79+
80+
schema_yaml = {
81+
"version": 2,
82+
"models": [
83+
{
84+
"name": model_name,
85+
"description": "A model with a single owner for testing",
86+
"columns": [{"name": "id", "tests": ["unique"]}],
87+
}
88+
],
89+
}
90+
91+
dbt_model_path = dbt_project.models_dir_path / "tmp" / f"{model_name}.sql"
92+
with cleanup_file(dbt_model_path):
93+
with dbt_project.write_yaml(
94+
schema_yaml, name=f"schema_single_owner_{unique_id}.yml"
95+
):
96+
dbt_model_path.parent.mkdir(parents=True, exist_ok=True)
97+
dbt_model_path.write_text(model_sql)
98+
99+
dbt_project.dbt_runner.vars["disable_dbt_artifacts_autoupload"] = False
100+
dbt_project.dbt_runner.run(select=model_name)
101+
102+
tests = dbt_project.read_table(
103+
"dbt_tests",
104+
where=f"parent_model_unique_id LIKE '%{model_name}'",
105+
raise_if_empty=True,
106+
)
107+
108+
assert len(tests) == 1, f"Expected 1 test, got {len(tests)}"
109+
test_row = tests[0]
110+
model_owners = _parse_model_owners(test_row.get("model_owners"))
111+
112+
assert model_owners == [
113+
owner_name
114+
], f"Expected model_owners to be ['{owner_name}'], got {model_owners}"
115+
116+
117+
@pytest.mark.skip_targets(["dremio"])
118+
@pytest.mark.skip_for_dbt_fusion
119+
def test_relationship_test_uses_primary_model_owner_only(
120+
dbt_project: DbtProject,
121+
) -> None:
122+
"""
123+
Test that a relationship test between two models with different owners
124+
only uses the owner from the PRIMARY model (the one being tested),
125+
not from the referenced model.
126+
127+
This is the key test for CORE-196 - previously owners were aggregated
128+
from all parent models, now only the primary model's owner should be used.
129+
"""
130+
unique_id = str(uuid.uuid4()).replace("-", "_")
131+
primary_model_name = f"model_primary_{unique_id}"
132+
referenced_model_name = f"model_referenced_{unique_id}"
133+
# Use explicit test name for reliable querying across dbt versions
134+
test_name = f"rel_primary_owner_{unique_id}"
135+
primary_owner = "Alice"
136+
referenced_owner = "Bob"
137+
138+
primary_model_sql = _create_model_sql(
139+
owner=primary_owner, columns="1 as id, 1 as ref_id"
140+
)
141+
referenced_model_sql = _create_model_sql(owner=referenced_owner)
142+
143+
schema_yaml = {
144+
"version": 2,
145+
"models": [
146+
{
147+
"name": primary_model_name,
148+
"description": "Primary model with owner Alice",
149+
"columns": [
150+
{"name": "id"},
151+
{
152+
"name": "ref_id",
153+
"tests": [
154+
{
155+
"relationships": {
156+
"name": test_name,
157+
"arguments": {
158+
"to": f"ref('{referenced_model_name}')",
159+
"field": "id",
160+
},
161+
}
162+
}
163+
],
164+
},
165+
],
166+
},
167+
{
168+
"name": referenced_model_name,
169+
"description": "Referenced model with owner Bob",
170+
"columns": [{"name": "id"}],
171+
},
172+
],
173+
}
174+
175+
primary_model_path = (
176+
dbt_project.models_dir_path / "tmp" / f"{primary_model_name}.sql"
177+
)
178+
referenced_model_path = (
179+
dbt_project.models_dir_path / "tmp" / f"{referenced_model_name}.sql"
180+
)
181+
182+
with cleanup_file(primary_model_path), cleanup_file(referenced_model_path):
183+
with dbt_project.write_yaml(
184+
schema_yaml, name=f"schema_relationship_{unique_id}.yml"
185+
):
186+
primary_model_path.parent.mkdir(parents=True, exist_ok=True)
187+
primary_model_path.write_text(primary_model_sql)
188+
referenced_model_path.write_text(referenced_model_sql)
189+
190+
dbt_project.dbt_runner.vars["disable_dbt_artifacts_autoupload"] = False
191+
dbt_project.dbt_runner.run(
192+
select=f"{primary_model_name} {referenced_model_name}"
193+
)
194+
195+
# Query by explicit test name - more robust across dbt versions
196+
tests = dbt_project.read_table(
197+
"dbt_tests",
198+
where=f"name LIKE '%{test_name}%'",
199+
raise_if_empty=False,
200+
)
201+
202+
assert (
203+
len(tests) == 1
204+
), f"Expected 1 relationship test with name containing '{test_name}', got {len(tests)}. Tests found: {[t.get('name') for t in tests]}"
205+
test_row = tests[0]
206+
model_owners = _parse_model_owners(test_row.get("model_owners"))
207+
208+
assert model_owners == [
209+
primary_owner
210+
], f"Expected model_owners to be ['{primary_owner}'] (primary model only), got {model_owners}. Referenced model owner '{referenced_owner}' should NOT be included."
211+
212+
213+
@pytest.mark.skip_targets(["dremio"])
214+
@pytest.mark.skip_for_dbt_fusion
215+
def test_relationship_test_no_owner_on_primary_model(dbt_project: DbtProject) -> None:
216+
"""
217+
Test that when the primary model has no owner but the referenced model does,
218+
the test should have empty model_owners (not inherit from referenced model).
219+
"""
220+
unique_id = str(uuid.uuid4()).replace("-", "_")
221+
primary_model_name = f"model_no_owner_{unique_id}"
222+
referenced_model_name = f"model_with_owner_{unique_id}"
223+
# Use explicit test name for reliable querying across dbt versions
224+
test_name = f"rel_no_owner_{unique_id}"
225+
referenced_owner = "Bob"
226+
227+
primary_model_sql = _create_model_sql(owner=None, columns="1 as id, 1 as ref_id")
228+
referenced_model_sql = _create_model_sql(owner=referenced_owner)
229+
230+
schema_yaml = {
231+
"version": 2,
232+
"models": [
233+
{
234+
"name": primary_model_name,
235+
"description": "Primary model with NO owner",
236+
"columns": [
237+
{"name": "id"},
238+
{
239+
"name": "ref_id",
240+
"tests": [
241+
{
242+
"relationships": {
243+
"name": test_name,
244+
"arguments": {
245+
"to": f"ref('{referenced_model_name}')",
246+
"field": "id",
247+
},
248+
}
249+
}
250+
],
251+
},
252+
],
253+
},
254+
{
255+
"name": referenced_model_name,
256+
"description": "Referenced model with owner Bob",
257+
"columns": [{"name": "id"}],
258+
},
259+
],
260+
}
261+
262+
primary_model_path = (
263+
dbt_project.models_dir_path / "tmp" / f"{primary_model_name}.sql"
264+
)
265+
referenced_model_path = (
266+
dbt_project.models_dir_path / "tmp" / f"{referenced_model_name}.sql"
267+
)
268+
269+
with cleanup_file(primary_model_path), cleanup_file(referenced_model_path):
270+
with dbt_project.write_yaml(
271+
schema_yaml, name=f"schema_no_owner_{unique_id}.yml"
272+
):
273+
primary_model_path.parent.mkdir(parents=True, exist_ok=True)
274+
primary_model_path.write_text(primary_model_sql)
275+
referenced_model_path.write_text(referenced_model_sql)
276+
277+
dbt_project.dbt_runner.vars["disable_dbt_artifacts_autoupload"] = False
278+
dbt_project.dbt_runner.run(
279+
select=f"{primary_model_name} {referenced_model_name}"
280+
)
281+
282+
# Query by explicit test name - more robust across dbt versions
283+
tests = dbt_project.read_table(
284+
"dbt_tests",
285+
where=f"name LIKE '%{test_name}%'",
286+
raise_if_empty=False,
287+
)
288+
289+
assert (
290+
len(tests) == 1
291+
), f"Expected 1 relationship test with name containing '{test_name}', got {len(tests)}. Tests found: {[t.get('name') for t in tests]}"
292+
test_row = tests[0]
293+
model_owners = _parse_model_owners(test_row.get("model_owners"))
294+
295+
assert (
296+
model_owners == []
297+
), f"Expected model_owners to be empty (primary model has no owner), got {model_owners}. Referenced model owner '{referenced_owner}' should NOT be inherited."
298+
299+
300+
def test_owner_deduplication(dbt_project: DbtProject) -> None:
301+
"""
302+
Test that duplicate owners in a model's owner field are deduplicated.
303+
For example, if owner is "Alice,Bob,Alice", the result should be ["Alice", "Bob"].
304+
"""
305+
unique_id = str(uuid.uuid4()).replace("-", "_")
306+
model_name = f"model_dup_owner_{unique_id}"
307+
308+
model_sql = _create_model_sql(owner="Alice,Bob,Alice")
309+
310+
schema_yaml = {
311+
"version": 2,
312+
"models": [
313+
{
314+
"name": model_name,
315+
"description": "A model with duplicate owners for testing deduplication",
316+
"columns": [{"name": "id", "tests": ["unique"]}],
317+
}
318+
],
319+
}
320+
321+
dbt_model_path = dbt_project.models_dir_path / "tmp" / f"{model_name}.sql"
322+
with cleanup_file(dbt_model_path):
323+
with dbt_project.write_yaml(
324+
schema_yaml, name=f"schema_dup_owner_{unique_id}.yml"
325+
):
326+
dbt_model_path.parent.mkdir(parents=True, exist_ok=True)
327+
dbt_model_path.write_text(model_sql)
328+
329+
dbt_project.dbt_runner.vars["disable_dbt_artifacts_autoupload"] = False
330+
dbt_project.dbt_runner.run(select=model_name)
331+
332+
tests = dbt_project.read_table(
333+
"dbt_tests",
334+
where=f"parent_model_unique_id LIKE '%{model_name}'",
335+
raise_if_empty=True,
336+
)
337+
338+
assert len(tests) == 1, f"Expected 1 test, got {len(tests)}"
339+
test_row = tests[0]
340+
model_owners = _parse_model_owners(test_row.get("model_owners"))
341+
342+
assert (
343+
len(model_owners) == 2
344+
), f"Expected 2 unique owners, got {len(model_owners)}: {model_owners}"
345+
assert (
346+
"Alice" in model_owners
347+
), f"Expected 'Alice' in model_owners, got {model_owners}"
348+
assert (
349+
"Bob" in model_owners
350+
), f"Expected 'Bob' in model_owners, got {model_owners}"

macros/edr/dbt_artifacts/upload_dbt_tests.sql

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -83,23 +83,11 @@
8383
{% set test_models_tags = [] %}
8484
{% for test_model_node in test_model_nodes %}
8585
{% set flatten_test_model_node = elementary.flatten_node(test_model_node) %}
86-
{% set test_model_owner = flatten_test_model_node.get('owner') %}
87-
{% if test_model_owner %}
88-
{% if test_model_owner is string %}
89-
{% set owners = test_model_owner.split(',') %}
90-
{% for owner in owners %}
91-
{% do test_models_owners.append(owner | trim) %}
92-
{% endfor %}
93-
{% elif test_model_owner is iterable %}
94-
{% do test_models_owners.extend(test_model_owner) %}
95-
{% endif %}
96-
{% endif %}
9786
{% set test_model_tags = flatten_test_model_node.get('tags') %}
9887
{% if test_model_tags and test_model_tags is sequence %}
9988
{% do test_models_tags.extend(test_model_tags) %}
10089
{% endif %}
10190
{% endfor %}
102-
{% set test_models_owners = test_models_owners | unique | list %}
10391
{% set test_models_tags = test_models_tags | unique | list %}
10492

10593
{% set test_kwargs = elementary.safe_get_with_default(test_metadata, 'kwargs', {}) %}
@@ -143,8 +131,21 @@
143131
{% set primary_test_model_database = tested_model_node.get('database') %}
144132
{% set primary_test_model_schema = tested_model_node.get('schema') %}
145133
{% set group_name = group_name or tested_model_node.get('group') %}
134+
{% set flatten_primary_model_node = elementary.flatten_node(tested_model_node) %}
135+
{% set primary_model_owner = flatten_primary_model_node.get('owner') %}
136+
{% if primary_model_owner %}
137+
{% if primary_model_owner is string %}
138+
{% set owners = primary_model_owner.split(',') %}
139+
{% for owner in owners %}
140+
{% do test_models_owners.append(owner | trim) %}
141+
{% endfor %}
142+
{% elif primary_model_owner is iterable %}
143+
{% do test_models_owners.extend(primary_model_owner) %}
144+
{% endif %}
145+
{% endif %}
146146
{%- endif -%}
147147
{%- endif -%}
148+
{% set test_models_owners = test_models_owners | unique | list %}
148149

149150
{%- if primary_test_model_database is none or primary_test_model_schema is none -%}
150151
{# This is mainly here to support singular test cases with multiple referred models, in this case the tested node is being used to extract the db and schema #}

0 commit comments

Comments
 (0)