Skip to content

Commit c45cba3

Browse files
committed
Merge remote-tracking branch 'origin/master' into devin-ELE-4850-1753859795
2 parents d481311 + 1671dc0 commit c45cba3

File tree

6 files changed

+273
-3
lines changed

6 files changed

+273
-3
lines changed

.github/workflows/test-warehouse.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ jobs:
123123
run: |
124124
mkdir -p ~/.dbt
125125
DBT_VERSION=$(pip show dbt-core | grep -i version | awk '{print $2}' | sed 's/\.//g')
126-
UNDERSCORED_REF_NAME=$(echo "${{ inputs.warehouse-type }}_dbt_${DBT_VERSION}_${BRANCH_NAME}" | awk '{print tolower($0)}' | head -c 40 | sed "s/-/_/g")
126+
UNDERSCORED_REF_NAME=$(echo "${{ inputs.warehouse-type }}_dbt_${DBT_VERSION}_${BRANCH_NAME}" | awk '{print tolower($0)}' | head -c 40 | sed "s/[-\/]/_/g")
127127
echo "$PROFILES_YML" | base64 -d | sed "s/<SCHEMA_NAME>/dbt_pkg_$UNDERSCORED_REF_NAME/g" > ~/.dbt/profiles.yml
128128
129129
- name: Check DWH connection

integration_tests/tests/dbt_project.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ def test(
109109
materialization: str = "table", # Only relevant if as_model=True
110110
test_vars: Optional[dict] = None,
111111
elementary_enabled: bool = True,
112+
model_config: Optional[Dict[str, Any]] = None,
112113
*,
113114
multiple_results: Literal[False] = False,
114115
) -> Dict[str, Any]:
@@ -128,6 +129,7 @@ def test(
128129
materialization: str = "table", # Only relevant if as_model=True
129130
test_vars: Optional[dict] = None,
130131
elementary_enabled: bool = True,
132+
model_config: Optional[Dict[str, Any]] = None,
131133
*,
132134
multiple_results: Literal[True],
133135
) -> List[Dict[str, Any]]:
@@ -146,6 +148,7 @@ def test(
146148
materialization: str = "table", # Only relevant if as_model=True
147149
test_vars: Optional[dict] = None,
148150
elementary_enabled: bool = True,
151+
model_config: Optional[Dict[str, Any]] = None,
149152
*,
150153
multiple_results: bool = False,
151154
) -> Union[Dict[str, Any], List[Dict[str, Any]]]:
@@ -161,6 +164,9 @@ def test(
161164
test_args = test_args or {}
162165
table_yaml: Dict[str, Any] = {"name": test_id}
163166

167+
if model_config:
168+
table_yaml.update(model_config)
169+
164170
if columns:
165171
table_yaml["columns"] = columns
166172

Lines changed: 232 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,232 @@
1+
import json
2+
3+
import pytest
4+
from dbt_project import DbtProject
5+
6+
# Name of the single column in the seeded rows; it is also the column the
# not_null test is pointed at in every test below.
COLUMN_NAME = "value"
7+
8+
9+
# str.format template (placeholder: test_id) that selects the "result_row"
# values stored for the most recent elementary_test_results entry whose
# table_name matches test_id case-insensitively. The quadrupled braces are
# str.format escapes that render as the Jinja "{{ ref(...) }}" calls.
SAMPLES_QUERY = """
10+
with latest_elementary_test_result as (
11+
select id
12+
from {{{{ ref("elementary_test_results") }}}}
13+
where lower(table_name) = lower('{test_id}')
14+
order by created_at desc, id desc
15+
limit 1
16+
)
17+
18+
select result_row
19+
from {{{{ ref("test_result_rows") }}}}
20+
where elementary_test_results_id in (select * from latest_elementary_test_result)
21+
"""
22+
23+
# Passed as the "test_sample_row_count" var; tests that expect samples assert
# that exactly this many rows were stored.
TEST_SAMPLE_ROW_COUNT = 7
24+
25+
26+
@pytest.mark.skip_targets(["clickhouse"])
def test_sampling_pii_disabled(test_id: str, dbt_project: DbtProject):
    """A failing test on a PII-tagged model must not store any sample rows.

    The model carries the ``pii`` tag, ``disable_samples_on_pii_tags`` is
    switched on and ``pii`` is one of the configured ``pii_tags``.
    """
    # Every seeded row is null, so the not_null test is guaranteed to fail.
    rows = [{COLUMN_NAME: None} for _ in range(50)]
    run_vars = {
        "enable_elementary_test_materialization": True,
        "test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
        "disable_samples_on_pii_tags": True,
        "pii_tags": ["pii", "sensitive"],
    }

    outcome = dbt_project.test(
        test_id,
        "not_null",
        {"column_name": COLUMN_NAME},
        data=rows,
        as_model=True,
        model_config={"config": {"tags": ["pii"]}},
        test_vars=run_vars,
    )
    assert outcome["status"] == "fail"

    stored_rows = dbt_project.run_query(SAMPLES_QUERY.format(test_id=test_id))
    samples = [json.loads(stored["result_row"]) for stored in stored_rows]
    assert len(samples) == 0
53+
54+
55+
@pytest.mark.skip_targets(["clickhouse"])
def test_sampling_pii_disabled_with_default_config_and_casing(
    test_id: str, dbt_project: DbtProject
):
    """A mixed-case PII tag still suppresses samples when ``pii_tags`` is not overridden.

    The model is tagged ``pIi`` and no ``pii_tags`` var is passed, so the
    check relies on whatever the package default for ``pii_tags`` is and on
    tag matching being case-insensitive.
    """
    failing_row_count = 50
    # All-null data makes the not_null test fail.
    seed_rows = []
    for _ in range(failing_row_count):
        seed_rows.append({COLUMN_NAME: None})

    model_settings = {"config": {"tags": ["pIi"]}}
    outcome = dbt_project.test(
        test_id,
        "not_null",
        {"column_name": COLUMN_NAME},
        data=seed_rows,
        as_model=True,
        model_config=model_settings,
        test_vars={
            "enable_elementary_test_materialization": True,
            "test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
            "disable_samples_on_pii_tags": True,
        },
    )
    assert outcome["status"] == "fail"

    samples_sql = SAMPLES_QUERY.format(test_id=test_id)
    samples = [
        json.loads(stored["result_row"])
        for stored in dbt_project.run_query(samples_sql)
    ]
    assert len(samples) == 0
82+
83+
84+
@pytest.mark.skip_targets(["clickhouse"])
def test_sampling_pii_enabled_with_default_config(
    test_id: str, dbt_project: DbtProject
):
    """Test that PII-tagged tables still upload samples under the default config.

    Neither ``disable_samples_on_pii_tags`` nor ``pii_tags`` is passed, so the
    PII protection is left at its default (off) and the failing test is
    expected to store ``TEST_SAMPLE_ROW_COUNT`` sample rows even though the
    model is tagged ``pii``.
    """
    null_count = 50
    # All-null data makes the not_null test fail, which is what produces samples.
    data = [{COLUMN_NAME: None} for _ in range(null_count)]

    test_result = dbt_project.test(
        test_id,
        "not_null",
        dict(column_name=COLUMN_NAME),
        data=data,
        as_model=True,
        model_config={"config": {"tags": ["pii"]}},
        test_vars={
            "enable_elementary_test_materialization": True,
            "test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
        },
    )
    assert test_result["status"] == "fail"

    samples = [
        json.loads(row["result_row"])
        for row in dbt_project.run_query(SAMPLES_QUERY.format(test_id=test_id))
    ]
    assert len(samples) == TEST_SAMPLE_ROW_COUNT
111+
112+
113+
@pytest.mark.skip_targets(["clickhouse"])
def test_sampling_non_pii_enabled(test_id: str, dbt_project: DbtProject):
    """Models without a PII tag keep collecting samples while PII protection is on.

    The model is tagged ``normal``, which is not among the configured
    ``pii_tags``, so the failing test should store the full sample count.
    """
    # All-null data makes the not_null test fail.
    rows = [{COLUMN_NAME: None} for _ in range(50)]
    tags_config = {"config": {"tags": ["normal"]}}
    run_vars = {
        "enable_elementary_test_materialization": True,
        "test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
        "disable_samples_on_pii_tags": True,
        "pii_tags": ["pii", "sensitive"],
    }

    outcome = dbt_project.test(
        test_id,
        "not_null",
        {"column_name": COLUMN_NAME},
        data=rows,
        as_model=True,
        model_config=tags_config,
        test_vars=run_vars,
    )
    assert outcome["status"] == "fail"

    query = SAMPLES_QUERY.format(test_id=test_id)
    samples = [json.loads(stored["result_row"]) for stored in dbt_project.run_query(query)]
    assert len(samples) == TEST_SAMPLE_ROW_COUNT
140+
141+
142+
@pytest.mark.skip_targets(["clickhouse"])
def test_sampling_pii_feature_disabled(test_id: str, dbt_project: DbtProject):
    """With ``disable_samples_on_pii_tags`` off, a PII-tagged model still gets samples.

    The model is tagged ``pii`` and ``pii`` is listed in ``pii_tags``, but the
    feature flag is explicitly false, so the full sample count is expected.
    """
    row_total = 50
    # All-null data makes the not_null test fail.
    seed_rows = [{COLUMN_NAME: None} for _ in range(row_total)]

    feature_off_vars = {
        "enable_elementary_test_materialization": True,
        "test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
        "disable_samples_on_pii_tags": False,
        "pii_tags": ["pii", "sensitive"],
    }
    outcome = dbt_project.test(
        test_id,
        "not_null",
        {"column_name": COLUMN_NAME},
        data=seed_rows,
        as_model=True,
        model_config={"config": {"tags": ["pii"]}},
        test_vars=feature_off_vars,
    )
    assert outcome["status"] == "fail"

    stored_rows = dbt_project.run_query(SAMPLES_QUERY.format(test_id=test_id))
    samples = [json.loads(stored["result_row"]) for stored in stored_rows]
    assert len(samples) == TEST_SAMPLE_ROW_COUNT
169+
170+
171+
@pytest.mark.skip_targets(["clickhouse"])
def test_sampling_disable_samples_overrides_pii(test_id: str, dbt_project: DbtProject):
    """No samples are stored when ``disable_test_samples`` and a PII tag are both set.

    The model config carries ``meta.disable_test_samples: True`` together with
    the ``pii`` tag, and PII protection is enabled as well.
    """
    # All-null data makes the not_null test fail.
    rows = [{COLUMN_NAME: None} for _ in range(50)]
    model_settings = {
        "config": {"meta": {"disable_test_samples": True}, "tags": ["pii"]}
    }
    run_vars = {
        "enable_elementary_test_materialization": True,
        "test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
        "disable_samples_on_pii_tags": True,
        "pii_tags": ["pii"],
    }

    outcome = dbt_project.test(
        test_id,
        "not_null",
        {"column_name": COLUMN_NAME},
        data=rows,
        as_model=True,
        model_config=model_settings,
        test_vars=run_vars,
    )
    assert outcome["status"] == "fail"

    samples_sql = SAMPLES_QUERY.format(test_id=test_id)
    samples = [
        json.loads(stored["result_row"])
        for stored in dbt_project.run_query(samples_sql)
    ]
    assert len(samples) == 0
200+
201+
202+
@pytest.mark.skip_targets(["clickhouse"])
def test_sampling_disable_samples_false_allows_samples(
    test_id: str, dbt_project: DbtProject
):
    """An explicit ``disable_test_samples: False`` leaves sample collection on.

    The model is tagged ``normal`` and PII protection is off, so the failing
    test should store the full sample count.
    """
    failing_row_count = 50
    # All-null data makes the not_null test fail.
    seed_rows = []
    for _ in range(failing_row_count):
        seed_rows.append({COLUMN_NAME: None})

    model_settings = {
        "config": {"meta": {"disable_test_samples": False}, "tags": ["normal"]}
    }
    outcome = dbt_project.test(
        test_id,
        "not_null",
        {"column_name": COLUMN_NAME},
        data=seed_rows,
        as_model=True,
        model_config=model_settings,
        test_vars={
            "enable_elementary_test_materialization": True,
            "test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
            "disable_samples_on_pii_tags": False,
            "pii_tags": ["pii"],
        },
    )
    assert outcome["status"] == "fail"

    stored_rows = dbt_project.run_query(SAMPLES_QUERY.format(test_id=test_id))
    samples = [json.loads(stored["result_row"]) for stored in stored_rows]
    assert len(samples) == TEST_SAMPLE_ROW_COUNT

macros/edr/materializations/test/test.sql

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,20 @@
5050

5151
{% macro handle_dbt_test(flattened_test, materialization_macro) %}
5252
{% set result = materialization_macro() %}
53-
{% set result_rows = elementary.query_test_result_rows(sample_limit=elementary.get_config_var('test_sample_row_count'),
53+
{% set sample_limit = elementary.get_config_var('test_sample_row_count') %}
54+
55+
{% set disable_test_samples = false %}
56+
{% if "meta" in flattened_test and "disable_test_samples" in flattened_test["meta"] %}
57+
{% set disable_test_samples = flattened_test["meta"]["disable_test_samples"] %}
58+
{% endif %}
59+
60+
{% if disable_test_samples %}
61+
{% set sample_limit = 0 %}
62+
{% elif elementary.is_pii_table(flattened_test) %}
63+
{% set sample_limit = 0 %}
64+
{% endif %}
65+
66+
{% set result_rows = elementary.query_test_result_rows(sample_limit=sample_limit,
5467
ignore_passed_tests=true,
5568
flattened_test=flattened_test) %}
5669
{% set elementary_test_results_row = elementary.get_dbt_test_result_row(flattened_test, result_rows) %}

macros/edr/system/system_utils/get_config_var.sql

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,9 @@
8383
},
8484
'include_other_warehouse_specific_columns': false,
8585
'fail_on_zero': false,
86-
'anomaly_exclude_metrics': none
86+
'anomaly_exclude_metrics': none,
87+
'disable_samples_on_pii_tags': false,
88+
'pii_tags': ['pii']
8789
} %}
8890
{{- return(default_config) -}}
8991
{%- endmacro -%}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
{% macro is_pii_table(flattened_test) %}
  {#
    Decide whether the model behind `flattened_test` counts as a PII table.

    Returns false when the `disable_samples_on_pii_tags` config var is falsy.
    Otherwise returns true when at least one entry of the test's `model_tags`
    matches, case-insensitively, one of the configured `pii_tags`.
  #}
  {% set disable_samples_on_pii_tags = elementary.get_config_var('disable_samples_on_pii_tags') %}
  {% if not disable_samples_on_pii_tags %}
    {% do return(false) %}
  {% endif %}

  {#
    Strings pass Jinja's `iterable` test, so a scalar such as `pii_tags: "pii"`
    must be excluded explicitly; otherwise map('lower') would iterate over its
    characters and the tag would never match.
  #}
  {% set raw_pii_tags = elementary.get_config_var('pii_tags') %}
  {% set pii_tags = (raw_pii_tags if (raw_pii_tags is iterable and raw_pii_tags is not string) else [raw_pii_tags]) | map('lower') | list %}

  {# Same normalization for the model's tags: a single string becomes a one-element list. #}
  {% set raw_model_tags = elementary.insensitive_get_dict_value(flattened_test, 'model_tags', []) %}
  {% set model_tags = (raw_model_tags if (raw_model_tags is iterable and raw_model_tags is not string) else [raw_model_tags]) | map('lower') | list %}

  {% set intersection = elementary.lists_intersection(model_tags, pii_tags) %}
  {% set is_pii = intersection | length > 0 %}

  {% do return(is_pii) %}
{% endmacro %}

0 commit comments

Comments
 (0)