Skip to content

Commit 949b357

Browse files
committed
fixed to support builtin tests
1 parent 4907483 commit 949b357

File tree

4 files changed

+354
-41
lines changed

4 files changed

+354
-41
lines changed

integration_tests/tests/test_column_pii_sampling.py

Lines changed: 165 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,13 @@
2727
def test_column_pii_sampling_enabled(test_id: str, dbt_project: DbtProject):
2828
"""Test that PII columns are excluded when column-level PII protection is enabled"""
2929
data = [
30-
{SENSITIVE_COLUMN: f"user{i}@example.com", SAFE_COLUMN: None} for i in range(10)
30+
{SENSITIVE_COLUMN: "user@example.com", SAFE_COLUMN: None} for i in range(10)
3131
]
3232

3333
test_result = dbt_project.test(
3434
test_id,
35-
"not_null",
36-
test_args=dict(column_name=SAFE_COLUMN),
35+
"unique",
36+
test_args=dict(column_name=SENSITIVE_COLUMN),
3737
data=data,
3838
columns=[
3939
{"name": SENSITIVE_COLUMN, "config": {"tags": ["pii"]}},
@@ -53,27 +53,25 @@ def test_column_pii_sampling_enabled(test_id: str, dbt_project: DbtProject):
5353
for row in dbt_project.run_query(SAMPLES_QUERY.format(test_id=test_id))
5454
]
5555

56-
assert len(samples) == TEST_SAMPLE_ROW_COUNT
57-
for sample in samples:
58-
assert SENSITIVE_COLUMN not in sample
59-
assert SAFE_COLUMN in sample
56+
assert len(samples) == 1
57+
assert samples[0]["n_records"] == 10
58+
assert len(samples[0]) == 1
6059

6160

6261
@pytest.mark.skip_targets(["clickhouse"])
6362
def test_column_pii_sampling_disabled(test_id: str, dbt_project: DbtProject):
6463
"""Test that all columns are included when column-level PII protection is disabled"""
6564
data = [
66-
{SENSITIVE_COLUMN: f"user{i}@example.com", SAFE_COLUMN: None} for i in range(10)
65+
{SENSITIVE_COLUMN: "user@example.com", SAFE_COLUMN: None} for i in range(10)
6766
]
6867

6968
test_result = dbt_project.test(
7069
test_id,
71-
"not_null",
72-
test_args=dict(column_name=SAFE_COLUMN),
70+
"unique",
71+
test_args=dict(column_name=SENSITIVE_COLUMN),
7372
data=data,
7473
columns=[
7574
{"name": SENSITIVE_COLUMN, "config": {"tags": ["pii"]}},
76-
{"name": SAFE_COLUMN},
7775
],
7876
test_vars={
7977
"enable_elementary_test_materialization": True,
@@ -88,30 +86,31 @@ def test_column_pii_sampling_disabled(test_id: str, dbt_project: DbtProject):
8886
for row in dbt_project.run_query(SAMPLES_QUERY.format(test_id=test_id))
8987
]
9088

91-
assert len(samples) == TEST_SAMPLE_ROW_COUNT
89+
# sample should be {'unique_field': 'user@example.com', 'n_records': 10}
90+
assert len(samples) == 1
9291
for sample in samples:
93-
assert SENSITIVE_COLUMN in sample
94-
assert SAFE_COLUMN in sample
92+
# The original column name is mapped to 'unique_field' in unique tests
93+
assert "unique_field" in sample
94+
assert "n_records" in sample
9595

9696

9797
@pytest.mark.skip_targets(["clickhouse"])
9898
def test_column_pii_sampling_tags_exist_but_flag_disabled(
9999
test_id: str, dbt_project: DbtProject
100100
):
101101
"""Test that when PII tags exist but disable_samples_on_pii_tags is false, samples are collected normally"""
102-
data = [
103-
{SENSITIVE_COLUMN: f"user{i}@example.com", SAFE_COLUMN: None} for i in range(10)
104-
]
102+
data = [{SENSITIVE_COLUMN: "user@example.com", SAFE_COLUMN: 1} for i in range(10)]
105103

106104
test_result = dbt_project.test(
107105
test_id,
108-
"not_null",
106+
"unique",
109107
test_args=dict(column_name=SAFE_COLUMN),
110108
data=data,
111109
columns=[
112110
{"name": SENSITIVE_COLUMN, "config": {"tags": ["pii"]}},
113111
{"name": SAFE_COLUMN},
114112
],
113+
test_column=None,
115114
test_vars={
116115
"enable_elementary_test_materialization": True,
117116
"test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
@@ -126,12 +125,12 @@ def test_column_pii_sampling_tags_exist_but_flag_disabled(
126125
for row in dbt_project.run_query(SAMPLES_QUERY.format(test_id=test_id))
127126
]
128127

129-
assert len(samples) == TEST_SAMPLE_ROW_COUNT
128+
# When flag is disabled, we get the full sample (not limited by PII filtering)
129+
assert len(samples) == 1
130130
for sample in samples:
131-
assert (
132-
SENSITIVE_COLUMN in sample
133-
) # PII column should be included when flag is disabled
134-
assert SAFE_COLUMN in sample
131+
# The original column name is mapped to 'unique_field' in unique tests
132+
assert "unique_field" in sample
133+
assert "n_records" in sample
135134

136135

137136
@pytest.mark.skip_targets(["clickhouse"])
@@ -150,6 +149,7 @@ def test_column_pii_sampling_all_columns_pii(test_id: str, dbt_project: DbtProje
150149
{"name": SENSITIVE_COLUMN, "config": {"tags": ["pii"]}},
151150
{"name": SAFE_COLUMN, "config": {"tags": ["pii"]}},
152151
],
152+
test_column=None,
153153
test_vars={
154154
"enable_elementary_test_materialization": True,
155155
"test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
@@ -164,9 +164,150 @@ def test_column_pii_sampling_all_columns_pii(test_id: str, dbt_project: DbtProje
164164
for row in dbt_project.run_query(SAMPLES_QUERY.format(test_id=test_id))
165165
]
166166

167+
# When all columns are PII, no samples should be collected
168+
assert len(samples) == 0
169+
170+
171+
@pytest.mark.skip_targets(["clickhouse"])
172+
def test_unique_test_column_mapping(test_id: str, dbt_project: DbtProject):
173+
"""Test that column mapping correctly maps unique test columns"""
174+
data = [{SENSITIVE_COLUMN: "user@example.com", SAFE_COLUMN: i} for i in range(10)]
175+
176+
test_result = dbt_project.test(
177+
test_id,
178+
"unique",
179+
test_args=dict(column_name=SENSITIVE_COLUMN),
180+
data=data,
181+
columns=[
182+
{"name": SENSITIVE_COLUMN, "config": {"tags": ["pii"]}},
183+
{"name": SAFE_COLUMN},
184+
],
185+
test_vars={
186+
"enable_elementary_test_materialization": True,
187+
"test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
188+
"disable_samples_on_pii_tags": True,
189+
"pii_tags": ["pii"],
190+
},
191+
)
192+
assert test_result["status"] == "fail"
193+
194+
samples = [
195+
json.loads(row["result_row"])
196+
for row in dbt_project.run_query(SAMPLES_QUERY.format(test_id=test_id))
197+
]
198+
199+
# Should only contain n_records, not unique_field (which contains PII)
200+
assert len(samples) == 1
201+
assert "n_records" in samples[0]
202+
assert "unique_field" not in samples[0]
203+
assert len(samples[0]) == 1
204+
205+
206+
@pytest.mark.skip_targets(["clickhouse"])
207+
def test_accepted_values_test_column_mapping(test_id: str, dbt_project: DbtProject):
208+
"""Test that column mapping correctly maps accepted_values test columns"""
209+
data = [{SENSITIVE_COLUMN: "invalid_value", SAFE_COLUMN: i} for i in range(10)]
210+
211+
test_result = dbt_project.test(
212+
test_id,
213+
"accepted_values",
214+
test_args=dict(column_name=SENSITIVE_COLUMN, values=["valid1", "valid2"]),
215+
data=data,
216+
columns=[
217+
{"name": SENSITIVE_COLUMN, "config": {"tags": ["pii"]}},
218+
{"name": SAFE_COLUMN},
219+
],
220+
test_vars={
221+
"enable_elementary_test_materialization": True,
222+
"test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
223+
"disable_samples_on_pii_tags": True,
224+
"pii_tags": ["pii"],
225+
},
226+
)
227+
assert test_result["status"] == "fail"
228+
229+
samples = [
230+
json.loads(row["result_row"])
231+
for row in dbt_project.run_query(SAMPLES_QUERY.format(test_id=test_id))
232+
]
233+
234+
# Should only contain n_records, not value (which contains PII)
235+
assert len(samples) == 1
236+
assert "n_records" in samples[0]
237+
assert "value" not in samples[0]
238+
assert len(samples[0]) == 1
239+
240+
241+
@pytest.mark.skip_targets(["clickhouse"])
242+
def test_not_null_test_column_mapping(test_id: str, dbt_project: DbtProject):
243+
"""Test that column mapping correctly handles not_null test columns"""
244+
data = [{SENSITIVE_COLUMN: None, SAFE_COLUMN: i} for i in range(10)]
245+
246+
test_result = dbt_project.test(
247+
test_id,
248+
"not_null",
249+
test_args=dict(column_name=SENSITIVE_COLUMN),
250+
data=data,
251+
columns=[
252+
{"name": SENSITIVE_COLUMN, "config": {"tags": ["pii"]}},
253+
{"name": SAFE_COLUMN},
254+
],
255+
test_vars={
256+
"enable_elementary_test_materialization": True,
257+
"test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
258+
"disable_samples_on_pii_tags": True,
259+
"pii_tags": ["pii"],
260+
},
261+
)
262+
assert test_result["status"] == "fail"
263+
264+
samples = [
265+
json.loads(row["result_row"])
266+
for row in dbt_project.run_query(SAMPLES_QUERY.format(test_id=test_id))
267+
]
268+
269+
# Should only contain _no_non_excluded_columns when all columns are PII
167270
assert len(samples) == TEST_SAMPLE_ROW_COUNT
168271
for sample in samples:
169272
assert "_no_non_excluded_columns" in sample
170-
assert sample["_no_non_excluded_columns"] == 1
171273
assert SENSITIVE_COLUMN not in sample
172274
assert SAFE_COLUMN not in sample
275+
276+
277+
@pytest.mark.skip_targets(["clickhouse"])
278+
def test_multiple_pii_columns_mapping(test_id: str, dbt_project: DbtProject):
279+
"""Test that column mapping handles multiple PII columns correctly"""
280+
data = [
281+
{SENSITIVE_COLUMN: "user@example.com", "phone": "123-456-7890", SAFE_COLUMN: i}
282+
for i in range(10)
283+
]
284+
285+
test_result = dbt_project.test(
286+
test_id,
287+
"unique",
288+
test_args=dict(column_name=SENSITIVE_COLUMN),
289+
data=data,
290+
columns=[
291+
{"name": SENSITIVE_COLUMN, "config": {"tags": ["pii"]}},
292+
{"name": "phone", "config": {"tags": ["pii"]}},
293+
{"name": SAFE_COLUMN},
294+
],
295+
test_vars={
296+
"enable_elementary_test_materialization": True,
297+
"test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
298+
"disable_samples_on_pii_tags": True,
299+
},
300+
)
301+
assert test_result["status"] == "fail"
302+
303+
samples = [
304+
json.loads(row["result_row"])
305+
for row in dbt_project.run_query(SAMPLES_QUERY.format(test_id=test_id))
306+
]
307+
308+
# Should only contain n_records, not unique_field or phone (which contain PII)
309+
assert len(samples) == 1
310+
assert "n_records" in samples[0]
311+
assert "unique_field" not in samples[0]
312+
assert "phone" not in samples[0]
313+
assert len(samples[0]) == 1

integration_tests/tests/test_disable_samples_config.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ def test_disable_samples_config_prevents_sampling(
3030
columns = [
3131
{
3232
"name": COLUMN_NAME,
33-
"config": {"disable_samples": True},
33+
"config": {"disable_test_samples": True},
3434
"tests": [{"not_null": {}}],
3535
}
3636
]
@@ -66,7 +66,7 @@ def test_disable_samples_false_allows_sampling(test_id: str, dbt_project: DbtPro
6666
columns = [
6767
{
6868
"name": COLUMN_NAME,
69-
"config": {"disable_samples": False},
69+
"config": {"disable_test_samples": False},
7070
"tests": [{"not_null": {}}],
7171
}
7272
]
@@ -103,7 +103,7 @@ def test_disable_samples_config_overrides_pii_tags(
103103
columns = [
104104
{
105105
"name": COLUMN_NAME,
106-
"config": {"disable_samples": True, "tags": ["pii"]},
106+
"config": {"disable_test_samples": True, "tags": ["pii"]},
107107
"tests": [{"not_null": {}}],
108108
}
109109
]
@@ -134,15 +134,15 @@ def test_disable_samples_config_overrides_pii_tags(
134134

135135
@pytest.mark.skip_targets(["clickhouse"])
136136
def test_disable_samples_and_pii_interaction(test_id: str, dbt_project: DbtProject):
137-
"""Test that disable_samples and PII columns both get excluded"""
137+
"""Test that disable_test_samples and PII columns both get excluded"""
138138
data = [
139139
{"col1": None, "col2": f"pii{i}", "col3": f"disabled{i}"} for i in range(10)
140140
]
141141

142142
columns = [
143143
{"name": "col1", "tests": [{"not_null": {}}]},
144144
{"name": "col2", "config": {"tags": ["pii"]}},
145-
{"name": "col3", "config": {"disable_samples": True}},
145+
{"name": "col3", "config": {"disable_test_samples": True}},
146146
]
147147

148148
test_result = dbt_project.test(
@@ -173,13 +173,13 @@ def test_disable_samples_and_pii_interaction(test_id: str, dbt_project: DbtProje
173173

174174
@pytest.mark.skip_targets(["clickhouse"])
175175
def test_disable_samples_with_multiple_columns(test_id: str, dbt_project: DbtProject):
176-
"""Test that disable_samples excludes only the disabled column"""
176+
"""Test that disable_test_samples excludes only the disabled column"""
177177
data = [{"col1": None, "col2": f"value{i}"} for i in range(10)]
178178

179179
columns = [
180180
{
181181
"name": "col1",
182-
"config": {"disable_samples": True},
182+
"config": {"disable_test_samples": True},
183183
"tests": [{"not_null": {}}],
184184
},
185185
{"name": "col2"},

0 commit comments

Comments
 (0)