2727def test_column_pii_sampling_enabled (test_id : str , dbt_project : DbtProject ):
2828 """Test that PII columns are excluded when column-level PII protection is enabled"""
2929 data = [
30- {SENSITIVE_COLUMN : f "user{ i } @example.com" , SAFE_COLUMN : None } for i in range (10 )
30+ {
SENSITIVE_COLUMN :
"[email protected] " ,
SAFE_COLUMN :
None }
for i in range (
10 )
3131 ]
3232
3333 test_result = dbt_project .test (
3434 test_id ,
35- "not_null " ,
36- test_args = dict (column_name = SAFE_COLUMN ),
35+ "unique " ,
36+ test_args = dict (column_name = SENSITIVE_COLUMN ),
3737 data = data ,
3838 columns = [
3939 {"name" : SENSITIVE_COLUMN , "config" : {"tags" : ["pii" ]}},
@@ -53,27 +53,25 @@ def test_column_pii_sampling_enabled(test_id: str, dbt_project: DbtProject):
5353 for row in dbt_project .run_query (SAMPLES_QUERY .format (test_id = test_id ))
5454 ]
5555
56- assert len (samples ) == TEST_SAMPLE_ROW_COUNT
57- for sample in samples :
58- assert SENSITIVE_COLUMN not in sample
59- assert SAFE_COLUMN in sample
56+ assert len (samples ) == 1
57+ assert samples [0 ]["n_records" ] == 10
58+ assert len (samples [0 ]) == 1
6059
6160
6261@pytest .mark .skip_targets (["clickhouse" ])
6362def test_column_pii_sampling_disabled (test_id : str , dbt_project : DbtProject ):
6463 """Test that all columns are included when column-level PII protection is disabled"""
6564 data = [
66- {SENSITIVE_COLUMN : f "user{ i } @example.com" , SAFE_COLUMN : None } for i in range (10 )
65+ {
SENSITIVE_COLUMN :
"[email protected] " ,
SAFE_COLUMN :
None }
for i in range (
10 )
6766 ]
6867
6968 test_result = dbt_project .test (
7069 test_id ,
71- "not_null " ,
72- test_args = dict (column_name = SAFE_COLUMN ),
70+ "unique " ,
71+ test_args = dict (column_name = SENSITIVE_COLUMN ),
7372 data = data ,
7473 columns = [
7574 {"name" : SENSITIVE_COLUMN , "config" : {"tags" : ["pii" ]}},
76- {"name" : SAFE_COLUMN },
7775 ],
7876 test_vars = {
7977 "enable_elementary_test_materialization" : True ,
@@ -88,30 +86,31 @@ def test_column_pii_sampling_disabled(test_id: str, dbt_project: DbtProject):
8886 for row in dbt_project .run_query (SAMPLES_QUERY .format (test_id = test_id ))
8987 ]
9088
91- assert len (samples ) == TEST_SAMPLE_ROW_COUNT
89+ # sample should be {'unique_field': '[email protected] ', 'n_records': 10} 90+ assert len (samples ) == 1
9291 for sample in samples :
93- assert SENSITIVE_COLUMN in sample
94- assert SAFE_COLUMN in sample
92+ # The original column name is mapped to 'unique_field' in unique tests
93+ assert "unique_field" in sample
94+ assert "n_records" in sample
9595
9696
9797@pytest .mark .skip_targets (["clickhouse" ])
9898def test_column_pii_sampling_tags_exist_but_flag_disabled (
9999 test_id : str , dbt_project : DbtProject
100100):
101101 """Test that when PII tags exist but disable_samples_on_pii_tags is false, samples are collected normally"""
102- data = [
103- {SENSITIVE_COLUMN : f"user{ i } @example.com" , SAFE_COLUMN : None } for i in range (10 )
104- ]
102+ data = [{
SENSITIVE_COLUMN :
"[email protected] " ,
SAFE_COLUMN :
1 }
for i in range (
10 )]
105103
106104 test_result = dbt_project .test (
107105 test_id ,
108- "not_null " ,
106+ "unique " ,
109107 test_args = dict (column_name = SAFE_COLUMN ),
110108 data = data ,
111109 columns = [
112110 {"name" : SENSITIVE_COLUMN , "config" : {"tags" : ["pii" ]}},
113111 {"name" : SAFE_COLUMN },
114112 ],
113+ test_column = None ,
115114 test_vars = {
116115 "enable_elementary_test_materialization" : True ,
117116 "test_sample_row_count" : TEST_SAMPLE_ROW_COUNT ,
@@ -126,12 +125,12 @@ def test_column_pii_sampling_tags_exist_but_flag_disabled(
126125 for row in dbt_project .run_query (SAMPLES_QUERY .format (test_id = test_id ))
127126 ]
128127
129- assert len (samples ) == TEST_SAMPLE_ROW_COUNT
128+ # When flag is disabled, we get the full sample (not limited by PII filtering)
129+ assert len (samples ) == 1
130130 for sample in samples :
131- assert (
132- SENSITIVE_COLUMN in sample
133- ) # PII column should be included when flag is disabled
134- assert SAFE_COLUMN in sample
131+ # The original column name is mapped to 'unique_field' in unique tests
132+ assert "unique_field" in sample
133+ assert "n_records" in sample
135134
136135
137136@pytest .mark .skip_targets (["clickhouse" ])
@@ -150,6 +149,7 @@ def test_column_pii_sampling_all_columns_pii(test_id: str, dbt_project: DbtProje
150149 {"name" : SENSITIVE_COLUMN , "config" : {"tags" : ["pii" ]}},
151150 {"name" : SAFE_COLUMN , "config" : {"tags" : ["pii" ]}},
152151 ],
152+ test_column = None ,
153153 test_vars = {
154154 "enable_elementary_test_materialization" : True ,
155155 "test_sample_row_count" : TEST_SAMPLE_ROW_COUNT ,
@@ -164,9 +164,150 @@ def test_column_pii_sampling_all_columns_pii(test_id: str, dbt_project: DbtProje
164164 for row in dbt_project .run_query (SAMPLES_QUERY .format (test_id = test_id ))
165165 ]
166166
167+ # When all columns are PII, no samples should be collected
168+ assert len (samples ) == 0
169+
170+
@pytest.mark.skip_targets(["clickhouse"])
def test_unique_test_column_mapping(test_id: str, dbt_project: DbtProject):
    """Check sample contents for a failing ``unique`` test on a PII-tagged column.

    Ten rows sharing one SENSITIVE_COLUMN value are tested with
    ``disable_samples_on_pii_tags`` turned on. The test is expected to fail
    and to store exactly one sample row whose only key is ``n_records``.
    """
    # NOTE(review): the string literal below looks like an obfuscated e-mail
    # placeholder; confirm the intended value. It is identical on every row,
    # which is what produces the duplicates.
    rows = [
        {SENSITIVE_COLUMN: "[email protected] ", SAFE_COLUMN: idx}
        for idx in range(10)
    ]
    column_specs = [
        {"name": SENSITIVE_COLUMN, "config": {"tags": ["pii"]}},
        {"name": SAFE_COLUMN},
    ]
    run_vars = {
        "enable_elementary_test_materialization": True,
        "test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
        "disable_samples_on_pii_tags": True,
        "pii_tags": ["pii"],
    }

    outcome = dbt_project.test(
        test_id,
        "unique",
        test_args={"column_name": SENSITIVE_COLUMN},
        data=rows,
        columns=column_specs,
        test_vars=run_vars,
    )
    assert outcome["status"] == "fail"

    samples_query = SAMPLES_QUERY.format(test_id=test_id)
    samples = []
    for result in dbt_project.run_query(samples_query):
        samples.append(json.loads(result["result_row"]))

    # Exactly one stored sample, exposing the duplicate count but not the
    # `unique_field` key that would carry the PII value.
    assert len(samples) == 1
    only_sample = samples[0]
    assert "n_records" in only_sample
    assert "unique_field" not in only_sample
    assert len(only_sample) == 1
204+
205+
@pytest.mark.skip_targets(["clickhouse"])
def test_accepted_values_test_column_mapping(test_id: str, dbt_project: DbtProject):
    """Check sample contents for a failing ``accepted_values`` test on a PII-tagged column.

    Every row holds ``"invalid_value"`` in SENSITIVE_COLUMN while only
    ``"valid1"`` and ``"valid2"`` are accepted. With
    ``disable_samples_on_pii_tags`` turned on, the test is expected to fail
    and to store exactly one sample row whose only key is ``n_records``.
    """
    rows = [
        {SENSITIVE_COLUMN: "invalid_value", SAFE_COLUMN: idx} for idx in range(10)
    ]
    column_specs = [
        {"name": SENSITIVE_COLUMN, "config": {"tags": ["pii"]}},
        {"name": SAFE_COLUMN},
    ]
    run_vars = {
        "enable_elementary_test_materialization": True,
        "test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
        "disable_samples_on_pii_tags": True,
        "pii_tags": ["pii"],
    }

    outcome = dbt_project.test(
        test_id,
        "accepted_values",
        test_args={"column_name": SENSITIVE_COLUMN, "values": ["valid1", "valid2"]},
        data=rows,
        columns=column_specs,
        test_vars=run_vars,
    )
    assert outcome["status"] == "fail"

    samples_query = SAMPLES_QUERY.format(test_id=test_id)
    samples = []
    for result in dbt_project.run_query(samples_query):
        samples.append(json.loads(result["result_row"]))

    # Exactly one stored sample, exposing the record count but not the
    # `value` key that would carry the PII value.
    assert len(samples) == 1
    only_sample = samples[0]
    assert "n_records" in only_sample
    assert "value" not in only_sample
    assert len(only_sample) == 1
239+
240+
@pytest.mark.skip_targets(["clickhouse"])
def test_not_null_test_column_mapping(test_id: str, dbt_project: DbtProject):
    """Check sample contents for a failing ``not_null`` test on a PII-tagged column.

    SENSITIVE_COLUMN is ``None`` on all ten rows, so the test is expected to
    fail. With ``disable_samples_on_pii_tags`` turned on, TEST_SAMPLE_ROW_COUNT
    samples are expected, each carrying the ``_no_non_excluded_columns`` key
    and neither of the original column names.
    """
    rows = [{SENSITIVE_COLUMN: None, SAFE_COLUMN: idx} for idx in range(10)]
    column_specs = [
        {"name": SENSITIVE_COLUMN, "config": {"tags": ["pii"]}},
        {"name": SAFE_COLUMN},
    ]
    run_vars = {
        "enable_elementary_test_materialization": True,
        "test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
        "disable_samples_on_pii_tags": True,
        "pii_tags": ["pii"],
    }

    outcome = dbt_project.test(
        test_id,
        "not_null",
        test_args={"column_name": SENSITIVE_COLUMN},
        data=rows,
        columns=column_specs,
        test_vars=run_vars,
    )
    assert outcome["status"] == "fail"

    samples_query = SAMPLES_QUERY.format(test_id=test_id)
    samples = []
    for result in dbt_project.run_query(samples_query):
        samples.append(json.loads(result["result_row"]))

    # Each stored sample must hold the placeholder key only; neither original
    # column name may appear.
    # NOTE(review): presumably the failing rows expose just the tested (PII)
    # column, leaving nothing to sample -- confirm against the sampling macro.
    assert len(samples) == TEST_SAMPLE_ROW_COUNT
    for record in samples:
        assert "_no_non_excluded_columns" in record
        assert SENSITIVE_COLUMN not in record
        assert SAFE_COLUMN not in record
275+
276+
@pytest.mark.skip_targets(["clickhouse"])
def test_multiple_pii_columns_mapping(test_id: str, dbt_project: DbtProject):
    """Check sample contents for a failing ``unique`` test with two PII-tagged columns.

    Both SENSITIVE_COLUMN and ``phone`` are tagged ``pii`` and hold the same
    value on all ten rows. With ``disable_samples_on_pii_tags`` turned on, the
    test is expected to fail and to store exactly one sample row whose only
    key is ``n_records``.
    """
    # NOTE(review): the SENSITIVE_COLUMN literal below looks like an obfuscated
    # e-mail placeholder; confirm the intended value. It is identical on every
    # row, which is what produces the duplicates.
    rows = [
        {
            SENSITIVE_COLUMN: "[email protected] ",
            "phone": "123-456-7890",
            SAFE_COLUMN: idx,
        }
        for idx in range(10)
    ]
    column_specs = [
        {"name": SENSITIVE_COLUMN, "config": {"tags": ["pii"]}},
        {"name": "phone", "config": {"tags": ["pii"]}},
        {"name": SAFE_COLUMN},
    ]
    # NOTE(review): `pii_tags` is not passed here; presumably the project
    # default already includes "pii" -- confirm.
    run_vars = {
        "enable_elementary_test_materialization": True,
        "test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
        "disable_samples_on_pii_tags": True,
    }

    outcome = dbt_project.test(
        test_id,
        "unique",
        test_args={"column_name": SENSITIVE_COLUMN},
        data=rows,
        columns=column_specs,
        test_vars=run_vars,
    )
    assert outcome["status"] == "fail"

    samples_query = SAMPLES_QUERY.format(test_id=test_id)
    samples = []
    for result in dbt_project.run_query(samples_query):
        samples.append(json.loads(result["result_row"]))

    # Exactly one stored sample, exposing the duplicate count but neither the
    # `unique_field` key nor the `phone` column.
    assert len(samples) == 1
    only_sample = samples[0]
    assert "n_records" in only_sample
    assert "unique_field" not in only_sample
    assert "phone" not in only_sample
    assert len(only_sample) == 1
0 commit comments