Skip to content

Commit d481311

Browse files
Address GitHub feedback: rename global_tags to column_tags and add test for disabled flag
- Rename the global_tags variable to column_tags in is_pii_column.sql for clarity.
- Add the test_column_pii_sampling_tags_exist_but_flag_disabled test case.
- Verify that when PII tags exist but disable_samples_on_pii_columns=false, samples are collected normally, including PII columns.

Addresses GitHub comments from arbiv on PR #833.

Co-Authored-By: Yosef Arbiv <[email protected]>
1 parent 6014bac commit d481311

File tree

2 files changed

+42
-2
lines changed

2 files changed

+42
-2
lines changed

integration_tests/tests/test_column_pii_sampling.py

Lines changed: 40 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -94,6 +94,46 @@ def test_column_pii_sampling_disabled(test_id: str, dbt_project: DbtProject):
9494
assert SAFE_COLUMN in sample
9595

9696

97+
@pytest.mark.skip_targets(["clickhouse"])
def test_column_pii_sampling_tags_exist_but_flag_disabled(
    test_id: str, dbt_project: DbtProject
):
    """Check that a PII tag alone does not remove a column from the samples.

    The sensitive column carries the ``pii`` tag and ``pii_column_tags``
    lists that tag, but ``disable_samples_on_pii_columns`` is set to False.
    Every row has a null safe column, so the ``not_null`` test on it is
    expected to fail, and each stored sample is expected to contain both the
    sensitive and the safe column.
    """
    rows = [
        {SENSITIVE_COLUMN: f"user{idx}@example.com", SAFE_COLUMN: None}
        for idx in range(10)
    ]
    column_definitions = [
        {"name": SENSITIVE_COLUMN, "config": {"tags": ["pii"]}},
        {"name": SAFE_COLUMN},
    ]
    run_vars = {
        "enable_elementary_test_materialization": True,
        "test_sample_row_count": TEST_SAMPLE_ROW_COUNT,
        # The tag is configured, but the switch that acts on it is off.
        "disable_samples_on_pii_columns": False,
        "pii_column_tags": ["pii"],
    }

    test_result = dbt_project.test(
        test_id,
        "not_null",
        test_args={"column_name": SAFE_COLUMN},
        data=rows,
        columns=column_definitions,
        test_vars=run_vars,
    )
    assert test_result["status"] == "fail"

    sample_rows = dbt_project.run_query(SAMPLES_QUERY.format(test_id=test_id))
    samples = [json.loads(row["result_row"]) for row in sample_rows]

    assert len(samples) == TEST_SAMPLE_ROW_COUNT
    for sample in samples:
        # With the flag off, the tagged column must still be sampled.
        assert SENSITIVE_COLUMN in sample
        assert SAFE_COLUMN in sample
136+
97137
@pytest.mark.skip_targets(["clickhouse"])
98138
def test_column_pii_sampling_all_columns_pii(test_id: str, dbt_project: DbtProject):
99139
"""Test behavior when all columns are tagged as PII"""

macros/edr/system/system_utils/is_pii_column.sql

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -25,10 +25,10 @@
2525
{% for column_node in column_nodes.values() %}
2626
{% set config_dict = column_node.get('config', {}) %}
2727
{% set config_tags = config_dict.get('tags', []) %}
28-
{% set global_tags = column_node.get('tags', []) %}
28+
{% set column_tags = column_node.get('tags', []) %}
2929
{% set meta_dict = column_node.get('meta', {}) %}
3030
{% set meta_tags = meta_dict.get('tags', []) %}
31-
{% set all_column_tags = config_tags + global_tags + meta_tags %}
31+
{% set all_column_tags = config_tags + column_tags + meta_tags %}
3232

3333
{% for pii_tag in pii_column_tags %}
3434
{% if pii_tag in all_column_tags %}

0 commit comments

Comments
 (0)