Skip to content

Commit 89e49de

Browse files
Address Itamar's code review comments
- Rename is_pii_column.sql to get_pii_columns_from_parent_model.sql to match macro name - Extract get_column_tags helper macro for better code organization - Add model-level PII tag support - if model has PII tags, all columns are considered PII - Add clarifying comment about intentional over-censoring with * queries in CTEs - Keep flattened_test parameter in query_test_result_rows for future functionality Fixes all outstanding code review feedback from haritamar on PR #833 Co-Authored-By: Yosef Arbiv <yosef.arbiv@gmail.com>
1 parent 057a2cc commit 89e49de

File tree

3 files changed

+73
-47
lines changed

3 files changed

+73
-47
lines changed

macros/edr/materializations/test/test.sql

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,8 @@
175175
{% set test_query_lower = test_query.lower() %}
176176

177177
{# Check if query uses * (select all columns) #}
178+
{# Note: This is intentionally conservative and may over-censor in cases like
179+
"SELECT * FROM other_table" in CTEs, but it's better to be safe with PII data #}
178180
{% if '*' in test_query_lower %}
179181
{% do return(true) %}
180182
{% endif %}
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
{% macro get_column_tags(column_node) %}
2+
{# column -> tags #}
3+
{% set column_tags = column_node.get('tags', []) %}
4+
5+
{# column -> config -> tags #}
6+
{% set config_dict = column_node.get('config', {}) %}
7+
{% set config_tags = config_dict.get('tags', []) %}
8+
9+
{# column -> meta -> tags #}
10+
{% set meta_dict = column_node.get('meta', {}) %}
11+
{% set meta_tags = meta_dict.get('tags', []) %}
12+
13+
{% set all_column_tags = config_tags + column_tags + meta_tags %}
14+
{% do return(all_column_tags | map('lower') | list) %}
15+
{% endmacro %}
16+
17+
{% macro get_pii_columns_from_parent_model(flattened_test) %}
18+
{% set pii_columns = [] %}
19+
20+
{% if not elementary.get_config_var('disable_samples_on_pii_tags') %}
21+
{% do return(pii_columns) %}
22+
{% endif %}
23+
24+
{% set parent_model_unique_id = elementary.insensitive_get_dict_value(flattened_test, 'parent_model_unique_id') %}
25+
{% set parent_model = elementary.get_node(parent_model_unique_id) %}
26+
27+
{% if not parent_model %}
28+
{% do return(pii_columns) %}
29+
{% endif %}
30+
31+
{% set raw_pii_tags = elementary.get_config_var('pii_tags') %}
32+
{% set pii_tags = (raw_pii_tags if raw_pii_tags is iterable else [raw_pii_tags]) | map('lower') | list %}
33+
34+
{# Check if the model itself has PII tags - if so, all columns are considered PII #}
35+
{% set model_tags = parent_model.get('tags', []) %}
36+
{% set model_config_tags = parent_model.get('config', {}).get('tags', []) %}
37+
{% set model_meta_tags = parent_model.get('meta', {}).get('tags', []) %}
38+
{% set all_model_tags = (model_tags + model_config_tags + model_meta_tags) | map('lower') | list %}
39+
40+
{% for pii_tag in pii_tags %}
41+
{% if pii_tag in all_model_tags %}
42+
{# Model has PII tag, return all column names #}
43+
{% set column_nodes = parent_model.get("columns") %}
44+
{% if column_nodes %}
45+
{% for column_node in column_nodes.values() %}
46+
{% do pii_columns.append(column_node.get('name')) %}
47+
{% endfor %}
48+
{% endif %}
49+
{% do return(pii_columns) %}
50+
{% endif %}
51+
{% endfor %}
52+
53+
{# Model doesn't have PII tags, check individual columns #}
54+
{% set column_nodes = parent_model.get("columns") %}
55+
{% if not column_nodes %}
56+
{% do return(pii_columns) %}
57+
{% endif %}
58+
59+
{% for column_node in column_nodes.values() %}
60+
{% set all_column_tags_lower = elementary.get_column_tags(column_node) %}
61+
62+
{% for pii_tag in pii_tags %}
63+
{% if pii_tag in all_column_tags_lower %}
64+
{% do pii_columns.append(column_node.get('name')) %}
65+
{% break %}
66+
{% endif %}
67+
{% endfor %}
68+
{% endfor %}
69+
70+
{% do return(pii_columns) %}
71+
{% endmacro %}

macros/edr/system/system_utils/is_pii_column.sql

Lines changed: 0 additions & 47 deletions
This file was deleted.

0 commit comments

Comments
 (0)