Skip to content

Commit c5b9f45

Browse files
Fix string/list handling in PII column detection macros
- Fix get_column_tags to properly handle string tags vs. lists.
- Fix pii_tags processing to handle scalar strings correctly.
- Apply consistent string/list normalization to model tags.
- Addresses CodeRabbit comments r2250021209 and r2250021214.

The core issue was that Jinja2's 'is iterable' test returns True for strings, so a scalar string tag such as 'pii' was treated as the character sequence ['p','i','i'] instead of the single-element list ['pii']. Fixed by adding an explicit string check ('src is string') to distinguish strings from actual lists/arrays.

Co-Authored-By: Yosef Arbiv <[email protected]>
1 parent 89e49de commit c5b9f45

File tree

1 file changed

+27
-16
lines changed

1 file changed

+27
-16
lines changed

macros/edr/system/system_utils/get_pii_columns_from_parent_model.sql

Lines changed: 27 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,17 @@
11
{% macro get_column_tags(column_node) %}
2-
{# column -> tags #}
3-
{% set column_tags = column_node.get('tags', []) %}
2+
{% set _tags_sources = [
3+
column_node.get('tags', []),
4+
column_node.get('config', {}).get('tags', []),
5+
column_node.get('meta', {}).get('tags', []),
6+
] %}
47

5-
{# column -> config -> tags #}
6-
{% set config_dict = column_node.get('config', {}) %}
7-
{% set config_tags = config_dict.get('tags', []) %}
8-
9-
{# column -> meta -> tags #}
10-
{% set meta_dict = column_node.get('meta', {}) %}
11-
{% set meta_tags = meta_dict.get('tags', []) %}
8+
{% set all_column_tags = [] %}
9+
{% for src in _tags_sources %}
10+
{% set tags_list = src if src is iterable and not (src is string) else [src] %}
11+
{% do all_column_tags.extend(tags_list) %}
12+
{% endfor %}
1213

13-
{% set all_column_tags = config_tags + column_tags + meta_tags %}
14-
{% do return(all_column_tags | map('lower') | list) %}
14+
{% do return(all_column_tags | map('lower') | unique | list) %}
1515
{% endmacro %}
1616

1717
{% macro get_pii_columns_from_parent_model(flattened_test) %}
@@ -29,13 +29,24 @@
2929
{% endif %}
3030

3131
{% set raw_pii_tags = elementary.get_config_var('pii_tags') %}
32-
{% set pii_tags = (raw_pii_tags if raw_pii_tags is iterable else [raw_pii_tags]) | map('lower') | list %}
32+
{% if raw_pii_tags is string %}
33+
{% set pii_tags = [raw_pii_tags|lower] %}
34+
{% else %}
35+
{% set pii_tags = (raw_pii_tags or []) | map('lower') | list %}
36+
{% endif %}
3337

3438
{# Check if the model itself has PII tags - if so, all columns are considered PII #}
35-
{% set model_tags = parent_model.get('tags', []) %}
36-
{% set model_config_tags = parent_model.get('config', {}).get('tags', []) %}
37-
{% set model_meta_tags = parent_model.get('meta', {}).get('tags', []) %}
38-
{% set all_model_tags = (model_tags + model_config_tags + model_meta_tags) | map('lower') | list %}
39+
{% set _model_tags_sources = [
40+
parent_model.get('tags', []),
41+
parent_model.get('config', {}).get('tags', []),
42+
parent_model.get('meta', {}).get('tags', [])
43+
] %}
44+
{% set all_model_tags = [] %}
45+
{% for src in _model_tags_sources %}
46+
{% set tags_list = src if src is iterable and not (src is string) else [src] %}
47+
{% do all_model_tags.extend(tags_list) %}
48+
{% endfor %}
49+
{% set all_model_tags = all_model_tags | map('lower') | unique | list %}
3950

4051
{% for pii_tag in pii_tags %}
4152
{% if pii_tag in all_model_tags %}

0 commit comments

Comments
 (0)