Skip to content

Commit 38a8910

Browse files
authored
Fix: escape and truncate default query tag values (#1273)
<!-- Please review our pull request review process in CONTRIBUTING.md before you proceed. --> Resolves # <!--- Include the number of the issue addressed by this PR above if applicable. Example: resolves #1234 Please review our pull request review process in CONTRIBUTING.md before you proceed. --> ### Description Fix to ensure that default (i.e. auto) query tag values are not invalid - Escape special characters (FWIW, model names are more restrictive than query tag values, but it doesn't hurt to have an extra layer of defense) - Truncate values longer than 128 characters Tested using a model with name `accepted_values_backhaulcontractschedule_site_category_3__Non_Served_Sites__New_Sites__Served_Sites__Non_HS_Sites__Served_Site__CHARACTERS_BEYOND_128` <img width="786" height="209" alt="Screenshot 2025-11-25 at 3 11 37 PM" src="https://github.com/user-attachments/assets/924fc881-020f-4637-832a-36140df6c400" /> ### Checklist - [ ] I have run this code in development and it appears to resolve the stated issue - [ ] This PR includes tests, or tests are not required/relevant for this PR - [ ] I have updated the `CHANGELOG.md` and added information about my change to the "dbt-databricks next" section.
1 parent bfcb5c7 commit 38a8910

File tree

3 files changed

+131
-27
lines changed

3 files changed

+131
-27
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
## dbt-databricks 1.11.3 (TBD)
22

3+
### Fixes
4+
5+
- Truncate (128 characters max) and escape special characters for default query tag values
6+
37
## dbt-databricks 1.11.2 (Nov 18, 2025)
48

59
### Fixes

dbt/adapters/databricks/utils.py

Lines changed: 48 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,20 @@ def parse_query_tags(query_tags_str: Optional[str]) -> dict[str, str]:
142142
except json.JSONDecodeError as e:
143143
raise DbtValidationError(f"Invalid JSON in query_tags: {e}")
144144

145+
@staticmethod
def escape_tag_value(key: str, value: str, source: str = "") -> str:
    """Return ``value`` with backslashes, commas, and colons backslash-escaped.

    A warning naming ``key`` (prefixed with ``source`` when one is given) is
    logged whenever the value contains at least one of those characters.
    Values containing none of them are returned unchanged and nothing is logged.
    """
    if not any(ch in "\\,:" for ch in value):
        return value

    label = f"{source}: " if source else ""
    logger.warning(
        f"{label}Query tag value for key '{key}' contains unescaped "
        f"character(s): {value}. Escaping..."
    )
    # One pass over the string: backslashes introduced while escaping commas
    # and colons are never themselves re-escaped.
    return value.translate(str.maketrans({"\\": r"\\", ",": r"\,", ":": r"\:"}))
158+
145159
@staticmethod
146160
def validate_query_tags(tags: dict[str, str], source: str = "") -> None:
147161
"""Validate query tags for reserved keys and limits."""
@@ -156,17 +170,11 @@ def validate_query_tags(tags: dict[str, str], source: str = "") -> None:
156170
f"Reserved keys are: {', '.join(sorted(QueryTagsUtils.RESERVED_KEYS))}"
157171
)
158172

159-
# Escape commas, colons, and backslashes in tag values
173+
# Escape values (modifies dict in place)
160174
for key in tags.keys():
161-
value = tags[key]
162-
if re.search(r"[\\,:]", value):
163-
logger.warning(
164-
f"{source_prefix}Query tag value for key '{key}' contains unescaped "
165-
f"character(s): {value}. Escaping..."
166-
)
167-
tags[key] = value.replace("\\", "\\\\").replace(",", "\\,").replace(":", "\\:")
175+
tags[key] = QueryTagsUtils.escape_tag_value(key, tags[key], source)
168176

169-
# Validate that no tag value exceeds 128 characters
177+
# Validate that no tag value exceeds 128 characters (after escaping)
170178
long_values = {k: v for k, v in tags.items() if len(v) > 128}
171179
if long_values:
172180
raise DbtValidationError(
@@ -181,6 +189,28 @@ def validate_query_tags(tags: dict[str, str], source: str = "") -> None:
181189
f"Maximum allowed is {QueryTagsUtils.MAX_TAGS}"
182190
)
183191

192+
@staticmethod
def process_default_tags(tags: dict[str, str]) -> dict[str, str]:
    """
    Build a new dict of default tags with long values cut and special characters escaped.

    Each value longer than 128 characters is first shortened to its first 128
    characters (a debug message is logged), and only then passed through
    ``QueryTagsUtils.escape_tag_value``. Cutting first means an escape sequence
    is never split in two; as a consequence the escaped result can end up
    longer than 128 characters. The input dict is not modified.
    """
    result: dict[str, str] = {}
    for tag_key in tags:
        tag_value = tags[tag_key]
        original_length = len(tag_value)
        if original_length > 128:
            logger.debug(
                f"Default tags: Query tag value for key '{tag_key}' exceeds 128 characters "
                f"({original_length} chars). Truncating to 128 characters."
            )
            tag_value = tag_value[:128]

        result[tag_key] = QueryTagsUtils.escape_tag_value(tag_key, tag_value, "Default tags")

    return result
213+
184214
@staticmethod
185215
def merge_query_tags(
186216
connection_tags: dict[str, str],
@@ -191,23 +221,20 @@ def merge_query_tags(
191221
Merge query tags with precedence: model > connection > default.
192222
Validates that no reserved keys are used and tag limits are respected.
193223
"""
194-
# All sources are now already parsed dicts
195-
conn_tags = connection_tags
196-
model_tags_dict = model_tags
197-
default_tags_dict = default_tags
224+
# Process default tags (escape and truncate, don't validate reserved keys)
225+
processed_default_tags = QueryTagsUtils.process_default_tags(default_tags)
198226

199-
# Validate each source (user-provided tags cannot use reserved keys)
200-
QueryTagsUtils.validate_query_tags(conn_tags, "Connection config")
201-
QueryTagsUtils.validate_query_tags(model_tags_dict, "Model config")
227+
# Validate user-provided tags (cannot use reserved keys)
228+
QueryTagsUtils.validate_query_tags(connection_tags, "Connection config")
229+
QueryTagsUtils.validate_query_tags(model_tags, "Model config")
202230

203231
# Merge with precedence: model > connection > default
204232
merged = {}
205-
merged.update(default_tags_dict)
206-
merged.update(conn_tags)
207-
merged.update(model_tags_dict)
233+
merged.update(processed_default_tags)
234+
merged.update(connection_tags)
235+
merged.update(model_tags)
208236

209-
# Final validation of merged tags (only check total count, not reserved keys
210-
# since default tags are allowed to use reserved keys)
237+
# Final validation of merged tags (only check total count)
211238
if len(merged) > QueryTagsUtils.MAX_TAGS:
212239
raise DbtValidationError(
213240
f"Too many total query tags ({len(merged)}). "

tests/unit/test_query_tags.py

Lines changed: 79 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -88,25 +88,25 @@ def test_validate_query_tags_escapes_comma(self):
8888
"""Test that commas in tag values are escaped."""
8989
tags = {"team": "marketing,sales"}
9090
QueryTagsUtils.validate_query_tags(tags)
91-
assert tags["team"] == "marketing\\,sales"
91+
assert tags["team"] == r"marketing\,sales"
9292

9393
def test_validate_query_tags_escapes_colon(self):
9494
"""Test that colons in tag values are escaped."""
9595
tags = {"description": "project:alpha"}
9696
QueryTagsUtils.validate_query_tags(tags)
97-
assert tags["description"] == "project\\:alpha"
97+
assert tags["description"] == r"project\:alpha"
9898

9999
def test_validate_query_tags_escapes_backslash(self):
100100
"""Test that backslashes in tag values are escaped."""
101-
tags = {"path": "folder\\subfolder"}
101+
tags = {"path": r"folder\subfolder"}
102102
QueryTagsUtils.validate_query_tags(tags)
103-
assert tags["path"] == "folder\\\\subfolder"
103+
assert tags["path"] == r"folder\\subfolder"
104104

105105
def test_validate_query_tags_escapes_multiple_special_chars(self):
106106
"""Test that multiple special characters are all escaped."""
107-
tags = {"complex": "value:with,comma\\and\\backslash"}
107+
tags = {"complex": r"value:with,comma\and\backslash"}
108108
QueryTagsUtils.validate_query_tags(tags)
109-
assert tags["complex"] == "value\\:with\\,comma\\\\and\\\\backslash"
109+
assert tags["complex"] == r"value\:with\,comma\\and\\backslash"
110110

111111
def test_validate_query_tags_multiple_values_too_long(self):
112112
tags = {
@@ -138,6 +138,79 @@ def test_validate_query_tags_value_after_escaping_too_long(self):
138138
with pytest.raises(DbtValidationError, match=expected_msg):
139139
QueryTagsUtils.validate_query_tags(tags)
140140

141+
def test_process_default_tags_escapes_special_chars(self):
    """Check that process_default_tags escapes colons, commas, and backslashes."""
    # key -> (raw input value, expected escaped value)
    cases = {
        "key1": ("value:with:colons", r"value\:with\:colons"),
        "key2": ("value,with,commas", r"value\,with\,commas"),
        "key3": (r"value\with\backslashes", r"value\\with\\backslashes"),
        "key4": (r"path\to:file,v1", r"path\\to\:file\,v1"),
        "key5": (r"a\b:c,d\e:f,g", r"a\\b\:c\,d\\e\:f\,g"),
        # Input that already looks escaped is escaped again, not passed through.
        "key6": (r"start\,middle:,end", r"start\\\,middle\:\,end"),
    }
    raw_tags = {name: raw for name, (raw, _) in cases.items()}

    processed = QueryTagsUtils.process_default_tags(raw_tags)

    for name, (_, expected) in cases.items():
        assert processed[name] == expected
159+
160+
def test_process_default_tags_truncates_long_values(self):
    """Check that a 150-character value comes back cut to 128 characters."""
    processed = QueryTagsUtils.process_default_tags({"long_key": "x" * 150})
    truncated = processed["long_key"]

    # Only the first 128 characters survive; nothing needs escaping here.
    assert len(truncated) == 128
    assert truncated == "x" * 128
170+
171+
def test_process_default_tags_truncates_before_escaping(self):
    """Check that values are cut to 128 characters first and escaped afterwards."""
    # 126 x's followed by three colons: 129 characters, one over the limit.
    oversized = "x" * 126 + ":::"

    processed = QueryTagsUtils.process_default_tags({"key": oversized})
    escaped = processed["key"]

    # Truncation drops the last colon; escaping the remaining two adds one
    # backslash each, so the result is 126 + 4 = 130 characters long
    # (over 128, but no escape sequence has been split).
    assert len(escaped) == 130
    assert escaped == "x" * 126 + r"\:\:"
185+
186+
def test_process_default_tags_truncation_avoids_broken_escapes(self):
    """Check that a trailing special character at the limit is escaped intact."""
    # Exactly 128 characters, ending in a comma. Truncating after escaping
    # could leave a dangling backslash at the end of the value.
    at_limit = "x" * 127 + ","

    processed = QueryTagsUtils.process_default_tags({"key": at_limit})
    escaped = processed["key"]

    # Nothing is cut; the comma becomes "\," so the result is 129 characters.
    assert len(escaped) == 129
    assert escaped == "x" * 127 + r"\,"
198+
199+
def test_process_default_tags_allows_reserved_keys(self):
    """Check that process_default_tags accepts reserved keys (unlike validate_query_tags)."""
    default_tags = {
        "@@dbt_model_name": "test_model",
        "@@dbt_core_version": "1.5.0",
        "custom_tag": "value",
    }

    # Must complete without raising, even though reserved keys are present.
    processed = QueryTagsUtils.process_default_tags(default_tags)

    # No value contains a comma, colon, or backslash, and none is over
    # 128 characters, so every entry is expected back unchanged.
    for tag_key in ("@@dbt_model_name", "@@dbt_core_version", "custom_tag"):
        assert processed[tag_key] == default_tags[tag_key]
213+
141214
def test_merge_query_tags_precedence(self):
142215
"""Test that model tags override connection tags."""
143216
connection_tags = {"team": "marketing", "cost_center": "3000"}

0 commit comments

Comments
 (0)