Skip to content

Commit e5dd858

Browse files
committed
Handle keywords that start or end with non-word characters
1 parent fca08ae commit e5dd858

File tree

1 file changed

+15
-3
lines changed

1 file changed

+15
-3
lines changed

src/guardrails/checks/text/keywords.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,9 +73,21 @@ def _compile_pattern(keywords: tuple[str, ...]) -> re.Pattern[str]:
7373
Returns:
7474
re.Pattern[str]: Compiled regex pattern to match any given keyword.
7575
"""
76-
escaped_keywords = tuple(re.escape(keyword) for keyword in keywords)
77-
# (?<!\w)/(?!\w) emulate Unicode-aware word boundaries (letters, digits, underscore).
78-
pattern_text = r"(?<!\w)(?:" + "|".join(escaped_keywords) + r")(?!\w)"
76+
# Build individual patterns with conditional boundary assertions
77+
# Apply (?<!\w) only if the keyword starts with a word char and (?!\w) only if it ends with one, so keywords with leading/trailing punctuation still match next to word chars
78+
patterns = []
79+
for keyword in keywords:
80+
escaped = re.escape(keyword)
81+
# Check first and last character of the original keyword for word character status
82+
starts_with_word_char = keyword and keyword[0].isalnum() or (keyword and keyword[0] == "_")
83+
ends_with_word_char = keyword and keyword[-1].isalnum() or (keyword and keyword[-1] == "_")
84+
85+
prefix = r"(?<!\w)" if starts_with_word_char else ""
86+
suffix = r"(?!\w)" if ends_with_word_char else ""
87+
patterns.append(f"{prefix}{escaped}{suffix}")
88+
89+
# (?<!\w) and (?!\w) emulate Unicode-aware word boundaries (letters, digits, underscore).
90+
pattern_text = "(?:" + "|".join(patterns) + ")"
7991

8092
return re.compile(pattern_text, re.IGNORECASE | re.UNICODE)
8193

0 commit comments

Comments
 (0)