File tree Expand file tree Collapse file tree 1 file changed +15
-3
lines changed
src/guardrails/checks/text Expand file tree Collapse file tree 1 file changed +15
-3
lines changed Original file line number Diff line number Diff line change @@ -73,9 +73,21 @@ def _compile_pattern(keywords: tuple[str, ...]) -> re.Pattern[str]:
7373 Returns:
7474 re.Pattern[str]: Compiled regex pattern to match any given keyword.
7575 """
76- escaped_keywords = tuple (re .escape (keyword ) for keyword in keywords )
77- # (?<!\w)/(?!\w) emulate Unicode-aware word boundaries (letters, digits, underscore).
78- pattern_text = r"(?<!\w)(?:" + "|" .join (escaped_keywords ) + r")(?!\w)"
76+ # Build individual patterns with conditional boundary assertions
77+ # Only apply (?<!\w) if keyword starts with word char, (?!\w) if it ends with word char
78+ patterns = []
79+ for keyword in keywords :
80+ escaped = re .escape (keyword )
81+ # Check first and last character of the original keyword for word character status
82+ starts_with_word_char = keyword and keyword [0 ].isalnum () or (keyword and keyword [0 ] == "_" )
83+ ends_with_word_char = keyword and keyword [- 1 ].isalnum () or (keyword and keyword [- 1 ] == "_" )
84+
85+ prefix = r"(?<!\w)" if starts_with_word_char else ""
86+ suffix = r"(?!\w)" if ends_with_word_char else ""
87+ patterns .append (f"{ prefix } { escaped } { suffix } " )
88+
89+ # (?<!\w) and (?!\w) emulate Unicode-aware word boundaries (letters, digits, underscore).
90+ pattern_text = "(?:" + "|" .join (patterns ) + ")"
7991
8092 return re .compile (pattern_text , re .IGNORECASE | re .UNICODE )
8193
You can’t perform that action at this time.
0 commit comments