Skip to content

Commit 4dd9232

Browse files
Implement matches override (#319)
This implements an override of the `matches` function in celpy, which allows us to perform quasi-re2 operations. Since google-re2 [does not yet support Python 3.13](google/re2#516), we cannot use it in protovalidate-python. So, as a good compromise until then, we are simulating re2 behavior much like protovalidate-es does by failing on syntax not allowed in re2. Note that protovalidate-es also parses regex flags at the beginning of regex patterns and applies them at the end. This is because ECMAScript regex does not support flags at the beginning. However, Python's re package _does_ support this. So really all this does for now is to simply fail on invalid re2 syntax. A future PR will add the ability for users to specify their own re2 engine via a config if they would like to override this behavior (which would not be a breaking change). --------- Signed-off-by: Sri Krishna <[email protected]> Co-authored-by: Sri Krishna <[email protected]>
1 parent e88b0da commit 4dd9232

File tree

3 files changed

+113
-1
lines changed

3 files changed

+113
-1
lines changed

protovalidate/internal/extra_func.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from celpy import celtypes
2222

2323
from protovalidate.internal import string_format
24+
from protovalidate.internal.matches import cel_matches
2425
from protovalidate.internal.rules import MessageType, field_to_cel
2526

2627
# See https://html.spec.whatwg.org/multipage/input.html#valid-e-mail-address
@@ -1554,12 +1555,13 @@ def __peek(self, char: str) -> bool:
15541555

15551556

15561557
def make_extra_funcs(locale: str) -> dict[str, celpy.CELFunction]:
1557-
# TODO(#257): Fix types and add tests for StringFormat.
15581558
# For now, ignoring the type.
15591559
string_fmt = string_format.StringFormat(locale) # type: ignore
15601560
return {
15611561
# Missing standard functions
15621562
"format": string_fmt.format,
1563+
# Overridden standard functions
1564+
"matches": cel_matches,
15631565
# protovalidate specific functions
15641566
"getField": cel_get_field,
15651567
"isNan": cel_is_nan,

protovalidate/internal/matches.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# Copyright 2023-2025 Buf Technologies, Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import re
16+
17+
import celpy
18+
from celpy import celtypes
19+
20+
# Regex fragments identifying syntax that RE2 does not support but that a
# pattern author used to Python's re package (or another backtracking engine)
# may write.
# RE2: https://github.com/google/re2/wiki/syntax
#
# Every entry is itself a regular expression that gets searched for inside the
# text of the user-supplied pattern. The scan is purely textual and is not
# escape-aware, so e.g. an escaped backslash followed by a digit (``\\1``) is
# flagged as well.
invalid_patterns = [
    r"\\[1-9]",  # backreference
    r"\\k<\w+>",  # backreference
    r"\(\?P=",  # named backreference, Python spelling: (?P=name)
    r"\(\?\=",  # lookahead
    r"\(\?\!",  # negative lookahead
    r"\(\?\<\=",  # lookbehind
    r"\(\?\<\!",  # negative lookbehind
    r"\(\?\(",  # conditional group: (?(id)yes|no)
    r"\(\?>",  # atomic group: (?>...)
    r"\(\?#",  # inline comment: (?#...)
    r"\\c[A-Z]",  # control character
    r"\\u[0-9a-fA-F]{4}",  # UTF-16 code-unit
    r"\\0(?!\d)",  # NUL
    r"\[\\b.*\]",  # Backspace eg: [\b]
]
34+
35+
36+
def cel_matches(text: celtypes.Value, pattern: celtypes.Value) -> celpy.Result:
    """Report whether ``pattern`` is found anywhere in ``text``.

    CEL specifies RE2 regular expressions, whose syntax differs from Python's
    ``re``. The google-re2 package (an optional celpy dependency) would be the
    natural engine, but at the time of writing it ships no pre-built binary
    for the newest Python release (3.13), which causes installation problems
    for users on that version.

    Rather than pass that problem on to users, this function approximates RE2
    with ``re``: any pattern containing a construct listed in
    ``invalid_patterns`` is rejected before the search is attempted.

    Args:
        text: The string to search; must be a ``celtypes.StringType``.
        pattern: The regular expression; must be a ``celtypes.StringType``.

    Returns:
        ``celtypes.BoolType(True)`` if the pattern matches at any position of
        ``text``, ``celtypes.BoolType(False)`` otherwise. If ``re`` itself
        cannot process the pattern, a ``celpy.CELEvalError`` is *returned*
        (not raised).

    Raises:
        celpy.CELEvalError: If either argument is not a CEL string, or if the
            pattern contains syntax listed in ``invalid_patterns``.
    """
    # Both operands must already be CEL strings; no coercion is attempted.
    if not isinstance(text, celtypes.StringType):
        raise celpy.CELEvalError("invalid argument for text, expected string")
    if not isinstance(pattern, celtypes.StringType):
        raise celpy.CELEvalError("invalid argument for pattern, expected string")

    # Simulate RE2 by refusing patterns that use any construct it lacks.
    uses_unsupported_syntax = any(
        re.search(unsupported, pattern) is not None for unsupported in invalid_patterns
    )
    if uses_unsupported_syntax:
        raise celpy.CELEvalError(f"error evaluating pattern {pattern}, invalid RE2 syntax")

    try:
        found = re.search(pattern, text)
    except re.error as regex_error:
        # A pattern that re cannot handle is reported as an error value.
        return celpy.CELEvalError("match error", regex_error.__class__, regex_error.args)

    return celtypes.BoolType(found is not None)

tests/matches_test.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Copyright 2023-2025 Buf Technologies, Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import unittest
16+
17+
import celpy
18+
from celpy import celtypes
19+
20+
from protovalidate.internal import extra_func
21+
22+
# Sample patterns, one per construct, that cel_matches is expected to reject
# with its "invalid RE2 syntax" error.
invalid_patterns = [
    r"\1",  # numbered backreference
    r"\k<name>",  # named backreference
    r"Jack(?=Sprat)",  # lookahead
    r"Jack(?!Sprat)",  # negative lookahead
    r"(?<=Sprat)Jack",  # lookbehind
    r"(?<!Sprat)Jack",  # negative lookbehind
    r"\cM\cJ",  # control characters
    r"\u0041",  # UTF-16 code unit escape
    r"\0 \01 \0a \012",  # NUL escape alongside octal-looking escapes
    r"[\b]",  # backspace inside a character class
]
34+
35+
36+
class TestMatches(unittest.TestCase):
    """Checks that ``cel_matches`` refuses regex syntax RE2 does not accept."""

    def test_invalid_re2_syntax(self):
        """Each entry of ``invalid_patterns`` must raise the RE2-syntax error."""
        subject = celtypes.StringType("test")
        for pattern in invalid_patterns:
            cel_pattern = celtypes.StringType(pattern)
            # subTest lets the loop continue after a failure, so a single run
            # reports every offending pattern instead of only the first one.
            with self.subTest(pattern=pattern):
                with self.assertRaises(
                    celpy.CELEvalError,
                    msg=f"expected an error on pattern {cel_pattern}",
                ) as raised:
                    extra_func.cel_matches(subject, cel_pattern)
                self.assertEqual(
                    str(raised.exception),
                    f"error evaluating pattern {cel_pattern}, invalid RE2 syntax",
                )

0 commit comments

Comments
 (0)