diff --git a/protovalidate/internal/extra_func.py b/protovalidate/internal/extra_func.py index ae2b02c0..442cdb77 100644 --- a/protovalidate/internal/extra_func.py +++ b/protovalidate/internal/extra_func.py @@ -21,6 +21,7 @@ from celpy import celtypes from protovalidate.internal import string_format +from protovalidate.internal.matches import cel_matches from protovalidate.internal.rules import MessageType, field_to_cel # See https://html.spec.whatwg.org/multipage/input.html#valid-e-mail-address @@ -1554,12 +1555,13 @@ def __peek(self, char: str) -> bool: def make_extra_funcs(locale: str) -> dict[str, celpy.CELFunction]: - # TODO(#257): Fix types and add tests for StringFormat. # For now, ignoring the type. string_fmt = string_format.StringFormat(locale) # type: ignore return { # Missing standard functions "format": string_fmt.format, + # Overridden standard functions + "matches": cel_matches, # protovalidate specific functions "getField": cel_get_field, "isNan": cel_is_nan, diff --git a/protovalidate/internal/matches.py b/protovalidate/internal/matches.py new file mode 100644 index 00000000..72728582 --- /dev/null +++ b/protovalidate/internal/matches.py @@ -0,0 +1,66 @@ +# Copyright 2023-2025 Buf Technologies, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re + +import celpy +from celpy import celtypes + +# Patterns that are supported in Python's re package and not in re2. 
# RE2: https://github.com/google/re2/wiki/syntax
invalid_patterns = [
    r"\\[1-9]",  # backreference
    r"\\k<\w+>",  # backreference
    r"\(\?\=",  # lookahead
    r"\(\?\!",  # negative lookahead
    r"\(\?\<\=",  # lookbehind
    r"\(\?\<\!",  # negative lookbehind
    r"\\c[A-Z]",  # control character
    r"\\u[0-9a-fA-F]{4}",  # UTF-16 code-unit
    r"\\0(?!\d)",  # NUL
    r"\[\\b.*\]",  # Backspace eg: [\b]
]

# A construct only counts if its first character is not itself escaped, i.e. it
# is preceded by an even number (possibly zero) of backslashes. Without this
# guard, valid RE2 such as r"\\1" (a literal backslash followed by "1") or
# r"\(?=" (an optional literal "(" followed by "=") would be rejected.
_UNESCAPED_PREFIX = r"(?<!\\)(?:\\\\)*"

# Single precompiled alternation of every entry in invalid_patterns, built once
# at import time; later mutation of invalid_patterns does not affect it.
_INVALID_RE2_SYNTAX = re.compile(
    _UNESCAPED_PREFIX + "(?:" + "|".join(invalid_patterns) + ")"
)


def cel_matches(text: celtypes.Value, pattern: celtypes.Value) -> celpy.Result:
    """Return True if the given pattern matches text. False otherwise.

    CEL uses RE2 syntax which diverges from Python re in various ways. Ideally, we
    would use the google-re2 package, which is an extra dep in celpy, but at press
    time it does not provide a pre-built binary for the latest version of Python (3.13)
    which means those using this version will run into many issues.

    Instead of foisting this issue on users, we instead mimic re2 syntax by failing
    to compile the regex for patterns not compatible with re2.

    Args:
        text: The string to search; must be a ``celtypes.StringType``.
        pattern: The regular expression; must be a ``celtypes.StringType``.

    Returns:
        ``celtypes.BoolType(True)`` if ``pattern`` is found anywhere in ``text``
        (``re.search`` semantics), ``celtypes.BoolType(False)`` otherwise. If
        Python's ``re`` rejects the pattern, a ``celpy.CELEvalError`` is
        *returned* (not raised) as the result value.

    Raises:
        celpy.CELEvalError: If ``text`` or ``pattern`` is not a string, or if
            ``pattern`` uses an unescaped construct listed in
            ``invalid_patterns``.
    """
    if not isinstance(text, celtypes.StringType):
        msg = "invalid argument for text, expected string"
        raise celpy.CELEvalError(msg)
    if not isinstance(pattern, celtypes.StringType):
        msg = "invalid argument for pattern, expected string"
        raise celpy.CELEvalError(msg)

    # Simulate re2 by failing on any patterns not compatible with re2 syntax
    if _INVALID_RE2_SYNTAX.search(pattern) is not None:
        msg = f"error evaluating pattern {pattern}, invalid RE2 syntax"
        raise celpy.CELEvalError(msg)

    try:
        m = re.search(pattern, text)
    except re.error as ex:
        # The compile failure is handed back as an error value rather than raised.
        return celpy.CELEvalError("match error", ex.__class__, ex.args)

    return celtypes.BoolType(m is not None)
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import celpy +from celpy import celtypes + +from protovalidate.internal import extra_func + +invalid_patterns = [ + r"\1", + r"\k", + r"Jack(?=Sprat)", + "Jack(?!Sprat)", + "(?<=Sprat)Jack", + "(?