Skip to content

Commit 55914af

Browse files
authored
Add ability to specify custom regex matcher (#325)
This adds the ability to specify an optional custom regex matcher on the `Config` object. If provided, the custom matcher is invoked instead of protovalidate-python's built-in `matches` implementation.
1 parent 0c7db6c commit 55914af

File tree

13 files changed

+111
-34
lines changed

13 files changed

+111
-34
lines changed

gen/tests/example/v1/validations_pb2.py

Lines changed: 5 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

gen/tests/example/v1/validations_pb2.pyi

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

proto/tests/example/v1/validations.proto

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,11 @@ message MapKeys {
9393
message Embed {
9494
int64 val = 1 [(buf.validate.field).int64.gt = 0];
9595
}
96+
9697
message RepeatedEmbedSkip {
9798
repeated Embed val = 1 [(buf.validate.field).repeated.items.ignore = IGNORE_ALWAYS];
9899
}
100+
101+
message InvalidRESyntax {
102+
string value = 1 [(buf.validate.field).string.pattern = "^\\z"];
103+
}

protovalidate/config.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
from collections.abc import Callable
1516
from dataclasses import dataclass
17+
from typing import Optional
1618

1719

1820
@dataclass
@@ -21,6 +23,10 @@ class Config:
2123
2224
Attributes:
2325
fail_fast (bool): If true, validation will stop after the first violation. Defaults to False.
26+
regex_matches_func: An optional regex matcher to use. If specified, this will be used to match
27+
on regex expressions instead of this library's `matches` logic.
2428
"""
2529

2630
fail_fast: bool = False
31+
32+
regex_matches_func: Optional[Callable[[str, str], bool]] = None

protovalidate/internal/extra_func.py

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,15 @@
1515
import math
1616
import re
1717
import typing
18+
from collections.abc import Callable
1819
from urllib import parse as urlparse
1920

2021
import celpy
2122
from celpy import celtypes
2223

24+
from protovalidate.config import Config
2325
from protovalidate.internal import string_format
24-
from protovalidate.internal.matches import cel_matches
26+
from protovalidate.internal.matches import matches as protovalidate_matches
2527
from protovalidate.internal.rules import MessageType, field_to_cel
2628

2729
# See https://html.spec.whatwg.org/multipage/input.html#valid-e-mail-address
@@ -1554,14 +1556,31 @@ def __peek(self, char: str) -> bool:
15541556
return self._index < len(self._string) and self._string[self._index] == char
15551557

15561558

1557-
def make_extra_funcs(locale: str) -> dict[str, celpy.CELFunction]:
1558-
# For now, ignoring the type.
1559-
string_fmt = string_format.StringFormat(locale) # type: ignore
1559+
def get_matches_func(matcher: typing.Optional[Callable[[str, str], bool]]):
    """Build the CEL-facing `matches` function around a regex matcher.

    Args:
        matcher: A callable taking ``(text, pattern)`` and returning a bool.
            When None, the imported ``protovalidate_matches`` function is
            used instead.

    Returns:
        A function accepting two CEL values. It raises ``celpy.CELEvalError``
        if either argument is not a CEL string; otherwise it calls the
        selected matcher and wraps its result in ``celtypes.BoolType``.
    """
    regex_match = protovalidate_matches if matcher is None else matcher

    def cel_matches(text: celtypes.Value, pattern: celtypes.Value) -> celpy.Result:
        # Checked in order: a bad `text` is reported before a bad `pattern`.
        checks = (
            (text, "invalid argument for text, expected string"),
            (pattern, "invalid argument for pattern, expected string"),
        )
        for value, error_text in checks:
            if not isinstance(value, celtypes.StringType):
                raise celpy.CELEvalError(error_text)

        return celtypes.BoolType(regex_match(text, pattern))

    return cel_matches
1575+
1576+
1577+
def make_extra_funcs(config: Config) -> dict[str, celpy.CELFunction]:
1578+
string_fmt = string_format.StringFormat()
15601579
return {
15611580
# Missing standard functions
15621581
"format": string_fmt.format,
15631582
# Overridden standard functions
1564-
"matches": cel_matches,
1583+
"matches": get_matches_func(config.regex_matches_func),
15651584
# protovalidate specific functions
15661585
"getField": cel_get_field,
15671586
"isNan": cel_is_nan,
@@ -1575,6 +1594,3 @@ def make_extra_funcs(locale: str) -> dict[str, celpy.CELFunction]:
15751594
"isHostAndPort": cel_is_host_and_port,
15761595
"unique": cel_unique,
15771596
}
1578-
1579-
1580-
EXTRA_FUNCS = make_extra_funcs("en_US")

protovalidate/internal/matches.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
import re
1616

1717
import celpy
18-
from celpy import celtypes
1918

2019
# Patterns that are supported in Python's re package and not in re2.
2120
# RE2: https://github.com/google/re2/wiki/syntax
@@ -30,10 +29,11 @@
3029
r"\\u[0-9a-fA-F]{4}", # UTF-16 code-unit
3130
r"\\0(?!\d)", # NUL
3231
r"\[\\b.*\]", # Backspace eg: [\b]
32+
r"\\Z", # End of text (only lowercase z is supported in re2)
3333
]
3434

3535

36-
def cel_matches(text: celtypes.Value, pattern: celtypes.Value) -> celpy.Result:
36+
def matches(text: str, pattern: str) -> bool:
3737
"""Return True if the given pattern matches text. False otherwise.
3838
3939
CEL uses RE2 syntax which diverges from Python re in various ways. Ideally, we
@@ -43,14 +43,13 @@ def cel_matches(text: celtypes.Value, pattern: celtypes.Value) -> celpy.Result:
4343
4444
Instead of foisting this issue on users, we instead mimic re2 syntax by failing
4545
to compile the regex for patterns not compatible with re2.
46-
"""
47-
if not isinstance(text, celtypes.StringType):
48-
msg = "invalid argument for text, expected string"
49-
raise celpy.CELEvalError(msg)
50-
if not isinstance(pattern, celtypes.StringType):
51-
msg = "invalid argument for pattern, expected string"
52-
raise celpy.CELEvalError(msg)
5346
47+
Users can choose to override this behavior by providing their own custom matches
48+
function via the Config.
49+
50+
Raises:
51+
celpy.CELEvalError: If pattern contains invalid re2 syntax or if an re.error is raised during matching.
52+
"""
5453
# Simulate re2 by failing on any patterns not compatible with re2 syntax
5554
for invalid_pattern in invalid_patterns:
5655
r = re.search(invalid_pattern, pattern)
@@ -61,6 +60,7 @@ def cel_matches(text: celtypes.Value, pattern: celtypes.Value) -> celpy.Result:
6160
try:
6261
m = re.search(pattern, text)
6362
except re.error as ex:
64-
return celpy.CELEvalError("match error", ex.__class__, ex.args)
63+
msg = "match error"
64+
raise celpy.CELEvalError(msg, ex.__class__, ex.args) from ex
6565

66-
return celtypes.BoolType(m is not None)
66+
return m is not None

protovalidate/internal/rules.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import dataclasses
1616
import datetime
1717
import typing
18+
from collections.abc import Callable
1819

1920
import celpy
2021
from celpy import celtypes
@@ -44,7 +45,7 @@ def unwrap(msg: message.Message) -> celtypes.Value:
4445
return field_to_cel(msg, msg.DESCRIPTOR.fields_by_name["value"])
4546

4647

47-
_MSG_TYPE_URL_TO_CTOR: dict[str, typing.Callable[..., celtypes.Value]] = {
48+
_MSG_TYPE_URL_TO_CTOR: dict[str, Callable[..., celtypes.Value]] = {
4849
"google.protobuf.Duration": make_duration,
4950
"google.protobuf.Timestamp": make_timestamp,
5051
"google.protobuf.StringValue": unwrap,

protovalidate/internal/string_format.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,7 @@
2424
class StringFormat:
2525
"""An implementation of string.format() in CEL."""
2626

27-
def __init__(self, locale: str):
28-
self.locale = locale
27+
def __init__(self):
2928
self.fmt = None
3029

3130
def format(self, fmt: celtypes.Value, args: celtypes.Value) -> celpy.Result:

protovalidate/validator.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,9 @@ class Validator:
3939
_cfg: Config
4040

4141
def __init__(self, config=None):
42-
self._factory = _rules.RuleFactory(extra_func.EXTRA_FUNCS)
4342
self._cfg = config if config is not None else Config()
43+
funcs = extra_func.make_extra_funcs(self._cfg)
44+
self._factory = _rules.RuleFactory(funcs)
4445

4546
def validate(
4647
self,

tests/config_test.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,4 @@ class TestConfig(unittest.TestCase):
2121
def test_defaults(self):
2222
cfg = Config()
2323
self.assertFalse(cfg.fail_fast)
24+
self.assertIsNone(cfg.regex_matches_func)

0 commit comments

Comments
 (0)