Skip to content

Commit 47bd8e3

Browse files
authored
Use ASCII characters for things like \w (#931)
1 parent 86f6528 commit 47bd8e3

File tree

3 files changed

+14
-3
lines changed

3 files changed

+14
-3
lines changed

src/rpdk/core/contract/resource_generator.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import logging
2+
import re
23
from collections.abc import Sequence
34

45
from hypothesis.strategies import (
@@ -247,7 +248,7 @@ def generate_string_strategy(schema):
247248
if "maxLength" in schema: # pragma: no cover
248249
LOG.warning("found maxLength used with pattern")
249250

250-
return from_regex(terminate_regex(regex))
251+
return from_regex(re.compile(terminate_regex(regex), re.ASCII))
251252

252253
if "pattern" in schema: # pragma: no cover
253254
LOG.warning("found pattern used with format")
@@ -257,4 +258,4 @@ def generate_string_strategy(schema):
257258
LOG.warning("found maxLength used with format")
258259

259260
regex = STRING_FORMATS[string_format]
260-
return from_regex(regex)
261+
return from_regex(re.compile(regex, re.ASCII))

src/rpdk/core/data_loaders.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,10 @@ def load_resource_spec(resource_spec_file): # pylint: disable=R # noqa: C901
249249
pattern,
250250
)
251251
try:
252-
re.compile(pattern)
252+
# http://json-schema.org/understanding-json-schema/reference/regular_expressions.html
253+
# In ECMA-262, \w, \W, \b, \B, \d, \D, \s and \S perform ASCII-only matching
254+
# instead of full Unicode matching. Unicode matching is the default for str patterns in Python 3.
255+
re.compile(pattern, re.ASCII)
253256
except re.error:
254257
LOG.warning("Could not validate regular expression: %s", pattern)
255258

tests/contract/test_resource_generator.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,13 @@ def test_generate_string_strategy_regex():
6868
assert re.fullmatch(schema["pattern"], regex_strategy.example())
6969

7070

71+
def test_generate_string_strategy_ascii():
72+
schema = {"type": "string", "pattern": "^\\w{1,6}$"}
73+
strategy = ResourceGenerator(schema).generate_schema_strategy(schema)
74+
for _ in range(100):
75+
assert re.match("^[A-Za-z0-9_]{1,6}$", strategy.example())
76+
77+
7178
def test_generate_string_strategy_format():
7279
schema = {"type": "string", "format": "arn"}
7380
strategy = ResourceGenerator(schema).generate_schema_strategy(schema)

0 commit comments

Comments
 (0)