Skip to content

Commit 25c56ff

Browse files
authored
Merge pull request #306 from python-jsonschema/regress-regexes
Use regress and remove 'disabled' regex format
2 parents 6bdc483 + 9e7023e commit 25c56ff

File tree

7 files changed

+61
-65
lines changed

7 files changed

+61
-65
lines changed

CHANGELOG.rst

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@ Unreleased
99
----------
1010

1111
.. vendor-insert-here
12+
- The regex format check has been improved to support ECMAScript regexes by
13+
default. (:issue:`302`)
14+
- The ``--format-regex disabled`` option has been removed. Users should use
15+
``--disable-formats regex`` if they wish to disable regex format checking.
1216

1317
0.25.0
1418
------
@@ -71,7 +75,7 @@ Unreleased
7175
- A new option, ``--disable-formats`` replaces and enhances the
7276
``--disable-format`` flag. ``--disable-formats`` takes a format to disable
7377
and may be passed multiple times, allowing users to opt out of any specific
74-
format checks. ``--disable-format "*"`` can be used to disable all format
78+
format checks. ``--disable-formats "*"`` can be used to disable all format
7579
checking. ``--disable-format`` is still supported, but is deprecated and
7680
emits a warning.
7781

docs/usage.rst

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -198,13 +198,10 @@ follows:
198198

199199
* - mode
200200
- description
201-
* - disabled
202-
- Skip checking ``regex``, but leave other formats enabled.
203201
* - default
204-
- Check for known non-python regex syntaxes. If one is found, the expression
205-
always passes. Otherwise, check validity in the python engine.
202+
- Require the regex to be valid in ECMAScript regex syntax.
206203
* - python
207-
- Require the regex to be valid in python regex syntax.
204+
- Require the regex to be valid in Python regex syntax.
208205

209206
Other Options
210207
--------------

setup.cfg

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ install_requires =
2121
importlib-resources>=1.4.0;python_version<"3.9"
2222
ruamel.yaml==0.17.32
2323
jsonschema>=4.18.0,<5.0
24+
regress>=0.4.0
2425
requests<3.0
2526
click>=8,<9
2627
package_dir=

src/check_jsonschema/cli/main_command.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
from ..catalog import CUSTOM_SCHEMA_NAMES, SCHEMA_CATALOG
99
from ..checker import SchemaChecker
10-
from ..formats import KNOWN_FORMATS, RegexFormatBehavior
10+
from ..formats import KNOWN_FORMATS, RegexVariantName
1111
from ..instance_loader import InstanceLoader
1212
from ..parsers import SUPPORTED_FILE_FORMATS
1313
from ..reporter import REPORTER_BY_NAME, Reporter
@@ -69,8 +69,7 @@ def pretty_helptext_list(values: list[str] | tuple[str, ...]) -> str:
6969
\b
7070
For the "regex" format, there are multiple modes which can be specified with
7171
'--format-regex':
72-
default | best effort check
73-
disabled | do not check the regex format
72+
default | check that the string is a valid ECMAScript regex
7473
python | check that the string is a valid python regex
7574
7675
\b
@@ -153,8 +152,8 @@ def pretty_helptext_list(values: list[str] | tuple[str, ...]) -> str:
153152
"Set the mode of format validation for regexes. "
154153
"If `--disable-formats regex` is used, this option has no effect."
155154
),
156-
default=RegexFormatBehavior.default.value,
157-
type=click.Choice([x.value for x in RegexFormatBehavior], case_sensitive=False),
155+
default=RegexVariantName.default.value,
156+
type=click.Choice([x.value for x in RegexVariantName], case_sensitive=False),
158157
)
159158
@click.option(
160159
"--default-filetype",
@@ -249,7 +248,7 @@ def main(
249248
args.disable_all_formats = True
250249
else:
251250
args.disable_formats = normalized_disable_formats
252-
args.format_regex = RegexFormatBehavior(format_regex)
251+
args.format_regex = RegexVariantName(format_regex)
253252
args.disable_cache = no_cache
254253
args.default_filetype = default_filetype
255254
args.fill_defaults = fill_defaults

src/check_jsonschema/cli/parse_result.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
import click
66

7-
from ..formats import FormatOptions, RegexFormatBehavior
7+
from ..formats import FormatOptions, RegexVariantName
88
from ..transforms import Transform
99

1010

@@ -33,7 +33,7 @@ def __init__(self) -> None:
3333
# regex format options
3434
self.disable_all_formats: bool = False
3535
self.disable_formats: tuple[str, ...] = ()
36-
self.format_regex: RegexFormatBehavior = RegexFormatBehavior.default
36+
self.format_regex: RegexVariantName = RegexVariantName.default
3737
# error and output controls
3838
self.verbosity: int = 1
3939
self.traceback_mode: str = "short"
@@ -69,6 +69,6 @@ def set_schema(
6969
def format_opts(self) -> FormatOptions:
7070
return FormatOptions(
7171
enabled=not self.disable_all_formats,
72-
regex_behavior=self.format_regex,
72+
regex_variant=self.format_regex,
7373
disabled_formats=self.disable_formats,
7474
)

src/check_jsonschema/formats.py

Lines changed: 27 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
import jsonschema
99
import jsonschema.validators
10+
import regress
1011

1112
# all known format strings except for a selection from draft3 which have either
1213
# been renamed or removed:
@@ -36,41 +37,43 @@
3637
)
3738

3839

39-
def _regex_check(instance: t.Any) -> bool:
40-
if not isinstance(instance, str):
41-
return True
42-
re.compile(instance)
43-
return True
40+
class RegexVariantName(enum.Enum):
41+
default = "default"
42+
python = "python"
4443

4544

46-
def _gated_regex_check(instance: t.Any) -> bool:
47-
if not isinstance(instance, str):
48-
return True
49-
if re.search(r"\(\?[^!=]", instance):
50-
return True
51-
re.compile(instance)
52-
return True
45+
class RegexImplementation:
46+
def __init__(self, variant: RegexVariantName) -> None:
47+
self.variant = variant
5348

49+
def check_format(self, instance: t.Any) -> bool:
50+
if not isinstance(instance, str):
51+
return True
5452

55-
class RegexFormatBehavior(enum.Enum):
56-
default = "default"
57-
disabled = "disabled"
58-
python = "python"
53+
try:
54+
if self.variant == RegexVariantName.default:
55+
regress.Regex(instance)
56+
else:
57+
re.compile(instance)
58+
# regress.RegressError is not visible in the package's published type information,
59+
# so type checkers report attr-defined below; needs investigation -- for now, suppress that type error
60+
except (regress.RegressError, re.error): # type: ignore[attr-defined]
61+
return False
62+
63+
return True
5964

6065

6166
class FormatOptions:
6267
def __init__(
6368
self,
6469
*,
6570
enabled: bool = True,
66-
regex_behavior: RegexFormatBehavior = RegexFormatBehavior.default,
71+
regex_variant: RegexVariantName = RegexVariantName.default,
6772
disabled_formats: tuple[str, ...] = (),
6873
) -> None:
6974
self.enabled = enabled
70-
self.regex_behavior = regex_behavior
75+
self.regex_variant = regex_variant
7176
self.disabled_formats = disabled_formats
72-
if "regex" in self.disabled_formats:
73-
self.regex_behavior = RegexFormatBehavior.disabled
7477

7578

7679
def get_base_format_checker(schema_dialect: str | None) -> jsonschema.FormatChecker:
@@ -94,22 +97,15 @@ def make_format_checker(
9497
base_checker = get_base_format_checker(schema_dialect)
9598
checker = copy.deepcopy(base_checker)
9699

97-
# remove the regex check -- it will be re-added if it is enabled
100+
# replace the regex check
98101
del checker.checkers["regex"]
102+
regex_impl = RegexImplementation(opts.regex_variant)
103+
checker.checks("regex")(regex_impl.check_format)
99104

100-
# remove the disabled checks
105+
# remove the disabled checks, which may include the regex check
101106
for checkname in opts.disabled_formats:
102107
if checkname not in checker.checkers:
103108
continue
104109
del checker.checkers[checkname]
105110

106-
if opts.regex_behavior == RegexFormatBehavior.disabled:
107-
pass
108-
elif opts.regex_behavior == RegexFormatBehavior.default:
109-
checker.checks("regex", raises=re.error)(_gated_regex_check)
110-
elif opts.regex_behavior == RegexFormatBehavior.python:
111-
checker.checks("regex", raises=re.error)(_regex_check)
112-
else: # pragma: no cover
113-
raise NotImplementedError
114-
115111
return checker

tests/acceptance/test_format_regex_opts.py

Lines changed: 18 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
# test on a JavaScript regex which is not a valid python regex
2-
# `--format-regex=disabled` should skip
32
# `--format-regex=default` should accept it
43
# `--format-regex=python` should reject it
54
#
@@ -39,24 +38,28 @@
3938
}
4039

4140

42-
@pytest.fixture(params=["disabled", "default", "python"])
43-
def regexopt(request):
41+
@pytest.fixture(
42+
params=[
43+
("--disable-formats", "regex"),
44+
("--format-regex", "default"),
45+
("--format-regex", "python"),
46+
]
47+
)
48+
def regexopts(request):
4449
return request.param
4550

4651

47-
def test_regex_format_good(run_line_simple, tmp_path, regexopt):
52+
def test_regex_format_good(run_line_simple, tmp_path, regexopts):
4853
schemafile = tmp_path / "schema.json"
4954
schemafile.write_text(json.dumps(FORMAT_SCHEMA))
5055

5156
doc = tmp_path / "doc.json"
5257
doc.write_text(json.dumps(ALWAYS_PASSING_DOCUMENT))
5358

54-
run_line_simple(
55-
["--format-regex", regexopt, "--schemafile", str(schemafile), str(doc)]
56-
)
59+
run_line_simple([*regexopts, "--schemafile", str(schemafile), str(doc)])
5760

5861

59-
def test_regex_format_accepts_non_str_inputs(run_line_simple, tmp_path, regexopt):
62+
def test_regex_format_accepts_non_str_inputs(run_line_simple, tmp_path, regexopts):
6063
# potentially confusing, but a format checker may be invoked on non-str instances;
6164
# verify that the regex format check accepts such an instance
6265
schemafile = tmp_path / "schema.json"
@@ -70,25 +73,22 @@ def test_regex_format_accepts_non_str_inputs(run_line_simple, tmp_path, regexopt
7073
)
7174
doc = tmp_path / "doc.json"
7275
doc.write_text(json.dumps({"pattern": 0}))
73-
run_line_simple(
74-
["--format-regex", regexopt, "--schemafile", str(schemafile), str(doc)]
75-
)
76+
run_line_simple([*regexopts, "--schemafile", str(schemafile), str(doc)])
7677

7778

78-
def test_regex_format_bad(run_line, tmp_path, regexopt):
79+
def test_regex_format_bad(run_line, tmp_path, regexopts):
7980
schemafile = tmp_path / "schema.json"
8081
schemafile.write_text(json.dumps(FORMAT_SCHEMA))
8182

8283
doc = tmp_path / "doc.json"
8384
doc.write_text(json.dumps(ALWAYS_FAILING_DOCUMENT))
8485

85-
expect_ok = regexopt == "disabled"
86+
expect_ok = regexopts == ("--disable-formats", "regex")
8687

8788
res = run_line(
8889
[
8990
"check-jsonschema",
90-
"--format-regex",
91-
regexopt,
91+
*regexopts,
9292
"--schemafile",
9393
str(schemafile),
9494
str(doc),
@@ -101,20 +101,19 @@ def test_regex_format_bad(run_line, tmp_path, regexopt):
101101
assert "is not a 'regex'" in res.stdout
102102

103103

104-
def test_regex_format_js_specific(run_line, tmp_path, regexopt):
104+
def test_regex_format_js_specific(run_line, tmp_path, regexopts):
105105
schemafile = tmp_path / "schema.json"
106106
schemafile.write_text(json.dumps(FORMAT_SCHEMA))
107107

108108
doc = tmp_path / "doc.json"
109109
doc.write_text(json.dumps(JS_REGEX_DOCUMENT))
110110

111-
expect_ok = regexopt in ("disabled", "default")
111+
expect_ok = regexopts != ("--format-regex", "python")
112112

113113
res = run_line(
114114
[
115115
"check-jsonschema",
116-
"--format-regex",
117-
regexopt,
116+
*regexopts,
118117
"--schemafile",
119118
str(schemafile),
120119
str(doc),

0 commit comments

Comments
 (0)