Skip to content

Commit 5c293f3

Browse files
authored
Merge pull request #395 from python-jsonschema/rfc3339-validator
Built-in RFC3339 validator
2 parents 2bf5f7d + 02f6a8a commit 5c293f3

File tree

8 files changed

+297
-1
lines changed

8 files changed

+297
-1
lines changed

CHANGELOG.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ Unreleased
1111
.. vendor-insert-here
1212
1313
- Update vendored schemas (2024-02-05)
14+
- Include built-in, efficient implementations of `date-time` format validation
15+
(RFC 3339) and `time` format validation (ISO 8601). This makes the `date-time`
16+
and `time` formats always available for validation. (:issue:`378`)
1417
- Support the use of `orjson` for faster JSON parsing when it is installed.
1518
This makes it an optional parser which is preferred over the default
1619
`json` module when it is available.

src/check_jsonschema/cli/main_command.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ def pretty_helptext_list(values: list[str] | tuple[str, ...]) -> str:
7171
7272
'check-jsonschema' supports format checks with appropriate libraries installed,
7373
including the following formats by default:
74-
date, email, ipv4, ipv6, regex, uuid
74+
date, date-time, email, ipv4, ipv6, regex, uuid
7575
7676
\b
7777
For the "regex" format, there are multiple modes which can be specified with

src/check_jsonschema/formats.py renamed to src/check_jsonschema/formats/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
import jsonschema.validators
1010
import regress
1111

12+
from .implementations import validate_rfc3339, validate_time
13+
1214
# all known format strings except for a selection from draft3 which have either
1315
# been renamed or removed:
1416
# - color
@@ -101,6 +103,8 @@ def make_format_checker(
101103
del checker.checkers["regex"]
102104
regex_impl = RegexImplementation(opts.regex_variant)
103105
checker.checks("regex")(regex_impl.check_format)
106+
checker.checks("date-time")(validate_rfc3339)
107+
checker.checks("time")(validate_time)
104108

105109
# remove the disabled checks, which may include the regex check
106110
for checkname in opts.disabled_formats:
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
from .iso8601_time import validate as validate_time
2+
from .rfc3339 import validate as validate_rfc3339
3+
4+
__all__ = ("validate_rfc3339", "validate_time")
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
import re
2+
3+
# Matches "hh:mm:ss", an optional fractional-seconds part introduced by "." or
# ",", and a mandatory zone designator ("Z", "z" or a "+hh:mm" / "-hh:mm" offset).
#
# The pattern is closed with "\Z" instead of "$": in Python, "$" also matches
# immediately before a trailing newline, which would let a value such as
# "23:59:59Z\n" pass validation.
TIME_REGEX = re.compile(
    r"""
    ^
    (?:[01]\d|2[0123])
    :
    (?:[0-5]\d)
    :
    (?:[0-5]\d)
    # (optional) fractional seconds
    (?:(\.|,)\d+)?
    # UTC or offset
    (?:
        Z
        | z
        | [+-](?:[01]\d|2[0123]):[0-5]\d
    )
    # strict end of string
    \Z
    """,
    re.VERBOSE | re.ASCII,
)


def validate(time_str: object) -> bool:
    """Check whether a value is a valid ``time`` format string.

    :param time_str: the value to check; anything that is not a ``str`` is
        rejected rather than raising
    :returns: ``True`` when the whole string matches ``TIME_REGEX`` (hours
        00-23, minutes and seconds 00-59, optional fractional seconds, and a
        required ``Z``/``z`` or numeric offset), ``False`` otherwise
    """
    if not isinstance(time_str, str):
        return False
    return bool(TIME_REGEX.match(time_str))
29+
30+
31+
if __name__ == "__main__":
    # Ad-hoc micro-benchmark: time `validate` on a few representative inputs
    # and report the best per-call duration in nanoseconds.
    import timeit

    N = 100_000
    tests = (
        ("basic", "23:59:59Z"),
        ("long_fracsec", "23:59:59.8446519776713Z"),
    )

    print("benchmarking")
    for label, sample in tests:
        statement = f"validate({sample!r})"
        timings = timeit.repeat(statement, globals=globals(), repeat=3, number=N)
        # best-of-3 total time, converted to nanoseconds per call
        best_ns = int(min(timings) / N * 10**9)
        is_valid = validate(sample)
        print(f"{label} (valid={is_valid}): {best_ns}ns")
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
import re
2+
3+
# this regex is based on the one from the rfc3339-validator package
4+
# credit to the original author
5+
# original license:
6+
#
7+
# MIT License
8+
#
9+
# Copyright (c) 2019, Nicolas Aimetti
10+
#
11+
# Permission is hereby granted, free of charge, to any person obtaining a copy
12+
# of this software and associated documentation files (the "Software"), to deal
13+
# in the Software without restriction, including without limitation the rights
14+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15+
# copies of the Software, and to permit persons to whom the Software is
16+
# furnished to do so, subject to the following conditions:
17+
#
18+
# The above copyright notice and this permission notice shall be included in all
19+
# copies or substantial portions of the Software.
20+
#
21+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27+
# SOFTWARE.
28+
#
29+
# modifications have been made for additional corner cases and speed
30+
# Structural check for an RFC 3339 date-time. The day-of-month is only checked
# for shape here ("\d{2}"); its range is verified per month in `validate`.
#
# The pattern is closed with "\Z" instead of "$": in Python, "$" also matches
# immediately before a trailing newline, which would let a value such as
# "2018-12-31T23:59:59Z\n" pass validation.
RFC3339_REGEX = re.compile(
    r"""
    ^
    (?:\d{4})
    -
    (?:0[1-9]|1[0-2])
    -
    (?:\d{2})
    (?:T|t)
    (?:[01]\d|2[0123])
    :
    (?:[0-5]\d)
    :
    (?:[0-5]\d)
    # (optional) fractional seconds
    (?:(\.|,)\d+)?
    # UTC or offset
    (?:
        Z
        | z
        | [+-](?:[01]\d|2[0123]):[0-5]\d
    )
    # strict end of string
    \Z
    """,
    re.VERBOSE | re.ASCII,
)


def validate(date_str: object) -> bool:
    """Validate a string as a RFC3339 date-time.

    :param date_str: the value to check; anything that is not a ``str`` is
        rejected rather than raising
    :returns: ``True`` when the string matches ``RFC3339_REGEX`` and the day
        exists in the given month and year (leap years included), ``False``
        otherwise
    """
    if not isinstance(date_str, str):
        return False
    if not RFC3339_REGEX.match(date_str):
        return False

    # the regex guarantees a fixed-width "YYYY-MM-DD" prefix, so plain slicing
    # is safe here
    year, month, day = int(date_str[:4]), int(date_str[5:7]), int(date_str[8:10])

    if month in {4, 6, 9, 11}:
        max_day = 30
    elif month == 2:
        # Gregorian leap year rule
        max_day = 29 if year % 4 == 0 and (year % 100 != 0 or year % 400 == 0) else 28
    else:
        max_day = 31
    return 1 <= day <= max_day
76+
77+
78+
if __name__ == "__main__":
    # Ad-hoc micro-benchmark: time `validate` on a few representative inputs
    # and report the best per-call duration in nanoseconds.
    import timeit

    N = 100_000
    tests = (
        ("long_fracsec", "2018-12-31T23:59:59.8446519776713Z"),
        ("basic", "2018-12-31T23:59:59Z"),
        ("in_february", "2018-02-12T23:59:59Z"),
        ("in_february_invalid", "2018-02-29T23:59:59Z"),
    )

    print("benchmarking")
    for label, sample in tests:
        statement = f"validate({sample!r})"
        timings = timeit.repeat(statement, globals=globals(), repeat=3, number=N)
        # best-of-3 total time, converted to nanoseconds per call
        best_ns = int(min(timings) / N * 10**9)
        is_valid = validate(sample)
        print(f"{label} (valid={is_valid}): {best_ns}ns")

tests/unit/formats/test_rfc3339.py

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
import random
2+
3+
import pytest
4+
5+
from check_jsonschema.formats.implementations.rfc3339 import validate
6+
7+
8+
@pytest.mark.parametrize(
    "datestr",
    [
        "2018-12-31T23:59:59Z",
        "2018-12-31t23:59:59Z",
        "2018-12-31t23:59:59z",
        "2018-12-31T23:59:59+00:00",
        "2018-12-31T23:59:59-00:00",
    ],
)
def test_simple_positive_cases(datestr):
    """Upper/lower-case separators and zone markers, and zero offsets, pass."""
    accepted = validate(datestr)
    assert accepted
20+
21+
22+
@pytest.mark.parametrize(
    "datestr",
    [
        "2018-12-31T23:59:59",
        "2018-12-31T23:59:59+00:00Z",
        "2018-12-31 23:59:59",
    ],
)
def test_simple_negative_case(datestr):
    """A missing zone, a doubled zone, or a space separator is rejected."""
    accepted = validate(datestr)
    assert not accepted
32+
33+
34+
@pytest.mark.parametrize("precision", list(range(20)))
@pytest.mark.parametrize(
    "offsetstr",
    (
        "Z",
        "+00:00",
        "-00:00",
        "+23:59",
    ),
)
def test_allows_fracsec(precision, offsetstr):
    """Fractional seconds of varying length are accepted with every zone form."""
    # Use a generator seeded from the parameter instead of the global RNG, so
    # each parametrized case always sees the same input and a failure can be
    # reproduced exactly.
    rng = random.Random(precision)
    fracsec = rng.randint(0, 10**precision)
    candidate = f"2018-12-31T23:59:59.{fracsec}{offsetstr}"
    assert validate(candidate), candidate
47+
48+
49+
@pytest.mark.parametrize(
    "datestr",
    [
        # no such month
        "2020-13-01T00:00:00Z",
        "2020-00-01T00:00:00Z",
        # no such day
        "2020-01-00T00:00:00Z",
        "2020-01-32T00:00:00Z",
    ],
)
def test_basic_bounds_validated(datestr):
    """Out-of-range month and day numbers are rejected."""
    accepted = validate(datestr)
    assert not accepted
62+
63+
64+
@pytest.mark.parametrize(
    "month, maxday",
    [
        (1, 31),
        (3, 31),
        (4, 30),
        (5, 31),
        (6, 30),
        (7, 31),
        (8, 31),
        (9, 30),
        (10, 31),
        (11, 30),
    ],
)
def test_day_bounds_by_month(month, maxday):
    """The last day of a month passes; the day after it does not."""
    prefix = f"2020-{month:02}-"
    suffix = "T00:00:00Z"
    last_valid = f"{prefix}{maxday:02}{suffix}"
    first_invalid = f"{prefix}{maxday + 1:02}{suffix}"
    assert validate(last_valid)
    assert not validate(first_invalid)
84+
85+
86+
@pytest.mark.parametrize(
    "year, maxday",
    [
        (2018, 28),
        (2016, 29),
        (2400, 29),
        (2500, 28),
    ],
)
def test_day_bounds_for_february(year, maxday):
    """February's last day follows the leap-year rule, century years included."""
    prefix = f"{year}-02-"
    suffix = "T00:00:00Z"
    last_valid = f"{prefix}{maxday:02}{suffix}"
    first_invalid = f"{prefix}{maxday + 1:02}{suffix}"
    assert validate(last_valid)
    assert not validate(first_invalid)

tests/unit/formats/test_time.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
import random
2+
3+
import pytest
4+
5+
from check_jsonschema.formats.implementations.iso8601_time import validate
6+
7+
8+
@pytest.mark.parametrize(
    "timestr",
    [
        "12:34:56Z",
        "23:59:59z",
        "23:59:59+00:00",
        "01:59:59-00:00",
    ],
)
def test_simple_positive_cases(timestr):
    """Times with a Z/z marker or a zero numeric offset pass."""
    accepted = validate(timestr)
    assert accepted
19+
20+
21+
@pytest.mark.parametrize(
    "timestr",
    [
        "12:34:56",
        "23:59:60Z",
        "23:59:59+24:00",
        "01:59:59-00:60",
        "01:01:00:00:60",
    ],
)
def test_simple_negative_cases(timestr):
    """A missing zone, out-of-range fields, or extra components are rejected."""
    accepted = validate(timestr)
    assert not accepted
33+
34+
35+
@pytest.mark.parametrize("precision", list(range(20)))
@pytest.mark.parametrize(
    "offsetstr",
    (
        "Z",
        "+00:00",
        "-00:00",
        "+23:59",
    ),
)
def test_allows_fracsec(precision, offsetstr):
    """Fractional seconds of varying length are accepted with every zone form."""
    # Use a generator seeded from the parameter instead of the global RNG, so
    # each parametrized case always sees the same input and a failure can be
    # reproduced exactly.
    rng = random.Random(precision)
    fracsec = rng.randint(0, 10**precision)
    candidate = f"23:59:59.{fracsec}{offsetstr}"
    assert validate(candidate), candidate

0 commit comments

Comments
 (0)