Skip to content

Commit 61e0cdf

Browse files
committed
Option to generate custom string formats
1 parent c00397c commit 61e0cdf

File tree

5 files changed

+164
-34
lines changed

5 files changed

+164
-34
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
# Changelog
22

3+
#### 0.17.0 - 2020-07-16
4+
- Adds a `custom_formats` keyword argument to `from_schema()`, so that you can
5+
specify a strategy to generate strings for custom formats like credit card numbers.
6+
Thanks to Dmitry Dygalo, whose [sponsorship](https://github.com/sponsors/Zac-HD)
7+
motivated me to add the feature!
8+
39
#### 0.16.2 - 2020-07-12
410
- Substantial performance gains for some schemas, via improved handling of the
511
`contains`, `not`, `anyOf`, and `if/then/else` keywords

README.md

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,24 @@ which takes a JSON schema and returns a strategy for allowed JSON objects.
1414
from hypothesis import given
1515
from hypothesis_jsonschema import from_schema
1616

17+
1718
@given(from_schema(
1819
{"type": "integer", "minimum": 1, "exclusiveMaximum": 10}
1920
))
20-
def test(value):
21+
def test_integers(value):
2122
assert isinstance(value, int)
2223
assert 1 <= value < 10
24+
25+
26+
@given(from_schema(
27+
{"type": "string", "format": "card"},
28+
# Standard formats work out of the box. Custom formats are ignored
29+
# by default, but you can pass custom strategies for them - e.g.
30+
custom_formats={"card": st.sampled_from(EXAMPLE_CARD_NUMBERS)}
31+
))
32+
def test_card_numbers(value):
33+
assert isinstance(value, str)
34+
assert re.match(r"^\d{4} \d{4} \d{4} \d{4}$", value)
2335
```
2436

2537
For more details on property-based testing and how to use or customise

src/hypothesis_jsonschema/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
The only public API is `from_schema`; check the docstring for details.
44
"""
55

6-
__version__ = "0.16.2"
6+
__version__ = "0.17.0"
77
__all__ = ["from_schema"]
88

99
from ._from_schema import from_schema

src/hypothesis_jsonschema/_from_schema.py

Lines changed: 100 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import operator
66
import re
77
from fractions import Fraction
8+
from functools import partial
89
from typing import Any, Callable, Dict, List, NoReturn, Optional, Set, Union
910

1011
import jsonschema
@@ -38,12 +39,15 @@
3839
lambda strategy: st.lists(strategy, max_size=3)
3940
| st.dictionaries(st.text(), strategy, max_size=3),
4041
)
42+
_FORMATS_TOKEN = object()
4143

4244

43-
def merged_as_strategies(schemas: List[Schema]) -> st.SearchStrategy[JSONType]:
45+
def merged_as_strategies(
46+
schemas: List[Schema], custom_formats: Optional[Dict[str, st.SearchStrategy[str]]]
47+
) -> st.SearchStrategy[JSONType]:
4448
assert schemas, "internal error: must pass at least one schema to merge"
4549
if len(schemas) == 1:
46-
return from_schema(schemas[0])
50+
return from_schema(schemas[0], custom_formats=custom_formats)
4751
# Try to merge combinations of strategies.
4852
strats = []
4953
combined: Set[str] = set()
@@ -57,22 +61,26 @@ def merged_as_strategies(schemas: List[Schema]) -> st.SearchStrategy[JSONType]:
5761
if s is not None and s != FALSEY:
5862
validators = [make_validator(s) for s in schemas]
5963
strats.append(
60-
from_schema(s).filter(
64+
from_schema(s, custom_formats=custom_formats).filter(
6165
lambda obj: all(v.is_valid(obj) for v in validators)
6266
)
6367
)
6468
combined.update(group)
6569
return st.one_of(strats)
6670

6771

68-
def from_schema(schema: Union[bool, Schema]) -> st.SearchStrategy[JSONType]:
72+
def from_schema(
73+
schema: Union[bool, Schema],
74+
*,
75+
custom_formats: Dict[str, st.SearchStrategy[str]] = None,
76+
) -> st.SearchStrategy[JSONType]:
6977
"""Take a JSON schema and return a strategy for allowed JSON objects.
7078
7179
Schema reuse with "definitions" and "$ref" is not yet supported, but
7280
everything else in drafts 04, 05, and 07 is fully tested and working.
7381
"""
7482
try:
75-
return __from_schema(schema)
83+
return __from_schema(schema, custom_formats=custom_formats)
7684
except Exception as err:
7785
error = err
7886

@@ -82,13 +90,58 @@ def error_raiser() -> NoReturn:
8290
return st.builds(error_raiser)
8391

8492

85-
def __from_schema(schema: Union[bool, Schema]) -> st.SearchStrategy[JSONType]:
93+
def _get_format_filter(
94+
format_name: str,
95+
checker: jsonschema.FormatChecker,
96+
strategy: st.SearchStrategy[str],
97+
) -> st.SearchStrategy[str]:
98+
def check_valid(string: str) -> str:
99+
try:
100+
assert isinstance(string, str)
101+
checker.check(string, format=format_name)
102+
except (AssertionError, jsonschema.FormatError) as err:
103+
raise InvalidArgument(
104+
f"Got string={string!r} from strategy {strategy!r}, but this "
105+
f"is not a valid value for the {format_name!r} checker."
106+
) from err
107+
return string
108+
109+
return strategy.map(check_valid)
110+
111+
112+
def __from_schema(
113+
schema: Union[bool, Schema],
114+
*,
115+
custom_formats: Dict[str, st.SearchStrategy[str]] = None,
116+
) -> st.SearchStrategy[JSONType]:
86117
try:
87118
schema = resolve_all_refs(schema)
88119
except RecursionError:
89120
raise HypothesisRefResolutionError(
90121
f"Could not resolve recursive references in schema={schema!r}"
91122
) from None
123+
# We check for _FORMATS_TOKEN to avoid re-validating known good data.
124+
if custom_formats is not None and _FORMATS_TOKEN not in custom_formats:
125+
assert isinstance(custom_formats, dict)
126+
for name, strat in custom_formats.items():
127+
if not isinstance(name, str):
128+
raise InvalidArgument(f"format name {name!r} must be a string")
129+
if name in STRING_FORMATS:
130+
raise InvalidArgument(f"Cannot redefine standard format {name!r}")
131+
if not isinstance(strat, st.SearchStrategy):
132+
raise InvalidArgument(
133+
f"custom_formats[{name!r}]={strat!r} must be a Hypothesis "
134+
"strategy which generates strings matching this format."
135+
)
136+
format_checker = jsonschema.FormatChecker()
137+
custom_formats = {
138+
name: _get_format_filter(name, format_checker, strategy)
139+
if name in format_checker.checkers
140+
else strategy
141+
for name, strategy in custom_formats.items()
142+
}
143+
custom_formats[_FORMATS_TOKEN] = None # type: ignore
144+
92145
schema = canonicalish(schema)
93146
# Boolean objects are special schemata; False rejects all and True accepts all.
94147
if schema == FALSEY:
@@ -101,31 +154,38 @@ def __from_schema(schema: Union[bool, Schema]) -> st.SearchStrategy[JSONType]:
101154
if schema["$schema"] == "http://json-schema.org/draft-03/schema#":
102155
raise InvalidArgument("Draft-03 schemas are not supported")
103156

157+
assert isinstance(schema, dict)
104158
# Now we handle as many validation keywords as we can...
105159
# Applying subschemata with boolean logic
106160
if "not" in schema:
107161
not_ = schema.pop("not")
108162
assert isinstance(not_, dict)
109163
validator = make_validator(not_).is_valid
110-
return from_schema(schema).filter(lambda v: not validator(v))
164+
return from_schema(schema, custom_formats=custom_formats).filter(
165+
lambda v: not validator(v)
166+
)
111167
if "anyOf" in schema:
112168
tmp = schema.copy()
113169
ao = tmp.pop("anyOf")
114170
assert isinstance(ao, list)
115-
return st.one_of([merged_as_strategies([tmp, s]) for s in ao])
171+
return st.one_of([merged_as_strategies([tmp, s], custom_formats) for s in ao])
116172
if "allOf" in schema:
117173
tmp = schema.copy()
118174
ao = tmp.pop("allOf")
119175
assert isinstance(ao, list)
120-
return merged_as_strategies([tmp] + ao)
176+
return merged_as_strategies([tmp] + ao, custom_formats)
121177
if "oneOf" in schema:
122178
tmp = schema.copy()
123179
oo = tmp.pop("oneOf")
124180
assert isinstance(oo, list)
125181
schemas = [merged([tmp, s]) for s in oo]
126-
return st.one_of([from_schema(s) for s in schemas if s is not None]).filter(
127-
make_validator(schema).is_valid
128-
)
182+
return st.one_of(
183+
[
184+
from_schema(s, custom_formats=custom_formats)
185+
for s in schemas
186+
if s is not None
187+
]
188+
).filter(make_validator(schema).is_valid)
129189
# Simple special cases
130190
if "enum" in schema:
131191
assert schema["enum"], "Canonicalises to non-empty list or FALSEY"
@@ -138,9 +198,9 @@ def __from_schema(schema: Union[bool, Schema]) -> st.SearchStrategy[JSONType]:
138198
"boolean": lambda _: st.booleans(),
139199
"number": number_schema,
140200
"integer": integer_schema,
141-
"string": string_schema,
142-
"array": array_schema,
143-
"object": object_schema,
201+
"string": partial(string_schema, custom_formats),
202+
"array": partial(array_schema, custom_formats),
203+
"object": partial(object_schema, custom_formats),
144204
}
145205
assert set(map_) == set(TYPE_STRINGS)
146206
return st.one_of([map_[t](schema) for t in get_type(schema)])
@@ -329,21 +389,22 @@ def relative_json_pointers() -> st.SearchStrategy[str]:
329389
}
330390

331391

332-
def string_schema(schema: dict) -> st.SearchStrategy[str]:
392+
def string_schema(
393+
custom_formats: Dict[str, st.SearchStrategy[str]], schema: dict
394+
) -> st.SearchStrategy[str]:
333395
"""Handle schemata for strings."""
334396
# also https://json-schema.org/latest/json-schema-validation.html#rfc.section.7
335397
min_size = schema.get("minLength", 0)
336398
max_size = schema.get("maxLength")
337399
strategy = st.text(min_size=min_size, max_size=max_size)
338-
if schema.get("format") in STRING_FORMATS:
400+
known_formats = {**(custom_formats or {}), **STRING_FORMATS}
401+
if schema.get("format") in known_formats:
339402
# Unknown "format" specifiers should be ignored for validation.
340403
# See https://json-schema.org/latest/json-schema-validation.html#format
341-
strategy = STRING_FORMATS[schema["format"]]
404+
strategy = known_formats[schema["format"]]
342405
if "pattern" in schema:
343406
# This isn't really supported, but we'll do our best.
344-
strategy = strategy.filter(
345-
lambda s: re.search(schema["pattern"], string=s) is not None
346-
)
407+
strategy = strategy.filter(re.compile(schema["pattern"]).search)
347408
elif "pattern" in schema:
348409
try:
349410
re.compile(schema["pattern"])
@@ -361,8 +422,11 @@ def string_schema(schema: dict) -> st.SearchStrategy[str]:
361422
return strategy
362423

363424

364-
def array_schema(schema: dict) -> st.SearchStrategy[List[JSONType]]:
425+
def array_schema(
426+
custom_formats: Dict[str, st.SearchStrategy[str]], schema: dict
427+
) -> st.SearchStrategy[List[JSONType]]:
365428
"""Handle schemata for arrays."""
429+
_from_schema_ = partial(from_schema, custom_formats=custom_formats)
366430
items = schema.get("items", {})
367431
additional_items = schema.get("additionalItems", {})
368432
min_size = schema.get("minItems", 0)
@@ -373,19 +437,19 @@ def array_schema(schema: dict) -> st.SearchStrategy[List[JSONType]]:
373437
if max_size is not None:
374438
max_size -= len(items)
375439

376-
items_strats = [from_schema(s) for s in items]
377-
additional_items_strat = from_schema(additional_items)
440+
items_strats = [_from_schema_(s) for s in items]
441+
additional_items_strat = _from_schema_(additional_items)
378442

379443
# If we have a contains schema to satisfy, we try generating from it when
380444
# allowed to do so. We'll skip the None (unmergeable / no contains) cases
381445
# below, and let Hypothesis ignore the FALSEY cases for us.
382446
if "contains" in schema:
383447
for i, mrgd in enumerate(merged([schema["contains"], s]) for s in items):
384448
if mrgd is not None:
385-
items_strats[i] |= from_schema(mrgd)
449+
items_strats[i] |= _from_schema_(mrgd)
386450
contains_additional = merged([schema["contains"], additional_items])
387451
if contains_additional is not None:
388-
additional_items_strat |= from_schema(contains_additional)
452+
additional_items_strat |= _from_schema_(contains_additional)
389453

390454
if unique:
391455

@@ -417,9 +481,9 @@ def not_seen(elem: JSONType) -> bool:
417481
st.lists(additional_items_strat, min_size=min_size, max_size=max_size),
418482
)
419483
else:
420-
items_strat = from_schema(items)
484+
items_strat = _from_schema_(items)
421485
if "contains" in schema:
422-
contains_strat = from_schema(schema["contains"])
486+
contains_strat = _from_schema_(schema["contains"])
423487
if merged([items, schema["contains"]]) != schema["contains"]:
424488
# We only need this filter if we couldn't merge items in when
425489
# canonicalising. Note that for list-items, above, we just skip
@@ -440,7 +504,9 @@ def not_seen(elem: JSONType) -> bool:
440504
return strat.filter(lambda val: any(contains(x) for x in val))
441505

442506

443-
def object_schema(schema: dict) -> st.SearchStrategy[Dict[str, JSONType]]:
507+
def object_schema(
508+
custom_formats: Dict[str, st.SearchStrategy[str]], schema: dict
509+
) -> st.SearchStrategy[Dict[str, JSONType]]:
444510
"""Handle a manageable subset of possible schemata for objects."""
445511
required = schema.get("required", []) # required keys
446512
min_size = max(len(required), schema.get("minProperties", 0))
@@ -468,7 +534,9 @@ def object_schema(schema: dict) -> st.SearchStrategy[Dict[str, JSONType]]:
468534
st.sampled_from(sorted(dep_names) + sorted(dep_schemas) + sorted(properties))
469535
if (dep_names or dep_schemas or properties)
470536
else st.nothing(),
471-
from_schema(names) if additional_allowed else st.nothing(),
537+
from_schema(names, custom_formats=custom_formats)
538+
if additional_allowed
539+
else st.nothing(),
472540
st.one_of([st.from_regex(p) for p in sorted(patterns)]),
473541
)
474542
all_names_strategy = st.one_of([s for s in name_strats if not s.is_empty]).filter(
@@ -515,9 +583,9 @@ def from_object_schema(draw: Any) -> Any:
515583
pattern_schemas.insert(0, properties[key])
516584

517585
if pattern_schemas:
518-
out[key] = draw(merged_as_strategies(pattern_schemas))
586+
out[key] = draw(merged_as_strategies(pattern_schemas, custom_formats))
519587
else:
520-
out[key] = draw(from_schema(additional))
588+
out[key] = draw(from_schema(additional, custom_formats=custom_formats))
521589

522590
for k, v in dep_schemas.items():
523591
if k in out and not make_validator(v).is_valid(out):

tests/test_from_schema.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Tests for the hypothesis-jsonschema library."""
22

33
import json
4+
import re
45
from pathlib import Path
56

67
import jsonschema
@@ -383,3 +384,46 @@ def test_multiple_contains_behind_allof(value):
383384
# By placing *multiple* contains elements behind "allOf" we've disabled the
384385
# mixed-generation logic, and so we can't generate any valid instances at all.
385386
jsonschema.validate(value, ALLOF_CONTAINS)
387+
388+
389+
@jsonschema.FormatChecker.cls_checks("card-test")
390+
def validate_card_format(string):
391+
# For the real thing, you'd want to use the Luhn algorithm; this is enough for tests.
392+
return bool(re.match(r"^\d{4} \d{4} \d{4} \d{4}$", string))
393+
394+
395+
@pytest.mark.parametrize(
396+
"kw",
397+
[
398+
{"foo": "not a strategy"},
399+
{5: st.just("name is not a string")},
400+
{"full-date": st.just("2000-01-01")}, # can't override a standard format
401+
{"card-test": st.just("not a valid card")},
402+
],
403+
)
404+
@given(data=st.data())
405+
def test_custom_formats_validation(data, kw):
406+
s = from_schema({"type": "string", "format": "card-test"}, custom_formats=kw)
407+
with pytest.raises(InvalidArgument):
408+
data.draw(s)
409+
410+
411+
@given(
412+
num=from_schema(
413+
{"type": "string", "format": "card-test"},
414+
custom_formats={"card-test": st.just("4111 1111 1111 1111")},
415+
)
416+
)
417+
def test_allowed_custom_format(num):
418+
assert num == "4111 1111 1111 1111"
419+
420+
421+
@given(
422+
string=from_schema(
423+
{"type": "string", "format": "not registered"},
424+
custom_formats={"not registered": st.just("hello world")},
425+
)
426+
)
427+
def test_allowed_unknown_custom_format(string):
428+
assert string == "hello world"
429+
assert "not registered" not in jsonschema.FormatChecker().checkers

0 commit comments

Comments
 (0)