|
12 | 12 | most things by construction instead of by filtering. That's the difference
|
13 | 13 | between "I'd like it to be faster" and "doesn't finish at all".
|
14 | 14 | """
|
15 |
| -import functools |
16 | 15 | import itertools
|
17 | 16 | import json
|
18 | 17 | import math
|
19 | 18 | import re
|
20 | 19 | from copy import deepcopy
|
21 |
| -from json.encoder import _make_iterencode, encode_basestring_ascii # type: ignore |
22 |
| -from typing import Any, Callable, Dict, List, NoReturn, Optional, Tuple, Type, Union |
| 20 | +from typing import Any, Dict, List, NoReturn, Optional, Tuple, Union |
23 | 21 |
|
24 | 22 | import jsonschema
|
25 | 23 | from hypothesis.errors import InvalidArgument
|
26 | 24 | from hypothesis.internal.floats import next_down as ieee_next_down, next_up
|
27 | 25 |
|
28 |
| -# Mypy does not (yet!) support recursive type definitions. |
29 |
| -# (and writing a few steps by hand is a DoS attack on the AST walker in Pytest) |
30 |
| -JSONType = Union[None, bool, float, str, list, Dict[str, Any]] |
| 26 | +from ._encode import JSONType, encode_canonical_json, sort_key |
| 27 | + |
31 | 28 | Schema = Dict[str, JSONType]
|
32 | 29 | JSONSchemaValidator = Union[
|
33 | 30 | jsonschema.validators.Draft4Validator,
|
@@ -86,110 +83,10 @@ def make_validator(schema: Schema) -> JSONSchemaValidator:
|
86 | 83 | return validator(schema)
|
87 | 84 |
|
88 | 85 |
|
class CanonicalisingJsonEncoder(json.JSONEncoder):
    """JSON encoder that writes integer-valued floats as integers.

    With this encoder ``1.0`` and ``1`` serialise to the same text, which is
    what uniqueness checks on JSON values need.
    """

    def iterencode(self, o: Any, _one_shot: bool = False) -> Any:
        """Replace a stdlib method, so we encode integer-valued floats as ints.

        The encoder's own ``check_circular``, ``ensure_ascii`` and ``indent``
        settings are honoured, so nested strings are escaped the same way as
        a top-level string (which the stdlib ``encode`` fast path handles).

        Args:
            o: The value to serialise.
            _one_shot: Forwarded unchanged to the stdlib encoder factory.

        Returns:
            An iterable of string chunks that join into the encoded document.
        """
        # Local import: only needed when the encoder was built with
        # ensure_ascii=False, and keeps this class self-contained.
        from json.encoder import encode_basestring

        def floatstr(o: float) -> str:
            # This is the bit we're overriding - integer-valued floats are
            # encoded as integers, to support JSONschemas's uniqueness.
            # Non-finite floats are an internal invariant violation here.
            assert math.isfinite(o)
            if o == int(o):
                return repr(int(o))
            return repr(o)

        # Normalise an integer indent to a string up front, so the result
        # does not depend on whether the private helper does it for us.
        indent = self.indent
        if indent is not None and not isinstance(indent, str):
            indent = " " * indent

        return _make_iterencode(
            {} if self.check_circular else None,
            self.default,
            encode_basestring_ascii if self.ensure_ascii else encode_basestring,
            indent,
            floatstr,
            self.key_separator,
            self.item_separator,
            self.sort_keys,
            self.skipkeys,
            _one_shot,
        )(o, 0)
113 |
| - |
114 |
| - |
class HypothesisRefResolutionError(jsonschema.exceptions.RefResolutionError):
    """A ``RefResolutionError`` subclass defined by this module."""
|
117 | 88 |
|
118 | 89 |
|
def _make_cache_key(
    value: JSONType,
) -> Tuple[Type, Union[Tuple, None, bool, float, str]]:
    """Make a hashable object from any JSON value.

    Mutable containers are recursively converted to tuples, and every value
    is paired with its type as a discriminant.
    """
    if isinstance(value, dict):
        return (dict, tuple((k, _make_cache_key(v)) for k, v in value.items()))
    if isinstance(value, list):
        return (list, tuple(map(_make_cache_key, value)))
    # Primitive types are hashable
    # `type` is needed to distinguish false-ish values - 0, "", False have the same hash (0)
    return (type(value), value)


class HashedJSON:
    """A proxy that holds a JSON value.

    Adds a capability for the inner value to be cached, loosely based on `functools._HashedSeq`.

    Equality compares the full cache key, not just its hash: distinct values
    can share a hash (on CPython ``hash(-1) == hash(-2)``), and treating them
    as equal would make a cache return the result for the wrong value.
    """

    __slots__ = ("value", "hashedvalue", "_key")

    def __init__(self, value: JSONType):
        self.value = value
        # Immutable snapshot of the value, used for exact equality checks.
        self._key = _make_cache_key(value)
        # `hash` is called multiple times on cache miss, therefore it is evaluated only once
        self.hashedvalue = hash(self._key)

    def __hash__(self) -> int:
        return self.hashedvalue

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, HashedJSON):
            return NotImplemented
        # Cheap hash comparison first; the key comparison resolves collisions.
        return self.hashedvalue == other.hashedvalue and self._key == other._key
155 |
| - |
156 |
| - |
def cached_json(func: Callable[[HashedJSON], str]) -> Callable[[JSONType], str]:
    """Wrap ``func`` in a bounded LRU cache keyed on ``HashedJSON`` proxies.

    The returned callable accepts a plain JSON value, wraps it in
    ``HashedJSON`` so that it can serve as a cache key, and delegates to the
    memoised ``func``. The same schemas are encoded multiple times during
    canonicalisation and caching gives visible performance impact.
    """
    memoised = functools.lru_cache(maxsize=1024)(func)

    def wrapped(value: JSONType) -> str:
        return memoised(HashedJSON(value))

    # Copy the metadata of the memoised function onto the wrapper.
    return functools.wraps(memoised)(wrapped)
169 |
| - |
170 |
| - |
@cached_json
def encode_canonical_json(value: HashedJSON) -> str:
    """Serialise the wrapped JSON value to its canonical text form.

    Keys are sorted and integer-valued floats are written as integers, so
    the output can be used for uniqueness testing.
    """
    encoder = CanonicalisingJsonEncoder(sort_keys=True)
    return encoder.encode(value.value)
175 |
| - |
176 |
| - |
def sort_key(value: JSONType) -> Tuple[int, float, Union[float, str]]:
    """Return a sort key (type, guess, tiebreak) that can compare any JSON value.

    Scalars order before collections: null, booleans, integer-valued
    numbers, other numbers, then strings, arrays and objects. Numbers are
    ranked by magnitude with the negative value first on ties; collections
    are ranked by length and then by their canonical JSON encoding.
    """
    if value is None:
        return (0, 0, 0)
    # bool must be tested before int, since bool is a subclass of int.
    if isinstance(value, bool):
        return (1, int(value), 0)
    if not isinstance(value, (int, float)):
        collection_rank = {str: 4, list: 5, dict: 6}[type(value)]
        return (collection_rank, len(value), encode_canonical_json(value))
    is_integral = int(value) == value
    return (2 if is_integral else 3, abs(value), value >= 0)
191 |
| - |
192 |
| - |
193 | 90 | def get_type(schema: Schema) -> List[str]:
|
194 | 91 | """Return a canonical value for the "type" key.
|
195 | 92 |
|
|
0 commit comments