Skip to content

Commit 747ab07

Browse files
committed
Added a custom JSON encoder for json.dumps().
Added a custom JSON encoder based on json.JSONEncoder. Changes from the default behaviour: - Use `_make_iterencode` as the only `_iterencode`. - Change `_make_iterencode` so that `_indent` is set to `None` inside `_iterencode_dict`. - Use `py_encode_basestring` as the only `encoder`. Also updated the JSON writer to sort keys according to the requested top_keys. Signed-off-by: Md Safiyat Reza <[email protected]>
1 parent e71fd49 commit 747ab07

File tree

1 file changed

+239
-0
lines changed

1 file changed

+239
-0
lines changed

pre_commit_hooks/pretty_format_json.py

Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,243 @@
99
from typing import Tuple
1010
from typing import Union
1111

12+
# Positive infinity; CustomJSONEncoder's float formatter compares values
# against INFINITY and -INFINITY to detect out-of-range floats.
INFINITY = float('inf')
13+
14+
15+
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
        _key_separator, _item_separator, _sort_keys, _skipkeys,
        _one_shot,
        ## HACK: hand-optimized bytecode; turn globals into locals
        ValueError=ValueError,
        dict=dict,
        float=float,
        id=id,
        int=int,
        isinstance=isinstance,
        list=list,
        str=str,
        tuple=tuple,
        _intstr=int.__str__,
    ):
    """Build the chunk generator used to serialise an object to JSON text.

    This follows the shape of the pure-Python encoder in ``json.encoder``
    with one deliberate difference: dictionaries are always written on a
    single line (no newline or indentation between their key/value pairs),
    while lists and tuples are still broken onto one line per element when
    ``_indent`` is given.

    Args:
        markers: dict used for circular-reference detection (maps ``id(obj)``
            to the container currently being encoded), or ``None`` to skip
            the check.
        _default: callable invoked for objects that are not str, None, bool,
            int, float, list, tuple or dict; its return value is encoded
            instead.
        _encoder: callable turning a ``str`` into its quoted JSON form.
        _indent: ``None`` for no list indentation, a string used as one
            indentation unit, or a number of spaces per level.
        _floatstr: callable turning a ``float`` into its JSON text.
        _key_separator: text emitted between a key and its value.
        _item_separator: text emitted between items. Inside an indented list
            its trailing whitespace is stripped before the newline.
        _sort_keys: when true, dict items are sorted by key before output.
        _skipkeys: when true, keys of unsupported types are skipped instead
            of raising ``TypeError``.
        _one_shot: accepted for signature compatibility; not used here.

    The remaining keyword parameters only bind builtins to local names and
    are not meant to be passed by callers.

    Returns:
        A generator function ``_iterencode(o, _current_indent_level)`` that
        yields the JSON text of ``o`` in string chunks.
    """
    if _indent is not None and not isinstance(_indent, str):
        _indent = ' ' * _indent

    def _encode_scalar(value):
        """Return the JSON text for a scalar, or None for any other value."""
        if isinstance(value, str):
            return _encoder(value)
        if value is None:
            return 'null'
        if value is True:
            return 'true'
        if value is False:
            return 'false'
        if isinstance(value, int):
            # Subclasses of int/float may override __str__, but we still
            # want to encode them as integers/floats in JSON. One example
            # within the standard library is IntEnum.
            return _intstr(value)
        if isinstance(value, float):
            # see comment above for int
            return _floatstr(value)
        return None

    def _iterencode_list(lst, _current_indent_level):
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        buf = '['
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + _indent * _current_indent_level
            # Drop the separator's trailing space so lines do not end in
            # whitespace when the separator is e.g. ', '.
            separator = _item_separator.rstrip() + newline_indent
            buf += newline_indent
        else:
            newline_indent = None
            separator = _item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                buf = separator
            text = _encode_scalar(value)
            if text is not None:
                yield buf + text
            else:
                yield buf
                # _iterencode dispatches to the list/dict encoders itself.
                yield from _iterencode(value, _current_indent_level)
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + _indent * _current_indent_level
        yield ']'
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(dct, _current_indent_level):
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        # No newlines or indentation for the k-v pairs: the whole dict is
        # emitted on one line and the indent level is passed on unchanged.
        yield '{'
        first = True
        if _sort_keys:
            items = sorted(dct.items(), key=lambda kv: kv[0])
        else:
            items = dct.items()
        for key, value in items:
            if isinstance(key, str):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                # see comment for int/float in _encode_scalar
                key = _floatstr(key)
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, int):
                # see comment for int/float in _encode_scalar
                key = _intstr(key)
            elif _skipkeys:
                continue
            else:
                raise TypeError(f'keys must be str, int, float, bool or None, '
                                f'not {key.__class__.__name__}')
            if first:
                first = False
            else:
                yield _item_separator
            yield _encoder(key)
            yield _key_separator
            text = _encode_scalar(value)
            if text is not None:
                yield text
            else:
                yield from _iterencode(value, _current_indent_level)
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(o, _current_indent_level):
        text = _encode_scalar(o)
        if text is not None:
            yield text
        elif isinstance(o, (list, tuple)):
            yield from _iterencode_list(o, _current_indent_level)
        elif isinstance(o, dict):
            yield from _iterencode_dict(o, _current_indent_level)
        else:
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            o = _default(o)
            yield from _iterencode(o, _current_indent_level)
            if markers is not None:
                del markers[markerid]

    return _iterencode
201+
202+
203+
class CustomJSONEncoder(json.JSONEncoder):
    """JSON encoder that always serialises through ``_make_iterencode``.

    Unlike ``json.JSONEncoder`` it never switches to the C accelerator, so
    the module's own ``_make_iterencode`` (which keeps every dict on a single
    line) is used for all output.
    """

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in CustomJSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        Raises ``ValueError`` (while the chunks are being consumed) for NaN
        or infinite floats when ``allow_nan`` is false.
        """
        if self.check_circular:
            markers = {}
        else:
            markers = None

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=float.__repr__, _inf=INFINITY, _neginf=-INFINITY):
            # Check for specials.  Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on the
            # internals.

            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        # Honour ``ensure_ascii``: callers pass it through json.dumps() and
        # expect non-ASCII characters to be escaped when it is true.
        if self.ensure_ascii:
            _encoder = json.encoder.py_encode_basestring_ascii
        else:
            _encoder = json.encoder.py_encode_basestring

        _iterencode = _make_iterencode(
            markers, self.default, _encoder, self.indent, floatstr,
            self.key_separator, self.item_separator, self.sort_keys,
            self.skipkeys, _one_shot)
        return _iterencode(o, 0)
248+
12249

13250
def _get_pretty_format(
14251
contents: str,
@@ -28,6 +265,8 @@ def pairs_first(pairs: Sequence[Tuple[str, str]]) -> Mapping[str, str]:
28265
json.loads(contents, object_pairs_hook=pairs_first),
29266
indent=indent,
30267
ensure_ascii=ensure_ascii,
268+
cls=CustomJSONEncoder,
269+
separators=(', ', ': ')
31270
)
32271
return f'{json_pretty}\n'
33272

0 commit comments

Comments
 (0)