|
| 1 | +"""Utilities to manipulate JSON objects.""" |
| 2 | + |
| 3 | +# NOTE: this is a copy of ipykernel/jsonutils.py (+blackified) |
| 4 | + |
| 5 | +# Copyright (c) IPython Development Team. |
| 6 | +# Distributed under the terms of the Modified BSD License. |
| 7 | + |
| 8 | +from binascii import b2a_base64 |
| 9 | +import math |
| 10 | +import re |
| 11 | +import types |
| 12 | +from datetime import datetime |
| 13 | +import numbers |
| 14 | + |
| 15 | + |
| 16 | +from ipython_genutils import py3compat |
| 17 | +from ipython_genutils.py3compat import unicode_type, iteritems |
| 18 | + |
# Name of the method that advances an iterator: '__next__' on Python 3,
# 'next' on Python 2.  Looked up with hasattr() to recognise iterators.
if py3compat.PY3:
    next_attr_name = '__next__'
else:
    next_attr_name = 'next'
| 20 | + |
| 21 | +# ----------------------------------------------------------------------------- |
| 22 | +# Globals and constants |
| 23 | +# ----------------------------------------------------------------------------- |
| 24 | + |
# timestamp formats
# strftime/strptime format: date and time with microseconds, no timezone.
ISO8601 = "%Y-%m-%dT%H:%M:%S.%f"
# Matches a whole ISO8601 timestamp string:
#   group 1: date and time down to the second
#   group 2: optional fractional seconds (1 to 6 digits, with the leading dot)
#   an optional literal "Z"
#   group 3: optional UTC offset, +/-HH:MM or +/-HHMM
ISO8601_PAT = re.compile(
    r"^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})(\.\d{1,6})?Z?([\+\-]\d{2}:?\d{2})?$"
)

# datetime.strptime is not thread-safe.
# Calling it once at import time seems to help, so do a throwaway parse here.
datetime.strptime("1", "%d")
| 34 | + |
| 35 | +# ----------------------------------------------------------------------------- |
| 36 | +# Classes and functions |
| 37 | +# ----------------------------------------------------------------------------- |
| 38 | + |
| 39 | + |
# constants for identifying png/jpeg data
# leading bytes (signature) of raw PNG data
PNG = b'\x89PNG\r\n\x1a\n'
# front of PNG base64-encoded
PNG64 = b'iVBORw0KG'
# leading bytes of raw JPEG data
JPEG = b'\xff\xd8'
# front of JPEG base64-encoded
JPEG64 = b'/9'
# constants for identifying gif data:
# front of GIF base64-encoded ("GIF87a" and "GIF89a" headers respectively)
GIF_64 = b'R0lGODdh'
GIF89_64 = b'R0lGODlh'
# front of PDF base64-encoded ("%PDF" header)
PDF64 = b'JVBER'
| 52 | + |
| 53 | + |
def encode_images(format_dict):
    """Base64-encode binary payloads in a displaypub format dict.

    On Python 3 nothing is done here: ``format_dict`` itself (not a copy) is
    returned, because bytes objects are unambiguous binary data there and
    ``json_clean`` base64-encodes them.

    On Python 2 the values stored under 'image/png', 'image/jpeg',
    'image/gif' and 'application/pdf' are examined.  A bytes value that does
    not already start with the base64 prefix expected for its mime-type is
    base64-encoded; in either case the value is then decoded from ASCII to
    text.  Values that are not bytes, and all other keys, are left alone.

    Parameters
    ----------
    format_dict : dict
        A dictionary of display data keyed by mime-type

    Returns
    -------
    format_dict : dict
        On Python 3, the input dictionary unchanged.  On Python 2, a shallow
        copy in which the binary payloads listed above have been replaced by
        base64 text.
    """
    # Python 3: bytes need no guessing, hand the input straight back.
    if py3compat.PY3:
        return format_dict

    # mime-type -> prefix(es) that mark a payload as already base64-encoded;
    # used to make sure we don't double-encode.
    known_b64_fronts = (
        ('image/png', PNG64),
        ('image/jpeg', JPEG64),
        ('image/gif', (GIF_64, GIF89_64)),
        ('application/pdf', PDF64),
    )

    encoded = format_dict.copy()
    for mimetype, b64_front in known_b64_fronts:
        payload = format_dict.get(mimetype)
        if not isinstance(payload, bytes):
            continue
        if not payload.startswith(b64_front):
            payload = b2a_base64(payload)
        encoded[mimetype] = payload.decode('ascii')

    return encoded
| 112 | + |
| 113 | + |
def json_clean(obj):
    """Sanitize an object so that it can later be encoded as JSON.

    This function does not *encode* anything; it only returns a value built
    from types that will not cause an encoding error later:

    - bools, text strings and None are returned unmodified;
    - integral numbers are cast to ``int`` and other real numbers to
      ``float``, except nan and +/-inf, which are returned as their ``repr``;
    - bytes are base64-encoded and returned as text (on Python 2, bytes that
      are pure ASCII are treated as text and simply decoded);
    - tuples, sets, generators and other iterators are converted to lists,
      and lists are copied, with every element cleaned recursively;
    - dicts are copied, with keys converted to text and values cleaned
      recursively;
    - datetime objects are formatted with the ``ISO8601`` format.

    Parameters
    ----------
    obj : any python object

    Returns
    -------
    out : object
        A version of the input which will not cause an encoding error when
        encoded as JSON.

    Raises
    ------
    ValueError
        If a dict has keys that collide once converted to text (such as both
        the number 1 and the string '1'), or if ``obj`` is of a type that is
        not handled.
    """
    # bool is a subtype of Integral, which is a subtype of Real, so the
    # checks have to run from the most specific type to the least.
    if isinstance(obj, bool):
        return obj

    if isinstance(obj, numbers.Integral):
        # plain int, in case a subclass overrides __str__
        # (e.g. boost enum, #4598)
        return int(obj)

    if isinstance(obj, numbers.Real):
        # out-of-range floats become their reprs
        if math.isnan(obj) or math.isinf(obj):
            return repr(obj)
        return float(obj)

    # text and None are fine in JSON as they are
    if isinstance(obj, (unicode_type, type(None))):
        return obj

    if isinstance(obj, bytes):
        if not py3compat.PY3:
            # A Python 2 bytestring is ambiguous.  Imperfect workaround:
            # if it is ASCII assume it is text, otherwise treat it as binary
            # exactly like Python 3 does below.
            try:
                return obj.decode('ascii')
            except UnicodeDecodeError:
                pass
        # unambiguous binary data is base64-encoded
        # (this probably should have happened upstream)
        return b2a_base64(obj).decode('ascii')

    # containers and iterators are materialised as lists first
    listable_types = (tuple, set, types.GeneratorType)
    if isinstance(obj, listable_types) or (
        hasattr(obj, '__iter__') and hasattr(obj, next_attr_name)
    ):
        obj = list(obj)

    if isinstance(obj, list):
        return [json_clean(item) for item in obj]

    if isinstance(obj, dict):
        # Keys such as 1 and '1' (or True and 'True') become the same text
        # key; refuse to convert rather than silently drop a value.
        key_count = len(obj)
        distinct_text_keys = {unicode_type(key) for key in obj}
        if key_count != len(distinct_text_keys):
            raise ValueError(
                'dict cannot be safely converted to JSON: '
                'key collision would lead to dropped values'
            )
        # No collisions: build the json-safe copy.
        return {
            unicode_type(key): json_clean(value)
            for key, value in iteritems(obj)
        }

    if isinstance(obj, datetime):
        return obj.strftime(ISO8601)

    # we don't understand it, it's probably an unserializable object
    raise ValueError("Can't clean for JSON: %r" % obj)
0 commit comments