diff --git a/Lib/json/__init__.py b/Lib/json/__init__.py index 1d972d22ded072..a34ff471f219d6 100644 --- a/Lib/json/__init__.py +++ b/Lib/json/__init__.py @@ -117,9 +117,72 @@ default=None, ) +# def dump(obj, fp, *, skipkeys=False, ensure_ascii=True, check_circular=True, +# allow_nan=True, cls=None, indent=None, separators=None, +# default=None, sort_keys=False, **kw): +# """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a +# ``.write()``-supporting file-like object). + +# If ``skipkeys`` is true then ``dict`` keys that are not basic types +# (``str``, ``int``, ``float``, ``bool``, ``None``) will be skipped +# instead of raising a ``TypeError``. + +# If ``ensure_ascii`` is false, then the strings written to ``fp`` can +# contain non-ASCII characters if they appear in strings contained in +# ``obj``. Otherwise, all such characters are escaped in JSON strings. + +# If ``check_circular`` is false, then the circular reference check +# for container types will be skipped and a circular reference will +# result in an ``RecursionError`` (or worse). + +# If ``allow_nan`` is false, then it will be a ``ValueError`` to +# serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) +# in strict compliance of the JSON specification, instead of using the +# JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + +# If ``indent`` is a non-negative integer, then JSON array elements and +# object members will be pretty-printed with that indent level. An indent +# level of 0 will only insert newlines. ``None`` is the most compact +# representation. + +# If specified, ``separators`` should be an ``(item_separator, key_separator)`` +# tuple. The default is ``(', ', ': ')`` if *indent* is ``None`` and +# ``(',', ': ')`` otherwise. To get the most compact JSON representation, +# you should specify ``(',', ':')`` to eliminate whitespace. + +# ``default(obj)`` is a function that should return a serializable version +# of obj or raise TypeError. 
The default simply raises TypeError. + +# If *sort_keys* is true (default: ``False``), then the output of +# dictionaries will be sorted by key. + +# To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the +# ``.default()`` method to serialize additional types), specify it with +# the ``cls`` kwarg; otherwise ``JSONEncoder`` is used. + +# """ +# # cached encoder +# if (not skipkeys and ensure_ascii and +# check_circular and allow_nan and +# cls is None and indent is None and separators is None and +# default is None and not sort_keys and not kw): +# iterable = _default_encoder.iterencode(obj) +# else: +# if cls is None: +# cls = JSONEncoder +# iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, +# check_circular=check_circular, allow_nan=allow_nan, indent=indent, +# separators=separators, +# default=default, sort_keys=sort_keys, **kw).iterencode(obj) +# # could accelerate with writelines in some versions of Python, at +# # a debuggability cost +# for chunk in iterable: +# fp.write(chunk) + + def dump(obj, fp, *, skipkeys=False, ensure_ascii=True, check_circular=True, - allow_nan=True, cls=None, indent=None, separators=None, - default=None, sort_keys=False, **kw): + allow_nan=True, cls=None, indent=None, separators=None, + default=None, sort_keys=False, **kw): """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a ``.write()``-supporting file-like object). 
@@ -166,18 +229,18 @@ def dump(obj, fp, *, skipkeys=False, ensure_ascii=True, check_circular=True, check_circular and allow_nan and cls is None and indent is None and separators is None and default is None and not sort_keys and not kw): - iterable = _default_encoder.iterencode(obj) + json_string = _default_encoder.encode(obj) # Encode JSON in one go else: if cls is None: cls = JSONEncoder - iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, + json_string = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, check_circular=check_circular, allow_nan=allow_nan, indent=indent, separators=separators, - default=default, sort_keys=sort_keys, **kw).iterencode(obj) - # could accelerate with writelines in some versions of Python, at - # a debuggability cost - for chunk in iterable: - fp.write(chunk) + default=default, sort_keys=sort_keys, **kw).encode(obj) + + # Perform a single efficient write operation + fp.write(json_string) + def dumps(obj, *, skipkeys=False, ensure_ascii=True, check_circular=True, diff --git a/Lib/test/test_json/test_dump.py b/Lib/test/test_json/test_dump.py index 13b40020781bae..6f0dc35bf9ed7f 100644 --- a/Lib/test/test_json/test_dump.py +++ b/Lib/test/test_json/test_dump.py @@ -3,6 +3,10 @@ from test.support import bigmemtest, _1G +import time +import json +import unittest + class TestDump: def test_dump(self): sio = StringIO() @@ -76,3 +80,30 @@ def test_large_list(self, size): self.assertEqual(encoded[:1], "[") self.assertEqual(encoded[-2:], "1]") self.assertEqual(encoded[1:-2], "1, " * (N - 1)) + + + +class TestDumpPerformance(unittest.TestCase): + def test_json_dump_speed(self): + """Test json.dump() performance improvement""" + x = {"numbers": list(range(100000)), "text": "example"} + + # Standard json.dump() (Old Method) + start = time.time() + with open("output1.json", "w", encoding="utf-8") as f: + json.dump(x, f) + original_time = time.time() - start + + # Optimized json.dump() (New Method) + start = time.time() + with 
open("output2.json", "w", encoding="utf-8") as f: + f.write(json.dumps(x)) + optimized_time = time.time() - start + + print(f"Original json.dump() time: {original_time:.6f} seconds") + print(f"Optimized json.dump() time: {optimized_time:.6f} seconds") + + self.assertLess(optimized_time, original_time, "Optimized json.dump() should be faster") + +if __name__ == "__main__": + unittest.main() diff --git a/Misc/NEWS.d/next/Library/2025-02-07-07-01-53.gh-issue-129711.yYPmzf.rst b/Misc/NEWS.d/next/Library/2025-02-07-07-01-53.gh-issue-129711.yYPmzf.rst new file mode 100644 index 00000000000000..361f17ebe8a99f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-02-07-07-01-53.gh-issue-129711.yYPmzf.rst @@ -0,0 +1 @@ +Optimize :func:`json.dump` by encoding the whole object first and writing it to the file with a single ``write()`` call instead of one call per encoded chunk. diff --git a/dummy_commit_file.py b/dummy_commit_file.py new file mode 100644 index 00000000000000..58510a6a3fd3e2 --- /dev/null +++ b/dummy_commit_file.py @@ -0,0 +1 @@ +print("This is a Test Commit File")