From bb14e4831462510a323c9b4c6d4429b332b5f430 Mon Sep 17 00:00:00 2001 From: teja-chalikanti Date: Fri, 7 Feb 2025 00:44:35 -0500 Subject: [PATCH 1/6] A dummy Python File to Test Push --- dummy_commit_file.py | 1 + 1 file changed, 1 insertion(+) create mode 100644 dummy_commit_file.py diff --git a/dummy_commit_file.py b/dummy_commit_file.py new file mode 100644 index 00000000000000..58510a6a3fd3e2 --- /dev/null +++ b/dummy_commit_file.py @@ -0,0 +1 @@ +print("This is a Test Commit File") From e22386b4fba5cfd3487fc9a33e74ba3e7bb141cb Mon Sep 17 00:00:00 2001 From: teja-chalikanti Date: Fri, 7 Feb 2025 00:46:15 -0500 Subject: [PATCH 2/6] Optimize json.dump() to reduce multiple I/O operations and improve performance --- Lib/json/__init__.py | 81 +++++++++++++++++++++++++++++---- Lib/test/test_json/test_dump.py | 31 +++++++++++++ 2 files changed, 103 insertions(+), 9 deletions(-) diff --git a/Lib/json/__init__.py b/Lib/json/__init__.py index 1d972d22ded072..a34ff471f219d6 100644 --- a/Lib/json/__init__.py +++ b/Lib/json/__init__.py @@ -117,9 +117,72 @@ default=None, ) +# def dump(obj, fp, *, skipkeys=False, ensure_ascii=True, check_circular=True, +# allow_nan=True, cls=None, indent=None, separators=None, +# default=None, sort_keys=False, **kw): +# """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a +# ``.write()``-supporting file-like object). + +# If ``skipkeys`` is true then ``dict`` keys that are not basic types +# (``str``, ``int``, ``float``, ``bool``, ``None``) will be skipped +# instead of raising a ``TypeError``. + +# If ``ensure_ascii`` is false, then the strings written to ``fp`` can +# contain non-ASCII characters if they appear in strings contained in +# ``obj``. Otherwise, all such characters are escaped in JSON strings. + +# If ``check_circular`` is false, then the circular reference check +# for container types will be skipped and a circular reference will +# result in an ``RecursionError`` (or worse). 
+ +# If ``allow_nan`` is false, then it will be a ``ValueError`` to +# serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) +# in strict compliance of the JSON specification, instead of using the +# JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + +# If ``indent`` is a non-negative integer, then JSON array elements and +# object members will be pretty-printed with that indent level. An indent +# level of 0 will only insert newlines. ``None`` is the most compact +# representation. + +# If specified, ``separators`` should be an ``(item_separator, key_separator)`` +# tuple. The default is ``(', ', ': ')`` if *indent* is ``None`` and +# ``(',', ': ')`` otherwise. To get the most compact JSON representation, +# you should specify ``(',', ':')`` to eliminate whitespace. + +# ``default(obj)`` is a function that should return a serializable version +# of obj or raise TypeError. The default simply raises TypeError. + +# If *sort_keys* is true (default: ``False``), then the output of +# dictionaries will be sorted by key. + +# To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the +# ``.default()`` method to serialize additional types), specify it with +# the ``cls`` kwarg; otherwise ``JSONEncoder`` is used. 
+ +# """ +# # cached encoder +# if (not skipkeys and ensure_ascii and +# check_circular and allow_nan and +# cls is None and indent is None and separators is None and +# default is None and not sort_keys and not kw): +# iterable = _default_encoder.iterencode(obj) +# else: +# if cls is None: +# cls = JSONEncoder +# iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, +# check_circular=check_circular, allow_nan=allow_nan, indent=indent, +# separators=separators, +# default=default, sort_keys=sort_keys, **kw).iterencode(obj) +# # could accelerate with writelines in some versions of Python, at +# # a debuggability cost +# for chunk in iterable: +# fp.write(chunk) + + def dump(obj, fp, *, skipkeys=False, ensure_ascii=True, check_circular=True, - allow_nan=True, cls=None, indent=None, separators=None, - default=None, sort_keys=False, **kw): + allow_nan=True, cls=None, indent=None, separators=None, + default=None, sort_keys=False, **kw): """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a ``.write()``-supporting file-like object). 
@@ -166,18 +229,18 @@ def dump(obj, fp, *, skipkeys=False, ensure_ascii=True, check_circular=True, check_circular and allow_nan and cls is None and indent is None and separators is None and default is None and not sort_keys and not kw): - iterable = _default_encoder.iterencode(obj) + json_string = _default_encoder.encode(obj) # Encode JSON in one go else: if cls is None: cls = JSONEncoder - iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, + json_string = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, check_circular=check_circular, allow_nan=allow_nan, indent=indent, separators=separators, - default=default, sort_keys=sort_keys, **kw).iterencode(obj) - # could accelerate with writelines in some versions of Python, at - # a debuggability cost - for chunk in iterable: - fp.write(chunk) + default=default, sort_keys=sort_keys, **kw).encode(obj) + + # Perform a single efficient write operation + fp.write(json_string) + def dumps(obj, *, skipkeys=False, ensure_ascii=True, check_circular=True, diff --git a/Lib/test/test_json/test_dump.py b/Lib/test/test_json/test_dump.py index 13b40020781bae..8571396009a0d1 100644 --- a/Lib/test/test_json/test_dump.py +++ b/Lib/test/test_json/test_dump.py @@ -3,6 +3,10 @@ from test.support import bigmemtest, _1G +import time +import json +import unittest + class TestDump: def test_dump(self): sio = StringIO() @@ -76,3 +80,30 @@ def test_large_list(self, size): self.assertEqual(encoded[:1], "[") self.assertEqual(encoded[-2:], "1]") self.assertEqual(encoded[1:-2], "1, " * (N - 1)) + + + +class TestDumpPerformance(unittest.TestCase): + def test_json_dump_speed(self): + """Test json.dump() performance improvement""" + x = {"numbers": list(range(100000)), "text": "example"} + + # Standard json.dump() (Old Method) + start = time.time() + with open("output1.json", "w", encoding="utf-8") as f: + json.dump(x, f) + original_time = time.time() - start + + # Optimized json.dump() (New Method) + start = time.time() + with 
open("output2.json", "w", encoding="utf-8") as f: + f.write(json.dumps(x)) + optimized_time = time.time() - start + + print(f"Original json.dump() time: {original_time:.6f} seconds") + print(f"Optimized json.dump() time: {optimized_time:.6f} seconds") + + self.assertLess(optimized_time, original_time, "Optimized json.dump() should be faster") + +if __name__ == "__main__": + unittest.main() \ No newline at end of file From e380a43cdfaccc74db79e127b2dbf57a4c52ab2d Mon Sep 17 00:00:00 2001 From: teja-chalikanti Date: Fri, 7 Feb 2025 01:02:11 -0500 Subject: [PATCH 3/6] Fix linting issue: Add newline at end of test_dump.py --- Lib/test/test_json/test_dump.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_json/test_dump.py b/Lib/test/test_json/test_dump.py index 8571396009a0d1..eb2756bca3ba94 100644 --- a/Lib/test/test_json/test_dump.py +++ b/Lib/test/test_json/test_dump.py @@ -106,4 +106,5 @@ def test_json_dump_speed(self): self.assertLess(optimized_time, original_time, "Optimized json.dump() should be faster") if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() + \ No newline at end of file From 75e68ee2cb7b5f63e0ef2a2ed6a23535087f9327 Mon Sep 17 00:00:00 2001 From: teja-chalikanti Date: Fri, 7 Feb 2025 01:06:31 -0500 Subject: [PATCH 4/6] Fix linting issue-1: Add newline at end of test_dump.py --- Lib/test/test_json/test_dump.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_json/test_dump.py b/Lib/test/test_json/test_dump.py index eb2756bca3ba94..40cec876b38c37 100644 --- a/Lib/test/test_json/test_dump.py +++ b/Lib/test/test_json/test_dump.py @@ -107,4 +107,4 @@ def test_json_dump_speed(self): if __name__ == "__main__": unittest.main() - \ No newline at end of file + From b9b7c4d6739a069289c7bb37ec087bb290a73610 Mon Sep 17 00:00:00 2001 From: teja-chalikanti Date: Fri, 7 Feb 2025 01:21:10 -0500 Subject: [PATCH 5/6] Fix linting issue: Ensure proper newline at EOF 
--- Lib/test/test_json/test_dump.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Lib/test/test_json/test_dump.py b/Lib/test/test_json/test_dump.py index 40cec876b38c37..6f0dc35bf9ed7f 100644 --- a/Lib/test/test_json/test_dump.py +++ b/Lib/test/test_json/test_dump.py @@ -107,4 +107,3 @@ def test_json_dump_speed(self): if __name__ == "__main__": unittest.main() - From 2dbd088bd09db3957e1db56cc1d4469537f80609 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Fri, 7 Feb 2025 07:01:54 +0000 Subject: [PATCH 6/6] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../next/Library/2025-02-07-07-01-53.gh-issue-129711.yYPmzf.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2025-02-07-07-01-53.gh-issue-129711.yYPmzf.rst diff --git a/Misc/NEWS.d/next/Library/2025-02-07-07-01-53.gh-issue-129711.yYPmzf.rst b/Misc/NEWS.d/next/Library/2025-02-07-07-01-53.gh-issue-129711.yYPmzf.rst new file mode 100644 index 00000000000000..361f17ebe8a99f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-02-07-07-01-53.gh-issue-129711.yYPmzf.rst @@ -0,0 +1 @@ +Optimized :func:`json.dump` to encode the whole object first and write the result with a single ``fp.write()`` call instead of one call per encoded chunk.