Skip to content

Commit 163873f

Browse files
Updated the read_json function to auto-infer orient from the JSON schema if it's table or split
1 parent 6bcd303 commit 163873f

File tree

3 files changed

+54
-0
lines changed

3 files changed

+54
-0
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ Other enhancements
7070
- :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
7171
- :meth:`Series.str.get_dummies` now accepts a ``dtype`` parameter to specify the dtype of the resulting DataFrame (:issue:`47872`)
7272
- :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
73+
- :meth:`pandas.read_json` now automatically infers the ``orient`` parameter when it is not explicitly specified, detecting the correct format from the structure of the input JSON. Inference only applies when the JSON matches the ``'split'`` or ``'table'`` layout (:issue:`52713`)
7374
- :py:class:`frozenset` elements in pandas objects are now natively printed (:issue:`60690`)
7475
- Errors occurring during SQL I/O will now throw a generic :class:`.DatabaseError` instead of the raw Exception type from the underlying driver manager library (:issue:`60748`)
7576
- Implemented :meth:`Series.str.isascii` and :meth:`Index.str.isascii` (:issue:`59091`)

pandas/io/json/_json.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
)
77
from collections import abc
88
from itertools import islice
9+
import json
10+
import os
911
from typing import (
1012
TYPE_CHECKING,
1113
Any,
@@ -559,6 +561,12 @@ def read_json(
559561
- ``'values'`` : just the values array
560562
- ``'table'`` : dict like ``{{'schema': {{schema}}, 'data': {{data}}}}``
561563
564+
**Automatic Orient Inference for split or table**:
565+
If the `orient` parameter is not specified,
566+
this function will automatically infer the correct JSON format.
567+
This works only if the JSON structure matches the ``'table'`` or ``'split'`` layout.
568+
This is the case when the JSON was created using ``to_json`` with ``orient='split'`` or ``orient='table'``.
569+
562570
The allowed and default values depend on the value
563571
of the `typ` parameter.
564572
@@ -768,6 +776,19 @@ def read_json(
768776
0 0 1 2.5 True a 1577.2
769777
1 1 <NA> 4.5 False b 1577.1
770778
"""
779+
if orient is None:
780+
if isinstance(path_or_buf, (str, bytes, os.PathLike)):
781+
with open(path_or_buf, encoding="utf-8") as f:
782+
json_data = json.load(f)
783+
else:
784+
json_data = json.load(path_or_buf)
785+
786+
if isinstance(json_data, dict):
787+
if "schema" in json_data and "data" in json_data:
788+
orient = "table"
789+
elif "columns" in json_data and "data" in json_data:
790+
orient = "split"
791+
771792
if orient == "table" and dtype:
772793
raise ValueError("cannot pass both dtype and orient='table'")
773794
if orient == "table" and convert_axes:

pandas/tests/io/json/test_pandas.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2283,3 +2283,35 @@ def test_large_number():
22832283
)
22842284
expected = Series([9999999999999999])
22852285
tm.assert_series_equal(result, expected)
2286+
2287+
2288+
@pytest.mark.parametrize(
2289+
"json_data, should_fail",
2290+
[
2291+
(
2292+
json.dumps(
2293+
{
2294+
"schema": {"fields": [{"name": "A", "type": "integer"}]},
2295+
"data": [{"A": 1}, {"A": 2}, {"A": 3}],
2296+
}
2297+
),
2298+
False,
2299+
),
2300+
(json.dumps({"columns": ["A"], "data": [[1], [2], [3]]}), False),
2301+
],
2302+
)
2303+
def test_read_json_auto_infer(json_data, should_fail, tmp_path):
2304+
"""Test pd.read_json auto-infers 'table' and 'split' formats."""
2305+
2306+
# Use tmp_path to create a temporary file
2307+
temp_file = tmp_path / "test_read_json.json"
2308+
2309+
# Write the json_data to the temporary file
2310+
with open(temp_file, "w") as f:
2311+
f.write(json_data)
2312+
2313+
if should_fail:
2314+
with pytest.raises(ValueError, match=".*expected.*"):
2315+
read_json(temp_file)
2316+
else:
2317+
read_json(temp_file)

0 commit comments

Comments
 (0)