diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 4d9a45abe17cd..da09d18c49ce4 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -70,6 +70,7 @@ Other enhancements
 - :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
 - :meth:`Series.str.get_dummies` now accepts a ``dtype`` parameter to specify the dtype of the resulting DataFrame (:issue:`47872`)
 - :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
+- :meth:`pandas.read_json` now automatically infers the ``orient`` parameter when it is not explicitly specified, detecting the format from the structure of the input JSON. This only works when the JSON matches the ``split`` or ``table`` schema (:issue:`52713`)
 - :py:class:`frozenset` elements in pandas objects are now natively printed (:issue:`60690`)
 - Errors occurring during SQL I/O will now throw a generic :class:`.DatabaseError` instead of the raw Exception type from the underlying driver manager library (:issue:`60748`)
 - Implemented :meth:`Series.str.isascii` and :meth:`Series.str.isascii` (:issue:`59091`)
diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index e032e26d771d7..7d26bf3988429 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -6,6 +6,8 @@
 )
 from collections import abc
 from itertools import islice
+import json
+import os
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -559,6 +561,12 @@ def read_json(
         - ``'values'`` : just the values array
         - ``'table'`` : dict like ``{{'schema': {{schema}}, 'data': {{data}}}}``
 
+        **Automatic orient inference for ``'split'`` or ``'table'``**:
+        If the `orient` parameter is not specified, this function will
+        automatically infer the correct JSON format. This works only when
+        the JSON was created with ``to_json`` using ``orient='split'`` or
+        ``orient='table'``, so that its schema can be recognized.
+
         The allowed and default values depend on the value of the `typ` parameter.
@@ -768,6 +776,19 @@ def read_json(
     0      0     1  2.5   True  a  1577.2
     1      1  <NA>  4.5  False  b  1577.1
     """
+    if orient is None:
+        if isinstance(path_or_buf, (str, bytes, os.PathLike)):
+            with open(path_or_buf, encoding="utf-8") as f:
+                json_data = json.load(f)
+        else:
+            json_data = json.load(path_or_buf)
+
+        if isinstance(json_data, dict):
+            if "schema" in json_data and "data" in json_data:
+                orient = "table"
+            elif "columns" in json_data and "data" in json_data:
+                orient = "split"
+
     if orient == "table" and dtype:
         raise ValueError("cannot pass both dtype and orient='table'")
     if orient == "table" and convert_axes:
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index 144b36166261b..68b31ee8487bb 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -2283,3 +2283,35 @@ def test_large_number():
     )
     expected = Series([9999999999999999])
     tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "json_data, should_fail",
+    [
+        (
+            json.dumps(
+                {
+                    "schema": {"fields": [{"name": "A", "type": "integer"}]},
+                    "data": [{"A": 1}, {"A": 2}, {"A": 3}],
+                }
+            ),
+            False,
+        ),
+        (json.dumps({"columns": ["A"], "data": [[1], [2], [3]]}), False),
+    ],
+)
+def test_read_json_auto_infer_orient_table_split(json_data, should_fail, tmp_path):
+    """Test pd.read_json auto-infers 'table' and 'split' formats."""
+
+    # Use tmp_path to create a temporary file
+    temp_file = tmp_path / "test_read_json.json"
+
+    # Write the json_data to the temporary file
+    with open(temp_file, "w") as f:
+        f.write(json_data)
+
+    if should_fail:
+        with pytest.raises(ValueError, match=".*expected.*"):
+            read_json(temp_file)
+    else:
+        read_json(temp_file)
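
A minimal usage sketch of the behaviour this change is intended to enable, assuming the patch above is applied; the DataFrame contents and file names are illustrative. File paths are used (rather than buffers) because the inference step reopens path-like inputs before the normal parsing pass:

    # Round-trip "split" and "table" JSON without passing orient to read_json.
    import tempfile
    from pathlib import Path

    import pandas as pd

    df = pd.DataFrame({"A": [1, 2, 3]})

    with tempfile.TemporaryDirectory() as tmp:
        split_path = Path(tmp) / "split.json"
        table_path = Path(tmp) / "table.json"

        # Write the two self-describing formats.
        df.to_json(split_path, orient="split")
        df.to_json(table_path, orient="table")

        # No orient passed: the top-level keys ("columns"/"data" vs
        # "schema"/"data") let read_json pick "split" or "table".
        print(pd.read_json(split_path))
        print(pd.read_json(table_path))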