From 7d772d5113841cd40954f49178f16e3ae1ac9979 Mon Sep 17 00:00:00 2001 From: Sergey Volkov Date: Tue, 11 Jun 2024 13:54:57 +0200 Subject: [PATCH 1/2] BUG: make JSONTableWriter fail if no index.name and 'index' in columns This commit is intended to fix GH #58925. If index.name is empty it will use set_default_names inside __init__ to make check on overlapping names fail. Otherwise it's done during schema creation and not reflected on the dataframe itself which creates inconsistency between the data and its schema. add mention of the raised error to the `to_json` documentation move new logic description from IO docs to to_json docstring --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/generic.py | 3 ++- pandas/io/json/_json.py | 3 +++ pandas/tests/io/json/test_pandas.py | 7 +++++++ 4 files changed, 13 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 0acb82ffeca3e..72cb6842a1205 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -559,6 +559,7 @@ MultiIndex I/O ^^^ - Bug in :class:`DataFrame` and :class:`Series` ``repr`` of :py:class:`collections.abc.Mapping`` elements. (:issue:`57915`) +- Bug in :meth:`.DataFrame.to_json` was producing corrupted record (data incompatible with schema) if 'index' was the name of a column and index.name was empty (which is replaced with generic 'index' internally), now it will fail on check if index.name is in columns (:issue:`58925`) - Bug in :meth:`DataFrame.to_dict` raises unnecessary ``UserWarning`` when columns are not unique and ``orient='tight'``. (:issue:`58281`) - Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`) - Bug in :meth:`DataFrame.to_stata` when writing :class:`DataFrame` and ``byteorder=`big```. 
(:issue:`58969`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 5d9e04bd50979..2a0495dff6681 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2387,7 +2387,8 @@ def to_json( index : bool or None, default None The index is only used when 'orient' is 'split', 'index', 'column', or 'table'. Of these, 'index' and 'column' do not support - `index=False`. + `index=False`. A column named 'index' will raise a ``ValueError`` if the + :class:`Index` is unnamed or is itself named 'index'. indent : int, optional Length of whitespace used to indent each record. diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 24fcb78a41e9d..b29ead1d14b1d 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -59,6 +59,7 @@ from pandas.io.json._table_schema import ( build_table_schema, parse_table_schema, + set_default_names, ) from pandas.io.parsers.readers import validate_integer @@ -353,6 +354,8 @@ def __init__( raise ValueError(msg) self.schema = build_table_schema(obj, index=self.index) + if self.index: + obj = set_default_names(obj) # NotImplemented on a column MultiIndex if obj.ndim == 2 and isinstance(obj.columns, MultiIndex): diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index e00c193fd471a..a34c0adc69821 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1610,6 +1610,13 @@ def test_to_json_from_json_columns_dtypes(self, orient): ) tm.assert_frame_equal(result, expected) + def test_to_json_with_index_as_a_column_name(self): + df = DataFrame(data={"index": [1, 2], "a": [2, 3]}) + with pytest.raises( + ValueError, match="Overlapping names between the index and columns" + ): + df.to_json(orient="table") + @pytest.mark.parametrize("dtype", [True, {"b": int, "c": int}]) def test_read_json_table_dtype_raises(self, dtype): # GH21345 From 93d6619101886326cd959df9bf59185f82c68a11 Mon Sep 17 00:00:00 2001 From: taranarmo Date: Tue, 9 Jul 
2024 00:42:37 +0200 Subject: [PATCH 2/2] Accept the suggestion by mroeschke Rephrase the what's new addition Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 72cb6842a1205..2025474fecb0b 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -559,7 +559,7 @@ MultiIndex I/O ^^^ - Bug in :class:`DataFrame` and :class:`Series` ``repr`` of :py:class:`collections.abc.Mapping`` elements. (:issue:`57915`) -- Bug in :meth:`.DataFrame.to_json` was producing corrupted record (data incompatible with schema) if 'index' was the name of a column and index.name was empty (which is replaced with generic 'index' internally), now it will fail on check if index.name is in columns (:issue:`58925`) +- Bug in :meth:`.DataFrame.to_json` when ``"index"`` was a value in the :attr:`DataFrame.columns` and :attr:`Index.name` was ``None``. Now, this will fail with a ``ValueError`` (:issue:`58925`) - Bug in :meth:`DataFrame.to_dict` raises unnecessary ``UserWarning`` when columns are not unique and ``orient='tight'``. (:issue:`58281`) - Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`) - Bug in :meth:`DataFrame.to_stata` when writing :class:`DataFrame` and ``byteorder=`big```. (:issue:`58969`)