diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index a7f63d75a047e..ca78e628a2d7b 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -701,6 +701,7 @@ I/O - Bug in :meth:`.DataFrame.to_json` when ``"index"`` was a value in the :attr:`DataFrame.column` and :attr:`Index.name` was ``None``. Now, this will fail with a ``ValueError`` (:issue:`58925`) - Bug in :meth:`DataFrame._repr_html_` which ignored the ``"display.float_format"`` option (:issue:`59876`) - Bug in :meth:`DataFrame.from_records` where ``columns`` parameter with numpy structured array was not reordering and filtering out the columns (:issue:`59717`) +- Bug in :meth:`DataFrame.to_csv` where ``quoting=csv.QUOTE_NONNUMERIC`` added extra decimal places to the CSV output for ``float16`` and ``float32`` columns when ``float_format=None`` (:issue:`60699`) - Bug in :meth:`DataFrame.to_dict` raises unnecessary ``UserWarning`` when columns are not unique and ``orient='tight'``.
(:issue:`58281`) - Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`) - Bug in :meth:`DataFrame.to_excel` where the :class:`MultiIndex` index with a period level was not a date (:issue:`60099`) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e2f9c5e9868a9..384f24cf2b906 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -7755,13 +7755,15 @@ def get_values_for_csv( if float_format is None and decimal == ".": mask = isna(values) - if not quoting: - values = values.astype(str) - else: - values = np.array(values, dtype="object") + # GH60699 + # Ensure quoting doesn't add extra decimal places in output + # for float16, float32 + if values.dtype in [np.float16, np.float32]: + values = np.array(values, dtype="str") + values = values.astype(float, copy=False) - values[mask] = na_rep values = values.astype(object, copy=False) + values[mask] = na_rep return values from pandas.io.formats.format import FloatArrayFormatter diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index 9eafc69013ffe..b3881f97beeeb 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -1316,6 +1316,71 @@ def test_to_csv_quoting(self): expected = tm.convert_rows_list_to_csv_str(expected_rows) assert df.to_csv(quoting=csv.QUOTE_ALL) == expected + @pytest.mark.parametrize( + "data, dtype, expected_rows", + [ + # Test Case 1: float16 precision + ( + {"col": [8.57, 0.156, -0.312, 123.3, -54.5, np.nan]}, + "float16", + [ + '"","col"', + "0,8.57", + "1,0.156", + "2,-0.312", + "3,123.3", + "4,-54.5", + '5,""', + ], + ), + # Test Case 2: float32 precision + ( + {"col": [8.57, 1.234567, -2.345678, 1e6, -1.5e6, np.nan]}, + "float32", + [ + '"","col"', + "0,8.57", + "1,1.234567", + "2,-2.345678", + "3,1000000.0", + "4,-1500000.0", + '5,""', + ], + ), + # Test Case 3: float64 precision + (
+ { + "col": [ + 8.57, + 3.141592653589793, + -2.718281828459045, + 1.01e12, + -5.67e11, + np.nan, + ] + }, + "float64", + [ + '"","col"', + "0,8.57", + "1,3.141592653589793", + "2,-2.718281828459045", + "3,1010000000000.0", + "4,-567000000000.0", + '5,""', + ], + ), + ], + ) + def test_to_csv_decimal_and_nonnumeric_quoting(self, data, dtype, expected_rows): + # https://github.com/pandas-dev/pandas/issues/60699 + # combination of float dtype, no special formatting and + # quoting is specified (quoting=csv.QUOTE_NONNUMERIC) + df = DataFrame(data, dtype=dtype) + result = df.to_csv(quoting=csv.QUOTE_NONNUMERIC) + expected = tm.convert_rows_list_to_csv_str(expected_rows) + assert result == expected + def test_period_index_date_overflow(self): # see gh-15982