Skip to content

Commit 695fa6f

Browse files
committed
deprecated convert_dates and keep_default_dates
1 parent aa4dc71 commit 695fa6f

File tree

5 files changed

+7
-97
lines changed

5 files changed

+7
-97
lines changed

doc/source/user_guide/io.rst

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1926,7 +1926,6 @@ is ``None``. To explicitly force ``Series`` parsing, pass ``typ=series``
19261926
* ``dtype`` : if True, infer dtypes, if a dict of column to dtype, then use those, if ``False``, then don't infer dtypes at all, default is True, apply only to the data.
19271927
* ``convert_axes`` : boolean, try to convert the axes to the proper dtypes, default is ``True``
19281928
* ``convert_dates`` : a list of columns to parse for dates; If ``True``, then try to parse date-like columns, default is ``True``.
1929-
* ``keep_default_dates`` : boolean, default ``True``. If parsing dates, then parse the default date-like columns.
19301929
* ``precise_float`` : boolean, default ``False``. Set to enable usage of higher precision (strtod) function when decoding string to double values. Default (``False``) is to use fast but less precise builtin functionality.
19311930
* ``date_unit`` : string, the timestamp unit to detect if converting dates. Default
19321931
None. By default the timestamp precision will be detected, if this is not desired

pandas/io/json/_json.py

Lines changed: 2 additions & 89 deletions
Original file line number | Diff line number | Diff line change
@@ -406,8 +406,6 @@ def read_json(
406406
typ: Literal["frame"] = ...,
407407
dtype: DtypeArg | None = ...,
408408
convert_axes: bool | None = ...,
409-
convert_dates: bool | list[str] = ...,
410-
keep_default_dates: bool = ...,
411409
precise_float: bool = ...,
412410
date_unit: str | None = ...,
413411
encoding: str | None = ...,
@@ -430,8 +428,6 @@ def read_json(
430428
typ: Literal["series"],
431429
dtype: DtypeArg | None = ...,
432430
convert_axes: bool | None = ...,
433-
convert_dates: bool | list[str] = ...,
434-
keep_default_dates: bool = ...,
435431
precise_float: bool = ...,
436432
date_unit: str | None = ...,
437433
encoding: str | None = ...,
@@ -454,8 +450,6 @@ def read_json(
454450
typ: Literal["series"],
455451
dtype: DtypeArg | None = ...,
456452
convert_axes: bool | None = ...,
457-
convert_dates: bool | list[str] = ...,
458-
keep_default_dates: bool = ...,
459453
precise_float: bool = ...,
460454
date_unit: str | None = ...,
461455
encoding: str | None = ...,
@@ -478,8 +472,6 @@ def read_json(
478472
typ: Literal["frame"] = ...,
479473
dtype: DtypeArg | None = ...,
480474
convert_axes: bool | None = ...,
481-
convert_dates: bool | list[str] = ...,
482-
keep_default_dates: bool = ...,
483475
precise_float: bool = ...,
484476
date_unit: str | None = ...,
485477
encoding: str | None = ...,
@@ -505,8 +497,6 @@ def read_json(
505497
typ: Literal["frame", "series"] = "frame",
506498
dtype: DtypeArg | None = None,
507499
convert_axes: bool | None = None,
508-
convert_dates: bool | list[str] = True,
509-
keep_default_dates: bool = True,
510500
precise_float: bool = False,
511501
date_unit: str | None = None,
512502
encoding: str | None = None,
@@ -588,29 +578,6 @@ def read_json(
588578
589579
For all ``orient`` values except ``'table'``, default is True.
590580
591-
convert_dates : bool or list of str, default True
592-
If True then default datelike columns may be converted (depending on
593-
keep_default_dates).
594-
If False, no dates will be converted.
595-
If a list of column names, then those columns will be converted and
596-
default datelike columns may also be converted (depending on
597-
keep_default_dates).
598-
599-
keep_default_dates : bool, default True
600-
If parsing dates (convert_dates is not False), then try to parse the
601-
default datelike columns.
602-
A column label is datelike if
603-
604-
* it ends with ``'_at'``,
605-
606-
* it ends with ``'_time'``,
607-
608-
* it begins with ``'timestamp'``,
609-
610-
* it is ``'modified'``, or
611-
612-
* it is ``'date'``.
613-
614581
precise_float : bool, default False
615582
Set to enable usage of higher precision (strtod) function when
616583
decoding string to double values. Default (False) is to use fast but
@@ -786,8 +753,6 @@ def read_json(
786753
typ=typ,
787754
dtype=dtype,
788755
convert_axes=convert_axes,
789-
convert_dates=convert_dates,
790-
keep_default_dates=keep_default_dates,
791756
precise_float=precise_float,
792757
date_unit=date_unit,
793758
encoding=encoding,
@@ -823,8 +788,6 @@ def __init__(
823788
typ: FrameSeriesStrT,
824789
dtype,
825790
convert_axes: bool | None,
826-
convert_dates,
827-
keep_default_dates: bool,
828791
precise_float: bool,
829792
date_unit,
830793
encoding,
@@ -841,8 +804,6 @@ def __init__(
841804
self.typ = typ
842805
self.dtype = dtype
843806
self.convert_axes = convert_axes
844-
self.convert_dates = convert_dates
845-
self.keep_default_dates = keep_default_dates
846807
self.precise_float = precise_float
847808
self.date_unit = date_unit
848809
self.encoding = encoding
@@ -982,8 +943,6 @@ def _get_object_parser(self, json: str) -> DataFrame | Series:
982943
"orient": self.orient,
983944
"dtype": self.dtype,
984945
"convert_axes": self.convert_axes,
985-
"convert_dates": self.convert_dates,
986-
"keep_default_dates": self.keep_default_dates,
987946
"precise_float": self.precise_float,
988947
"date_unit": self.date_unit,
989948
"dtype_backend": self.dtype_backend,
@@ -1080,8 +1039,6 @@ def __init__(
10801039
orient,
10811040
dtype: DtypeArg | None = None,
10821041
convert_axes: bool = True,
1083-
convert_dates: bool | list[str] = True,
1084-
keep_default_dates: bool = False,
10851042
precise_float: bool = False,
10861043
date_unit=None,
10871044
dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
@@ -1105,9 +1062,7 @@ def __init__(
11051062

11061063
self.precise_float = precise_float
11071064
self.convert_axes = convert_axes
1108-
self.convert_dates = convert_dates
11091065
self.date_unit = date_unit
1110-
self.keep_default_dates = keep_default_dates
11111066
self.dtype_backend = dtype_backend
11121067

11131068
@final
@@ -1144,7 +1099,6 @@ def _convert_axes(self, obj: DataFrame | Series) -> DataFrame | Series:
11441099
name=axis_name,
11451100
data=ser,
11461101
use_dtypes=False,
1147-
convert_dates=True,
11481102
is_axis=True,
11491103
)
11501104
if result:
@@ -1161,7 +1115,6 @@ def _try_convert_data(
11611115
name: Hashable,
11621116
data: Series,
11631117
use_dtypes: bool = True,
1164-
convert_dates: bool | list[str] = True,
11651118
is_axis: bool = False,
11661119
) -> tuple[Series, bool]:
11671120
"""
@@ -1179,10 +1132,7 @@ def _try_convert_data(
11791132

11801133
elif self.dtype is True:
11811134
pass
1182-
elif not _should_convert_dates(
1183-
convert_dates, self.keep_default_dates, name
1184-
):
1185-
# convert_dates takes precedence over columns listed in dtypes
1135+
else:
11861136
dtype = (
11871137
self.dtype.get(name) if isinstance(self.dtype, dict) else self.dtype
11881138
)
@@ -1192,11 +1142,6 @@ def _try_convert_data(
11921142
except (TypeError, ValueError):
11931143
return data, False
11941144

1195-
if convert_dates:
1196-
new_data = self._try_convert_to_date(data)
1197-
if new_data is not data:
1198-
return new_data, True
1199-
12001145
converted = False
12011146
if self.dtype_backend is not lib.no_default and not is_axis:
12021147
# Fall through for conversion later on
@@ -1302,7 +1247,7 @@ def _parse(self) -> Series:
13021247
return Series(data)
13031248

13041249
def _try_convert_types(self, obj: Series) -> Series:
1305-
obj, _ = self._try_convert_data("data", obj, convert_dates=self.convert_dates)
1250+
obj, _ = self._try_convert_data("data", obj)
13061251
return obj
13071252

13081253

@@ -1349,40 +1294,8 @@ def _try_convert_types(self, obj: DataFrame) -> DataFrame:
13491294
result, _ = self._try_convert_data(
13501295
col_label,
13511296
series,
1352-
convert_dates=_should_convert_dates(
1353-
self.convert_dates,
1354-
keep_default_dates=self.keep_default_dates,
1355-
col=col_label,
1356-
),
13571297
)
13581298
arrays.append(result.array)
13591299
return DataFrame._from_arrays(
13601300
arrays, obj.columns, obj.index, verify_integrity=False
13611301
)
1362-
1363-
1364-
def _should_convert_dates(
1365-
convert_dates: bool | list[str],
1366-
keep_default_dates: bool,
1367-
col: Hashable,
1368-
) -> bool:
1369-
"""
1370-
Return bool whether a DataFrame column should be cast to datetime.
1371-
"""
1372-
if convert_dates is False:
1373-
# convert_dates=True means follow keep_default_dates
1374-
return False
1375-
elif not isinstance(convert_dates, bool) and col in set(convert_dates):
1376-
return True
1377-
elif not keep_default_dates:
1378-
return False
1379-
elif not isinstance(col, str):
1380-
return False
1381-
col_lower = col.lower()
1382-
if (
1383-
col_lower.endswith(("_at", "_time"))
1384-
or col_lower in {"modified", "date", "datetime"}
1385-
or col_lower.startswith("timestamp")
1386-
):
1387-
return True
1388-
return False

pandas/tests/io/json/test_pandas.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -152,7 +152,7 @@ def test_frame_non_unique_columns(self, orient, data, request):
152152

153153
with tm.assert_produces_warning(expected_warning, match=msg):
154154
result = read_json(
155-
StringIO(df.to_json(orient=orient)), orient=orient, convert_dates=["x"]
155+
StringIO(df.to_json(orient=orient)), orient=orient
156156
)
157157
if orient == "values":
158158
expected = DataFrame(data)
@@ -840,7 +840,7 @@ def test_convert_dates(self, datetime_series, datetime_frame):
840840
with tm.assert_produces_warning(FutureWarning, match=msg):
841841
json = StringIO(df.to_json(date_unit="ns"))
842842

843-
result = read_json(json, convert_dates=False)
843+
result = read_json(json)
844844
expected = df.copy()
845845
expected["date"] = expected["date"].values.view("i8")
846846
expected["foo"] = expected["foo"].astype("int64")
@@ -1056,7 +1056,7 @@ def test_iso_non_nano_datetimes(self, unit):
10561056
# TODO: check_dtype/check_index_type should be removable
10571057
# once read_json gets non-nano support
10581058
tm.assert_frame_equal(
1059-
read_json(buf, convert_dates=["date", "date_obj"]),
1059+
read_json(buf),
10601060
df,
10611061
check_index_type=False,
10621062
check_dtype=False,
@@ -1125,7 +1125,7 @@ def test_round_trip_exception(self, datapath):
11251125
def test_url(self, field, dtype, httpserver):
11261126
data = '{"created_at": ["2023-06-23T18:21:36Z"], "closed_at": ["2023-06-23T18:21:36"], "updated_at": ["2023-06-23T18:21:36Z"]}\n' # noqa: E501
11271127
httpserver.serve_content(content=data)
1128-
result = read_json(httpserver.url, convert_dates=True)
1128+
result = read_json(httpserver.url)
11291129
assert result[field].dtype == dtype
11301130

11311131
def test_timedelta(self):

pandas/tests/io/json/test_readlines.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -224,8 +224,6 @@ def test_readjson_chunks_closes(chunksize):
224224
typ="frame",
225225
dtype=True,
226226
convert_axes=True,
227-
convert_dates=True,
228-
keep_default_dates=True,
229227
precise_float=False,
230228
date_unit=None,
231229
encoding=None,

pandas/tests/io/test_gcs.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -86,7 +86,7 @@ def test_to_read_gcs(gcs_buffer, format, monkeypatch, capsys, request):
8686
)
8787
with tm.assert_produces_warning(FutureWarning, match=msg):
8888
df1.to_json(path)
89-
df2 = read_json(path, convert_dates=["dt"])
89+
df2 = read_json(path)
9090
elif format == "parquet":
9191
pytest.importorskip("pyarrow")
9292
pa_fs = pytest.importorskip("pyarrow.fs")

0 commit comments

Comments
 (0)