diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index be40710a9e307..1976c29670a97 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -1926,7 +1926,6 @@ is ``None``. To explicitly force ``Series`` parsing, pass ``typ=series`` * ``dtype`` : if True, infer dtypes, if a dict of column to dtype, then use those, if ``False``, then don't infer dtypes at all, default is True, apply only to the data. * ``convert_axes`` : boolean, try to convert the axes to the proper dtypes, default is ``True`` * ``convert_dates`` : a list of columns to parse for dates; If ``True``, then try to parse date-like columns, default is ``True``. -* ``keep_default_dates`` : boolean, default ``True``. If parsing dates, then parse the default date-like columns. * ``precise_float`` : boolean, default ``False``. Set to enable usage of higher precision (strtod) function when decoding string to double values. Default (``False``) is to use fast but less precise builtin functionality. * ``date_unit`` : string, the timestamp unit to detect if converting dates. Default None. By default the timestamp precision will be detected, if this is not desired diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index b29ead1d14b1d..2a3d8d35324c4 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -406,8 +406,6 @@ def read_json( typ: Literal["frame"] = ..., dtype: DtypeArg | None = ..., convert_axes: bool | None = ..., - convert_dates: bool | list[str] = ..., - keep_default_dates: bool = ..., precise_float: bool = ..., date_unit: str | None = ..., encoding: str | None = ..., @@ -430,8 +428,6 @@ def read_json( typ: Literal["series"], dtype: DtypeArg | None = ..., convert_axes: bool | None = ..., - convert_dates: bool | list[str] = ..., - keep_default_dates: bool = ..., precise_float: bool = ..., date_unit: str | None = ..., encoding: str | None = ..., @@ -454,8 +450,6 @@ def read_json( typ: Literal["series"], dtype: DtypeArg | None = ..., convert_axes: bool | None = ..., - convert_dates: bool | list[str] = ..., - keep_default_dates: bool = ..., precise_float: bool = ..., date_unit: str | None = ..., encoding: str | None = ..., @@ -478,8 +472,6 @@ def read_json( typ: Literal["frame"] = ..., dtype: DtypeArg | None = ..., convert_axes: bool | None = ..., - convert_dates: bool | list[str] = ..., - keep_default_dates: bool = ..., precise_float: bool = ..., date_unit: str | None = ..., encoding: str | None = ..., @@ -505,8 +497,6 @@ def read_json( typ: Literal["frame", "series"] = "frame", dtype: DtypeArg | None = None, convert_axes: bool | None = None, - convert_dates: bool | list[str] = True, - keep_default_dates: bool = True, precise_float: bool = False, date_unit: str | None = None, encoding: str | None = None, @@ -588,29 +578,6 @@ def read_json( For all ``orient`` values except ``'table'``, default is True. - convert_dates : bool or list of str, default True - If True then default datelike columns may be converted (depending on - keep_default_dates). - If False, no dates will be converted. - If a list of column names, then those columns will be converted and - default datelike columns may also be converted (depending on - keep_default_dates). - - keep_default_dates : bool, default True - If parsing dates (convert_dates is not False), then try to parse the - default datelike columns. - A column label is datelike if - - * it ends with ``'_at'``, - - * it ends with ``'_time'``, - - * it begins with ``'timestamp'``, - - * it is ``'modified'``, or - - * it is ``'date'``. - precise_float : bool, default False Set to enable usage of higher precision (strtod) function when decoding string to double values. Default (False) is to use fast but @@ -786,8 +753,6 @@ def read_json( typ=typ, dtype=dtype, convert_axes=convert_axes, - convert_dates=convert_dates, - keep_default_dates=keep_default_dates, precise_float=precise_float, date_unit=date_unit, encoding=encoding, @@ -823,8 +788,6 @@ def __init__( typ: FrameSeriesStrT, dtype, convert_axes: bool | None, - convert_dates, - keep_default_dates: bool, precise_float: bool, date_unit, encoding, @@ -841,8 +804,6 @@ def __init__( self.typ = typ self.dtype = dtype self.convert_axes = convert_axes - self.convert_dates = convert_dates - self.keep_default_dates = keep_default_dates self.precise_float = precise_float self.date_unit = date_unit self.encoding = encoding @@ -982,8 +943,6 @@ def _get_object_parser(self, json: str) -> DataFrame | Series: "orient": self.orient, "dtype": self.dtype, "convert_axes": self.convert_axes, - "convert_dates": self.convert_dates, - "keep_default_dates": self.keep_default_dates, "precise_float": self.precise_float, "date_unit": self.date_unit, "dtype_backend": self.dtype_backend, @@ -1080,8 +1039,6 @@ def __init__( orient, dtype: DtypeArg | None = None, convert_axes: bool = True, - convert_dates: bool | list[str] = True, - keep_default_dates: bool = False, precise_float: bool = False, date_unit=None, dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default, @@ -1105,9 +1062,7 @@ def __init__( self.precise_float = precise_float self.convert_axes = convert_axes - self.convert_dates = convert_dates self.date_unit = date_unit - self.keep_default_dates = keep_default_dates self.dtype_backend = dtype_backend @final @@ -1144,7 +1099,6 @@ def _convert_axes(self, obj: DataFrame | Series) -> DataFrame | Series: name=axis_name, data=ser, use_dtypes=False, - convert_dates=True, is_axis=True, ) if result: @@ -1161,7 +1115,6 @@ def _try_convert_data( name: Hashable, data: Series, use_dtypes: bool = True, - convert_dates: bool | list[str] = True, is_axis: bool = False, ) -> tuple[Series, bool]: """ @@ -1179,10 +1132,7 @@ def _try_convert_data( elif self.dtype is True: pass - elif not _should_convert_dates( - convert_dates, self.keep_default_dates, name - ): - # convert_dates takes precedence over columns listed in dtypes + else: dtype = ( self.dtype.get(name) if isinstance(self.dtype, dict) else self.dtype ) @@ -1192,11 +1142,6 @@ def _try_convert_data( except (TypeError, ValueError): return data, False - if convert_dates: - new_data = self._try_convert_to_date(data) - if new_data is not data: - return new_data, True - converted = False if self.dtype_backend is not lib.no_default and not is_axis: # Fall through for conversion later on @@ -1302,7 +1247,7 @@ def _parse(self) -> Series: return Series(data) def _try_convert_types(self, obj: Series) -> Series: - obj, _ = self._try_convert_data("data", obj, convert_dates=self.convert_dates) + obj, _ = self._try_convert_data("data", obj) return obj @@ -1349,40 +1294,8 @@ def _try_convert_types(self, obj: DataFrame) -> DataFrame: result, _ = self._try_convert_data( col_label, series, - convert_dates=_should_convert_dates( - self.convert_dates, - keep_default_dates=self.keep_default_dates, - col=col_label, - ), ) arrays.append(result.array) return DataFrame._from_arrays( arrays, obj.columns, obj.index, verify_integrity=False ) - - -def _should_convert_dates( - convert_dates: bool | list[str], - keep_default_dates: bool, - col: Hashable, -) -> bool: - """ - Return bool whether a DataFrame column should be cast to datetime. - """ - if convert_dates is False: - # convert_dates=True means follow keep_default_dates - return False - elif not isinstance(convert_dates, bool) and col in set(convert_dates): - return True - elif not keep_default_dates: - return False - elif not isinstance(col, str): - return False - col_lower = col.lower() - if ( - col_lower.endswith(("_at", "_time")) - or col_lower in {"modified", "date", "datetime"} - or col_lower.startswith("timestamp") - ): - return True - return False diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 3c551e80ef00b..aa3e9e55826cb 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -152,7 +152,7 @@ def test_frame_non_unique_columns(self, orient, data, request): with tm.assert_produces_warning(expected_warning, match=msg): result = read_json( - StringIO(df.to_json(orient=orient)), orient=orient, convert_dates=["x"] + StringIO(df.to_json(orient=orient)), orient=orient ) if orient == "values": expected = DataFrame(data) @@ -840,7 +840,7 @@ def test_convert_dates(self, datetime_series, datetime_frame): with tm.assert_produces_warning(FutureWarning, match=msg): json = StringIO(df.to_json(date_unit="ns")) - result = read_json(json, convert_dates=False) + result = read_json(json) expected = df.copy() expected["date"] = expected["date"].values.view("i8") expected["foo"] = expected["foo"].astype("int64") @@ -1056,7 +1056,7 @@ def test_iso_non_nano_datetimes(self, unit): # TODO: check_dtype/check_index_type should be removable # once read_json gets non-nano support tm.assert_frame_equal( - read_json(buf, convert_dates=["date", "date_obj"]), + read_json(buf), df, check_index_type=False, check_dtype=False, @@ -1125,7 +1125,7 @@ def test_round_trip_exception(self, datapath): def test_url(self, field, dtype, httpserver): data = '{"created_at": ["2023-06-23T18:21:36Z"], "closed_at": ["2023-06-23T18:21:36"], "updated_at": ["2023-06-23T18:21:36Z"]}\n' # noqa: E501 httpserver.serve_content(content=data) - result = read_json(httpserver.url, convert_dates=True) + result = read_json(httpserver.url) assert result[field].dtype == dtype def test_timedelta(self): diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index 3c843479b446a..1f8f5faf5dd1e 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -224,8 +224,6 @@ def test_readjson_chunks_closes(chunksize): typ="frame", dtype=True, convert_axes=True, - convert_dates=True, - keep_default_dates=True, precise_float=False, date_unit=None, encoding=None, diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py index 434642ed7fc90..f76aec1cb528f 100644 --- a/pandas/tests/io/test_gcs.py +++ b/pandas/tests/io/test_gcs.py @@ -86,7 +86,7 @@ def test_to_read_gcs(gcs_buffer, format, monkeypatch, capsys, request): ) with tm.assert_produces_warning(FutureWarning, match=msg): df1.to_json(path) - df2 = read_json(path, convert_dates=["dt"]) + df2 = read_json(path) elif format == "parquet": pytest.importorskip("pyarrow") pa_fs = pytest.importorskip("pyarrow.fs")