Skip to content

DEPR: keep_default_dates and convert_dates in read_json #59349

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion doc/source/user_guide/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1926,7 +1926,6 @@ is ``None``. To explicitly force ``Series`` parsing, pass ``typ=series``
* ``dtype`` : if ``True``, infer dtypes; if a dict of column to dtype, then use those; if ``False``, then don't infer dtypes at all. Default is ``True``. Applies only to the data.
* ``convert_axes`` : boolean, try to convert the axes to the proper dtypes, default is ``True``
* ``convert_dates`` : a list of columns to parse for dates; if ``True``, then try to parse date-like columns. Default is ``True``.
* ``keep_default_dates`` : boolean, default ``True``. If parsing dates, then parse the default date-like columns.
* ``precise_float`` : boolean, default ``False``. Set to enable usage of higher precision (strtod) function when decoding string to double values. Default (``False``) is to use fast but less precise builtin functionality.
* ``date_unit`` : string, the timestamp unit to detect if converting dates. Default
None. By default the timestamp precision will be detected, if this is not desired
Expand Down
91 changes: 2 additions & 89 deletions pandas/io/json/_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,8 +406,6 @@ def read_json(
typ: Literal["frame"] = ...,
dtype: DtypeArg | None = ...,
convert_axes: bool | None = ...,
convert_dates: bool | list[str] = ...,
keep_default_dates: bool = ...,
precise_float: bool = ...,
date_unit: str | None = ...,
encoding: str | None = ...,
Expand All @@ -430,8 +428,6 @@ def read_json(
typ: Literal["series"],
dtype: DtypeArg | None = ...,
convert_axes: bool | None = ...,
convert_dates: bool | list[str] = ...,
keep_default_dates: bool = ...,
precise_float: bool = ...,
date_unit: str | None = ...,
encoding: str | None = ...,
Expand All @@ -454,8 +450,6 @@ def read_json(
typ: Literal["series"],
dtype: DtypeArg | None = ...,
convert_axes: bool | None = ...,
convert_dates: bool | list[str] = ...,
keep_default_dates: bool = ...,
precise_float: bool = ...,
date_unit: str | None = ...,
encoding: str | None = ...,
Expand All @@ -478,8 +472,6 @@ def read_json(
typ: Literal["frame"] = ...,
dtype: DtypeArg | None = ...,
convert_axes: bool | None = ...,
convert_dates: bool | list[str] = ...,
keep_default_dates: bool = ...,
precise_float: bool = ...,
date_unit: str | None = ...,
encoding: str | None = ...,
Expand All @@ -505,8 +497,6 @@ def read_json(
typ: Literal["frame", "series"] = "frame",
dtype: DtypeArg | None = None,
convert_axes: bool | None = None,
convert_dates: bool | list[str] = True,
keep_default_dates: bool = True,
precise_float: bool = False,
date_unit: str | None = None,
encoding: str | None = None,
Expand Down Expand Up @@ -588,29 +578,6 @@ def read_json(

For all ``orient`` values except ``'table'``, default is True.

convert_dates : bool or list of str, default True
If True then default datelike columns may be converted (depending on
keep_default_dates).
If False, no dates will be converted.
If a list of column names, then those columns will be converted and
default datelike columns may also be converted (depending on
keep_default_dates).

keep_default_dates : bool, default True
If parsing dates (convert_dates is not False), then try to parse the
default datelike columns.
A column label is datelike if

* it ends with ``'_at'``,

* it ends with ``'_time'``,

* it begins with ``'timestamp'``,

* it is ``'modified'``,

* it is ``'date'``, or

* it is ``'datetime'``.

precise_float : bool, default False
Set to enable usage of higher precision (strtod) function when
decoding string to double values. Default (False) is to use fast but
Expand Down Expand Up @@ -786,8 +753,6 @@ def read_json(
typ=typ,
dtype=dtype,
convert_axes=convert_axes,
convert_dates=convert_dates,
keep_default_dates=keep_default_dates,
precise_float=precise_float,
date_unit=date_unit,
encoding=encoding,
Expand Down Expand Up @@ -823,8 +788,6 @@ def __init__(
typ: FrameSeriesStrT,
dtype,
convert_axes: bool | None,
convert_dates,
keep_default_dates: bool,
precise_float: bool,
date_unit,
encoding,
Expand All @@ -841,8 +804,6 @@ def __init__(
self.typ = typ
self.dtype = dtype
self.convert_axes = convert_axes
self.convert_dates = convert_dates
self.keep_default_dates = keep_default_dates
self.precise_float = precise_float
self.date_unit = date_unit
self.encoding = encoding
Expand Down Expand Up @@ -982,8 +943,6 @@ def _get_object_parser(self, json: str) -> DataFrame | Series:
"orient": self.orient,
"dtype": self.dtype,
"convert_axes": self.convert_axes,
"convert_dates": self.convert_dates,
"keep_default_dates": self.keep_default_dates,
"precise_float": self.precise_float,
"date_unit": self.date_unit,
"dtype_backend": self.dtype_backend,
Expand Down Expand Up @@ -1080,8 +1039,6 @@ def __init__(
orient,
dtype: DtypeArg | None = None,
convert_axes: bool = True,
convert_dates: bool | list[str] = True,
keep_default_dates: bool = False,
precise_float: bool = False,
date_unit=None,
dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
Expand All @@ -1105,9 +1062,7 @@ def __init__(

self.precise_float = precise_float
self.convert_axes = convert_axes
self.convert_dates = convert_dates
self.date_unit = date_unit
self.keep_default_dates = keep_default_dates
self.dtype_backend = dtype_backend

@final
Expand Down Expand Up @@ -1144,7 +1099,6 @@ def _convert_axes(self, obj: DataFrame | Series) -> DataFrame | Series:
name=axis_name,
data=ser,
use_dtypes=False,
convert_dates=True,
is_axis=True,
)
if result:
Expand All @@ -1161,7 +1115,6 @@ def _try_convert_data(
name: Hashable,
data: Series,
use_dtypes: bool = True,
convert_dates: bool | list[str] = True,
is_axis: bool = False,
) -> tuple[Series, bool]:
"""
Expand All @@ -1179,10 +1132,7 @@ def _try_convert_data(

elif self.dtype is True:
pass
elif not _should_convert_dates(
convert_dates, self.keep_default_dates, name
):
# convert_dates takes precedence over columns listed in dtypes
else:
dtype = (
self.dtype.get(name) if isinstance(self.dtype, dict) else self.dtype
)
Expand All @@ -1192,11 +1142,6 @@ def _try_convert_data(
except (TypeError, ValueError):
return data, False

if convert_dates:
new_data = self._try_convert_to_date(data)
if new_data is not data:
return new_data, True

converted = False
if self.dtype_backend is not lib.no_default and not is_axis:
# Fall through for conversion later on
Expand Down Expand Up @@ -1302,7 +1247,7 @@ def _parse(self) -> Series:
return Series(data)

def _try_convert_types(self, obj: Series) -> Series:
obj, _ = self._try_convert_data("data", obj, convert_dates=self.convert_dates)
obj, _ = self._try_convert_data("data", obj)
return obj


Expand Down Expand Up @@ -1349,40 +1294,8 @@ def _try_convert_types(self, obj: DataFrame) -> DataFrame:
result, _ = self._try_convert_data(
col_label,
series,
convert_dates=_should_convert_dates(
self.convert_dates,
keep_default_dates=self.keep_default_dates,
col=col_label,
),
)
arrays.append(result.array)
return DataFrame._from_arrays(
arrays, obj.columns, obj.index, verify_integrity=False
)


def _should_convert_dates(
convert_dates: bool | list[str],
keep_default_dates: bool,
col: Hashable,
) -> bool:
"""
Return bool whether a DataFrame column should be cast to datetime.
"""
if convert_dates is False:
# convert_dates=True means follow keep_default_dates
return False
elif not isinstance(convert_dates, bool) and col in set(convert_dates):
return True
elif not keep_default_dates:
return False
elif not isinstance(col, str):
return False
col_lower = col.lower()
if (
col_lower.endswith(("_at", "_time"))
or col_lower in {"modified", "date", "datetime"}
or col_lower.startswith("timestamp")
):
return True
return False
8 changes: 4 additions & 4 deletions pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ def test_frame_non_unique_columns(self, orient, data, request):

with tm.assert_produces_warning(expected_warning, match=msg):
result = read_json(
StringIO(df.to_json(orient=orient)), orient=orient, convert_dates=["x"]
StringIO(df.to_json(orient=orient)), orient=orient
)
if orient == "values":
expected = DataFrame(data)
Expand Down Expand Up @@ -840,7 +840,7 @@ def test_convert_dates(self, datetime_series, datetime_frame):
with tm.assert_produces_warning(FutureWarning, match=msg):
json = StringIO(df.to_json(date_unit="ns"))

result = read_json(json, convert_dates=False)
result = read_json(json)
expected = df.copy()
expected["date"] = expected["date"].values.view("i8")
expected["foo"] = expected["foo"].astype("int64")
Expand Down Expand Up @@ -1056,7 +1056,7 @@ def test_iso_non_nano_datetimes(self, unit):
# TODO: check_dtype/check_index_type should be removable
# once read_json gets non-nano support
tm.assert_frame_equal(
read_json(buf, convert_dates=["date", "date_obj"]),
read_json(buf),
df,
check_index_type=False,
check_dtype=False,
Expand Down Expand Up @@ -1125,7 +1125,7 @@ def test_round_trip_exception(self, datapath):
def test_url(self, field, dtype, httpserver):
data = '{"created_at": ["2023-06-23T18:21:36Z"], "closed_at": ["2023-06-23T18:21:36"], "updated_at": ["2023-06-23T18:21:36Z"]}\n' # noqa: E501
httpserver.serve_content(content=data)
result = read_json(httpserver.url, convert_dates=True)
result = read_json(httpserver.url)
assert result[field].dtype == dtype

def test_timedelta(self):
Expand Down
2 changes: 0 additions & 2 deletions pandas/tests/io/json/test_readlines.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,8 +224,6 @@ def test_readjson_chunks_closes(chunksize):
typ="frame",
dtype=True,
convert_axes=True,
convert_dates=True,
keep_default_dates=True,
precise_float=False,
date_unit=None,
encoding=None,
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/test_gcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def test_to_read_gcs(gcs_buffer, format, monkeypatch, capsys, request):
)
with tm.assert_produces_warning(FutureWarning, match=msg):
df1.to_json(path)
df2 = read_json(path, convert_dates=["dt"])
df2 = read_json(path)
elif format == "parquet":
pytest.importorskip("pyarrow")
pa_fs = pytest.importorskip("pyarrow.fs")
Expand Down
Loading