Skip to content

Commit 7059407

Browse files
committed
Clean up dataframe column casting
1 parent f765bfe commit 7059407

File tree

1 file changed

+45
-67
lines changed

1 file changed

+45
-67
lines changed

pandas/io/json/_json.py

Lines changed: 45 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1176,8 +1176,10 @@ def _try_convert_data(
11761176

11771177
elif self.dtype is True:
11781178
pass
1179-
else:
1180-
# dtype to force
1179+
elif not _should_convert_dates(
1180+
convert_dates, self.keep_default_dates, name
1181+
):
1182+
# convert_dates takes precedence over columns listed in dtypes
11811183
dtype = (
11821184
self.dtype.get(name) if isinstance(self.dtype, dict) else self.dtype
11831185
)
@@ -1244,7 +1246,7 @@ def _try_convert_to_date(self, data: Series) -> tuple[Series, bool]:
12441246
Try to parse a ndarray like into a date column.
12451247
12461248
Try to coerce object in epoch/iso formats and integer/float in epoch
1247-
formats. Return a boolean if parsing was successful.
1249+
formats.
12481250
"""
12491251
# no conversion on empty
12501252
if not len(data):
@@ -1339,70 +1341,46 @@ def _parse(self) -> DataFrame:
13391341
ujson_loads(json, precise_float=self.precise_float), dtype=None
13401342
)
13411343

1342-
@final
def _process_converter(
    self,
    obj: DataFrame,
    f: Callable[[Hashable, Series], tuple[Series, bool]],
    filt: Callable[[Hashable], bool] | None = None,
) -> DataFrame:
    """
    Take a conversion function and possibly recreate the frame.

    Parameters
    ----------
    obj : DataFrame
        Frame whose columns are offered to ``f`` one at a time.
    f : callable
        Called as ``f(label, column)``; returns ``(new_column, converted)``.
        ``new_column`` is used only when ``converted`` is truthy.
    filt : callable, optional
        Called as ``filt(label)``; ``f`` is applied only to columns for
        which it returns a truthy value. ``None`` means every column.

    Returns
    -------
    DataFrame
        ``obj`` itself when no column was converted, otherwise a new frame
        with the same index and column labels as ``obj``.
    """
    converted_any = False
    # Keyed by position, not label, so duplicate column labels do not collide.
    columns_by_position = {}
    for position, (label, column) in enumerate(obj.items()):
        if filt is None or filt(label):
            new_data, converted = f(label, column)
            if converted:
                column = new_data
                converted_any = True
        columns_by_position[position] = column

    if not converted_any:
        # Nothing changed: hand back the original object untouched.
        return obj

    new_frame = DataFrame(columns_by_position, index=obj.index)
    # Restore the original labels (the dict above was keyed by position).
    new_frame.columns = obj.columns
    return new_frame
1371-
13721344
def _try_convert_types(self, obj: DataFrame) -> DataFrame:
    """
    Run ``_try_convert_data`` over every column and rebuild the frame.

    For each column, ``_should_convert_dates`` is consulted with
    ``self.convert_dates`` and ``self.keep_default_dates`` to decide the
    ``convert_dates`` flag passed to ``_try_convert_data``. The success flag
    returned by ``_try_convert_data`` is ignored; the returned column is
    used either way.

    Parameters
    ----------
    obj : DataFrame
        Frame whose columns are passed through ``_try_convert_data``.

    Returns
    -------
    DataFrame
        New frame built from the per-column results, reusing ``obj``'s
        column labels and index.
    """
    arrays = []
    for col_label, series in obj.items():
        result, _ = self._try_convert_data(
            col_label,
            series,
            convert_dates=_should_convert_dates(
                self.convert_dates,
                keep_default_dates=self.keep_default_dates,
                col=col_label,
            ),
        )
        arrays.append(result.array)
    # Arrays are positional, so duplicate column labels are carried through
    # by reusing obj.columns as-is.
    return DataFrame._from_arrays(
        arrays, obj.columns, obj.index, verify_integrity=False
    )
1360+
1361+
1362+
def _should_convert_dates(
    convert_dates: bool | list[str],
    keep_default_dates: bool,
    col: Hashable,
) -> bool:
    """
    Return bool whether a DataFrame column should be cast to datetime.

    Parameters
    ----------
    convert_dates : bool or list of str
        ``False`` disables conversion for every column. A list names columns
        that are always converted. ``True`` (or a list that does not contain
        ``col``) defers to ``keep_default_dates``.
    keep_default_dates : bool
        Whether columns with date-like names are converted by default.
    col : Hashable
        Label of the column being considered.

    Returns
    -------
    bool
        ``True`` if the column should be parsed as dates.
    """
    if convert_dates is False:
        return False
    # Explicitly listed columns are converted regardless of keep_default_dates.
    if not isinstance(convert_dates, bool) and col in set(convert_dates):
        return True
    # convert_dates=True (or an unlisted column) means follow keep_default_dates;
    # the name heuristics below apply only to string labels.
    if not keep_default_dates or not isinstance(col, str):
        return False
    col_lower = col.lower()
    return (
        col_lower.endswith(("_at", "_time"))
        or col_lower in {"modified", "date", "datetime"}
        or col_lower.startswith("timestamp")
    )

0 commit comments

Comments
 (0)