Skip to content

Commit 8dbdf34

Browse files
committed
Clean up do_date_conversions
1 parent 3541537 commit 8dbdf34

File tree

2 files changed

+52
-82
lines changed

2 files changed

+52
-82
lines changed

pandas/io/parsers/base_parser.py

Lines changed: 47 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@
4646
from pandas.core.dtypes.missing import isna
4747

4848
from pandas import (
49-
ArrowDtype,
5049
DataFrame,
5150
DatetimeIndex,
5251
StringDtype,
@@ -142,12 +141,6 @@ def __init__(self, kwds) -> None:
142141
self.false_values = kwds.get("false_values")
143142
self.cache_dates = kwds.pop("cache_dates", True)
144143

145-
self._date_conv = _make_date_converter(
146-
date_format=self.date_format,
147-
dayfirst=self.dayfirst,
148-
cache_dates=self.cache_dates,
149-
)
150-
151144
# validate header options for mi
152145
self.header = kwds.get("header")
153146
if is_list_like(self.header, allow_sets=False):
@@ -355,9 +348,12 @@ def _agg_index(self, index, try_parse_dates: bool = True) -> Index:
355348

356349
for i, arr in enumerate(index):
357350
if try_parse_dates and self._should_parse_dates(i):
358-
arr = self._date_conv(
351+
arr = date_converter(
359352
arr,
360353
col=self.index_names[i] if self.index_names is not None else None,
354+
dayfirst=self.dayfirst,
355+
cache_dates=self.cache_dates,
356+
date_format=self.date_format,
361357
)
362358

363359
if self.na_filter:
@@ -667,16 +663,25 @@ def _do_date_conversions(
667663
names: Sequence[Hashable] | Index,
668664
data: Mapping[Hashable, ArrayLike] | DataFrame,
669665
) -> Mapping[Hashable, ArrayLike] | DataFrame:
670-
if isinstance(self.parse_dates, list):
671-
return _process_date_conversion(
672-
data,
673-
self._date_conv,
674-
self.parse_dates,
675-
self.index_col,
676-
self.index_names,
677-
names,
678-
dtype_backend=self.dtype_backend,
666+
if not isinstance(self.parse_dates, list):
667+
return data
668+
for colspec in self.parse_dates:
669+
if isinstance(colspec, int) and colspec not in data:
670+
colspec = names[colspec]
671+
if (isinstance(self.index_col, list) and colspec in self.index_col) or (
672+
isinstance(self.index_names, list) and colspec in self.index_names
673+
):
674+
continue
675+
result = date_converter(
676+
data[colspec],
677+
col=colspec,
678+
dayfirst=self.dayfirst,
679+
cache_dates=self.cache_dates,
680+
date_format=self.date_format,
679681
)
682+
# error: Unsupported target for indexed assignment
683+
# ("Mapping[Hashable, ExtensionArray | ndarray[Any, Any]] | DataFrame")
684+
data[colspec] = result # type: ignore[index]
680685

681686
return data
682687

@@ -910,40 +915,37 @@ def _get_empty_meta(
910915
return index, columns, col_dict
911916

912917

913-
def _make_date_converter(
918+
def date_converter(
919+
date_col,
920+
col: Hashable,
914921
dayfirst: bool = False,
915922
cache_dates: bool = True,
916923
date_format: dict[Hashable, str] | str | None = None,
917924
):
918-
def converter(date_col, col: Hashable):
919-
if date_col.dtype.kind in "Mm":
920-
return date_col
921-
922-
date_fmt = (
923-
date_format.get(col) if isinstance(date_format, dict) else date_format
925+
if date_col.dtype.kind in "Mm":
926+
return date_col
927+
928+
date_fmt = date_format.get(col) if isinstance(date_format, dict) else date_format
929+
930+
str_objs = lib.ensure_string_array(np.asarray(date_col))
931+
try:
932+
result = tools.to_datetime(
933+
str_objs,
934+
format=date_fmt,
935+
utc=False,
936+
dayfirst=dayfirst,
937+
cache=cache_dates,
924938
)
939+
except (ValueError, TypeError):
940+
# test_usecols_with_parse_dates4
941+
# test_multi_index_parse_dates
942+
return str_objs
925943

926-
str_objs = lib.ensure_string_array(date_col)
927-
try:
928-
result = tools.to_datetime(
929-
str_objs,
930-
format=date_fmt,
931-
utc=False,
932-
dayfirst=dayfirst,
933-
cache=cache_dates,
934-
)
935-
except (ValueError, TypeError):
936-
# test_usecols_with_parse_dates4
937-
# test_multi_index_parse_dates
938-
return str_objs
939-
940-
if isinstance(result, DatetimeIndex):
941-
arr = result.to_numpy()
942-
arr.flags.writeable = True
943-
return arr
944-
return result._values
945-
946-
return converter
944+
if isinstance(result, DatetimeIndex):
945+
arr = result.to_numpy()
946+
arr.flags.writeable = True
947+
return arr
948+
return result._values
947949

948950

949951
parser_defaults = {
@@ -986,42 +988,6 @@ def converter(date_col, col: Hashable):
986988
}
987989

988990

989-
def _process_date_conversion(
990-
data_dict: Mapping[Hashable, ArrayLike] | DataFrame,
991-
converter: Callable,
992-
parse_spec: list,
993-
index_col,
994-
index_names,
995-
columns: Sequence[Hashable] | Index,
996-
dtype_backend=lib.no_default,
997-
) -> Mapping[Hashable, ArrayLike] | DataFrame:
998-
for colspec in parse_spec:
999-
if isinstance(colspec, int) and colspec not in data_dict:
1000-
colspec = columns[colspec]
1001-
if (isinstance(index_col, list) and colspec in index_col) or (
1002-
isinstance(index_names, list) and colspec in index_names
1003-
):
1004-
continue
1005-
elif dtype_backend == "pyarrow":
1006-
import pyarrow as pa
1007-
1008-
dtype = data_dict[colspec].dtype
1009-
if isinstance(dtype, ArrowDtype) and (
1010-
pa.types.is_timestamp(dtype.pyarrow_dtype)
1011-
or pa.types.is_date(dtype.pyarrow_dtype)
1012-
):
1013-
continue
1014-
1015-
# Pyarrow engine returns Series which we need to convert to
1016-
# numpy array before converter, its a no-op for other parsers
1017-
result = converter(np.asarray(data_dict[colspec]), col=colspec)
1018-
# error: Unsupported target for indexed assignment
1019-
# ("Mapping[Hashable, ExtensionArray | ndarray[Any, Any]] | DataFrame")
1020-
data_dict[colspec] = result # type: ignore[index]
1021-
1022-
return data_dict
1023-
1024-
1025991
def get_na_values(col, na_values, na_fvalues, keep_default_na: bool):
1026992
"""
1027993
Get the NaN values for a given column.

pandas/io/parsers/c_parser_wrapper.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from pandas.io.parsers.base_parser import (
3131
ParserBase,
3232
ParserError,
33+
date_converter,
3334
is_index_col,
3435
validate_parse_dates_presence,
3536
)
@@ -345,9 +346,12 @@ def _filter_usecols(self, names: SequenceT) -> SequenceT | list[Hashable]:
345346

346347
def _maybe_parse_dates(self, values, index: int, try_parse_dates: bool = True):
347348
if try_parse_dates and self._should_parse_dates(index):
348-
values = self._date_conv(
349+
values = date_converter(
349350
values,
350351
col=self.index_names[index] if self.index_names is not None else None,
352+
dayfirst=self.dayfirst,
353+
cache_dates=self.cache_dates,
354+
date_format=self.date_format,
351355
)
352356
return values
353357

0 commit comments

Comments
 (0)