|
46 | 46 | from pandas.core.dtypes.missing import isna
|
47 | 47 |
|
48 | 48 | from pandas import (
|
49 |
| - ArrowDtype, |
50 | 49 | DataFrame,
|
51 | 50 | DatetimeIndex,
|
52 | 51 | StringDtype,
|
@@ -142,12 +141,6 @@ def __init__(self, kwds) -> None:
|
142 | 141 | self.false_values = kwds.get("false_values")
|
143 | 142 | self.cache_dates = kwds.pop("cache_dates", True)
|
144 | 143 |
|
145 |
| - self._date_conv = _make_date_converter( |
146 |
| - date_format=self.date_format, |
147 |
| - dayfirst=self.dayfirst, |
148 |
| - cache_dates=self.cache_dates, |
149 |
| - ) |
150 |
| - |
151 | 144 | # validate header options for mi
|
152 | 145 | self.header = kwds.get("header")
|
153 | 146 | if is_list_like(self.header, allow_sets=False):
|
@@ -355,9 +348,12 @@ def _agg_index(self, index, try_parse_dates: bool = True) -> Index:
|
355 | 348 |
|
356 | 349 | for i, arr in enumerate(index):
|
357 | 350 | if try_parse_dates and self._should_parse_dates(i):
|
358 |
| - arr = self._date_conv( |
| 351 | + arr = date_converter( |
359 | 352 | arr,
|
360 | 353 | col=self.index_names[i] if self.index_names is not None else None,
|
| 354 | + dayfirst=self.dayfirst, |
| 355 | + cache_dates=self.cache_dates, |
| 356 | + date_format=self.date_format, |
361 | 357 | )
|
362 | 358 |
|
363 | 359 | if self.na_filter:
|
@@ -667,16 +663,25 @@ def _do_date_conversions(
|
667 | 663 | names: Sequence[Hashable] | Index,
|
668 | 664 | data: Mapping[Hashable, ArrayLike] | DataFrame,
|
669 | 665 | ) -> Mapping[Hashable, ArrayLike] | DataFrame:
|
670 |
| - if isinstance(self.parse_dates, list): |
671 |
| - return _process_date_conversion( |
672 |
| - data, |
673 |
| - self._date_conv, |
674 |
| - self.parse_dates, |
675 |
| - self.index_col, |
676 |
| - self.index_names, |
677 |
| - names, |
678 |
| - dtype_backend=self.dtype_backend, |
| 666 | + if not isinstance(self.parse_dates, list): |
| 667 | + return data |
| 668 | + for colspec in self.parse_dates: |
| 669 | + if isinstance(colspec, int) and colspec not in data: |
| 670 | + colspec = names[colspec] |
| 671 | + if (isinstance(self.index_col, list) and colspec in self.index_col) or ( |
| 672 | + isinstance(self.index_names, list) and colspec in self.index_names |
| 673 | + ): |
| 674 | + continue |
| 675 | + result = date_converter( |
| 676 | + data[colspec], |
| 677 | + col=colspec, |
| 678 | + dayfirst=self.dayfirst, |
| 679 | + cache_dates=self.cache_dates, |
| 680 | + date_format=self.date_format, |
679 | 681 | )
|
| 682 | + # error: Unsupported target for indexed assignment |
| 683 | + # ("Mapping[Hashable, ExtensionArray | ndarray[Any, Any]] | DataFrame") |
| 684 | + data[colspec] = result # type: ignore[index] |
680 | 685 |
|
681 | 686 | return data
|
682 | 687 |
|
@@ -910,40 +915,37 @@ def _get_empty_meta(
|
910 | 915 | return index, columns, col_dict
|
911 | 916 |
|
912 | 917 |
|
913 |
| -def _make_date_converter( |
| 918 | +def date_converter( |
| 919 | + date_col, |
| 920 | + col: Hashable, |
914 | 921 | dayfirst: bool = False,
|
915 | 922 | cache_dates: bool = True,
|
916 | 923 | date_format: dict[Hashable, str] | str | None = None,
|
917 | 924 | ):
|
918 |
| - def converter(date_col, col: Hashable): |
919 |
| - if date_col.dtype.kind in "Mm": |
920 |
| - return date_col |
921 |
| - |
922 |
| - date_fmt = ( |
923 |
| - date_format.get(col) if isinstance(date_format, dict) else date_format |
| 925 | + if date_col.dtype.kind in "Mm": |
| 926 | + return date_col |
| 927 | + |
| 928 | + date_fmt = date_format.get(col) if isinstance(date_format, dict) else date_format |
| 929 | + |
| 930 | + str_objs = lib.ensure_string_array(np.asarray(date_col)) |
| 931 | + try: |
| 932 | + result = tools.to_datetime( |
| 933 | + str_objs, |
| 934 | + format=date_fmt, |
| 935 | + utc=False, |
| 936 | + dayfirst=dayfirst, |
| 937 | + cache=cache_dates, |
924 | 938 | )
|
| 939 | + except (ValueError, TypeError): |
| 940 | + # test_usecols_with_parse_dates4 |
| 941 | + # test_multi_index_parse_dates |
| 942 | + return str_objs |
925 | 943 |
|
926 |
| - str_objs = lib.ensure_string_array(date_col) |
927 |
| - try: |
928 |
| - result = tools.to_datetime( |
929 |
| - str_objs, |
930 |
| - format=date_fmt, |
931 |
| - utc=False, |
932 |
| - dayfirst=dayfirst, |
933 |
| - cache=cache_dates, |
934 |
| - ) |
935 |
| - except (ValueError, TypeError): |
936 |
| - # test_usecols_with_parse_dates4 |
937 |
| - # test_multi_index_parse_dates |
938 |
| - return str_objs |
939 |
| - |
940 |
| - if isinstance(result, DatetimeIndex): |
941 |
| - arr = result.to_numpy() |
942 |
| - arr.flags.writeable = True |
943 |
| - return arr |
944 |
| - return result._values |
945 |
| - |
946 |
| - return converter |
| 944 | + if isinstance(result, DatetimeIndex): |
| 945 | + arr = result.to_numpy() |
| 946 | + arr.flags.writeable = True |
| 947 | + return arr |
| 948 | + return result._values |
947 | 949 |
|
948 | 950 |
|
949 | 951 | parser_defaults = {
|
@@ -986,42 +988,6 @@ def converter(date_col, col: Hashable):
|
986 | 988 | }
|
987 | 989 |
|
988 | 990 |
|
989 |
| -def _process_date_conversion( |
990 |
| - data_dict: Mapping[Hashable, ArrayLike] | DataFrame, |
991 |
| - converter: Callable, |
992 |
| - parse_spec: list, |
993 |
| - index_col, |
994 |
| - index_names, |
995 |
| - columns: Sequence[Hashable] | Index, |
996 |
| - dtype_backend=lib.no_default, |
997 |
| -) -> Mapping[Hashable, ArrayLike] | DataFrame: |
998 |
| - for colspec in parse_spec: |
999 |
| - if isinstance(colspec, int) and colspec not in data_dict: |
1000 |
| - colspec = columns[colspec] |
1001 |
| - if (isinstance(index_col, list) and colspec in index_col) or ( |
1002 |
| - isinstance(index_names, list) and colspec in index_names |
1003 |
| - ): |
1004 |
| - continue |
1005 |
| - elif dtype_backend == "pyarrow": |
1006 |
| - import pyarrow as pa |
1007 |
| - |
1008 |
| - dtype = data_dict[colspec].dtype |
1009 |
| - if isinstance(dtype, ArrowDtype) and ( |
1010 |
| - pa.types.is_timestamp(dtype.pyarrow_dtype) |
1011 |
| - or pa.types.is_date(dtype.pyarrow_dtype) |
1012 |
| - ): |
1013 |
| - continue |
1014 |
| - |
1015 |
| - # Pyarrow engine returns Series which we need to convert to |
1016 |
| - # numpy array before converter, its a no-op for other parsers |
1017 |
| - result = converter(np.asarray(data_dict[colspec]), col=colspec) |
1018 |
| - # error: Unsupported target for indexed assignment |
1019 |
| - # ("Mapping[Hashable, ExtensionArray | ndarray[Any, Any]] | DataFrame") |
1020 |
| - data_dict[colspec] = result # type: ignore[index] |
1021 |
| - |
1022 |
| - return data_dict |
1023 |
| - |
1024 |
| - |
1025 | 991 | def get_na_values(col, na_values, na_fvalues, keep_default_na: bool):
|
1026 | 992 | """
|
1027 | 993 | Get the NaN values for a given column.
|
|
0 commit comments