|
28 | 28 | )
|
29 | 29 | from pandas.util._exceptions import find_stack_level
|
30 | 30 |
|
31 |
| -from pandas.core.dtypes.astype import astype_array |
32 | 31 | from pandas.core.dtypes.common import (
|
33 | 32 | is_bool_dtype,
|
34 | 33 | is_dict_like,
|
|
39 | 38 | is_object_dtype,
|
40 | 39 | is_string_dtype,
|
41 | 40 | )
|
42 |
| -from pandas.core.dtypes.dtypes import ( |
43 |
| - CategoricalDtype, |
44 |
| - ExtensionDtype, |
45 |
| -) |
46 | 41 | from pandas.core.dtypes.missing import isna
|
47 | 42 |
|
48 | 43 | from pandas import (
|
|
55 | 50 | ArrowExtensionArray,
|
56 | 51 | BaseMaskedArray,
|
57 | 52 | BooleanArray,
|
58 |
| - Categorical, |
59 |
| - ExtensionArray, |
60 | 53 | FloatingArray,
|
61 | 54 | IntegerArray,
|
62 | 55 | )
|
63 |
| -from pandas.core.arrays.boolean import BooleanDtype |
64 | 56 | from pandas.core.indexes.api import (
|
65 | 57 | Index,
|
66 | 58 | MultiIndex,
|
|
83 | 75 | from pandas._typing import (
|
84 | 76 | ArrayLike,
|
85 | 77 | DtypeArg,
|
86 |
| - DtypeObj, |
87 | 78 | Hashable,
|
88 | 79 | HashableT,
|
89 | 80 | Scalar,
|
@@ -171,7 +162,7 @@ def __init__(self, kwds) -> None:
|
171 | 162 |
|
172 | 163 | self._first_chunk = True
|
173 | 164 |
|
174 |
| - self.usecols, self.usecols_dtype = self._validate_usecols_arg(kwds["usecols"]) |
| 165 | + self.usecols, self.usecols_dtype = _validate_usecols_arg(kwds["usecols"]) |
175 | 166 |
|
176 | 167 | # Fallback to error to pass a sketchy test(test_override_set_noconvert_columns)
|
177 | 168 | # Normally, this arg would get pre-processed earlier on
|
@@ -569,80 +560,6 @@ def _infer_types(
|
569 | 560 |
|
570 | 561 | return result, na_count
|
571 | 562 |
|
572 |
@final
def _cast_types(self, values: ArrayLike, cast_type: DtypeObj, column) -> ArrayLike:
    """
    Convert parsed values to the requested dtype.

    Parameters
    ----------
    values : ndarray or ExtensionArray
        The values to convert.
    cast_type : np.dtype or ExtensionDtype
        Target dtype.
    column : string
        Name of the column; only interpolated into the error message.

    Returns
    -------
    converted : ndarray or ExtensionArray

    Raises
    ------
    NotImplementedError
        If the extension array type for ``cast_type`` does not implement
        ``_from_sequence_of_strings``.
    ValueError
        If the fallback ``astype_array`` conversion fails.
    """
    if isinstance(cast_type, CategoricalDtype):
        categories_unknown = cast_type.categories is None
        if categories_unknown and not is_object_dtype(values.dtype):
            # TODO: this is for consistency with the c-parser, which
            # parses all categories as strings
            values = lib.ensure_string_array(
                values, skipna=False, convert_na_value=False
            )

        uniques = Index(values).unique().dropna()
        codes = uniques.get_indexer(values)
        return Categorical._from_inferred_categories(
            uniques, codes, cast_type, true_values=self.true_values
        )

    if isinstance(cast_type, ExtensionDtype):
        # defer to the EA's own string-parsing constructor
        array_type = cast_type.construct_array_type()
        try:
            if not isinstance(cast_type, BooleanDtype):
                return array_type._from_sequence_of_strings(values, dtype=cast_type)

            # error: Unexpected keyword argument "true_values" for
            # "_from_sequence_of_strings" of "ExtensionArray"
            as_strings = [str(item) for item in values]
            return array_type._from_sequence_of_strings(  # type: ignore[call-arg]
                as_strings,
                dtype=cast_type,
                true_values=self.true_values,
                false_values=self.false_values,
                none_values=self.na_values,
            )
        except NotImplementedError as err:
            raise NotImplementedError(
                f"Extension Array: {array_type} must implement "
                "_from_sequence_of_strings in order to be used in parser methods"
            ) from err

    if isinstance(values, ExtensionArray):
        return values.astype(cast_type, copy=False)

    if issubclass(cast_type.type, str):
        # TODO: why skipna=True here and False in the categorical branch?
        # some tests depend on it here, but nothing fails if the categorical
        # branch is changed (no tests get there as of 2022-12-06)
        return lib.ensure_string_array(values, skipna=True, convert_na_value=False)

    try:
        return astype_array(values, cast_type, copy=True)
    except ValueError as err:
        raise ValueError(
            f"Unable to convert column {column} to type {cast_type}"
        ) from err
645 |
| - |
646 | 563 | @overload
|
647 | 564 | def _do_date_conversions(
|
648 | 565 | self,
|
@@ -776,56 +693,6 @@ def _validate_usecols_names(self, usecols: SequenceT, names: Sequence) -> Sequen
|
776 | 693 |
|
777 | 694 | return usecols
|
778 | 695 |
|
779 |
@final
def _validate_usecols_arg(self, usecols):
    """
    Validate the 'usecols' parameter.

    Checks whether or not the 'usecols' parameter contains all integers
    (column selection by index), strings (column by name) or is a callable.
    Raises a ValueError if that is not the case.

    Parameters
    ----------
    usecols : list-like, callable, or None
        List of columns to use when parsing or a callable that can be used
        to filter a list of table columns.

    Returns
    -------
    usecols_tuple : tuple
        A tuple of (verified_usecols, usecols_dtype).

        'verified_usecols' is either a set if an array-like is passed in or
        'usecols' if a callable or None is passed in.

        'usecols_dtype' is the inferred dtype of 'usecols' if an array-like
        is passed in or None if a callable or None is passed in.

    Raises
    ------
    ValueError
        If 'usecols' is not list-like (e.g. a plain string), or its inferred
        dtype is not one of "empty", "integer" or "string".
    """
    from collections.abc import Iterator

    if usecols is None or callable(usecols):
        return usecols, None

    msg = (
        "'usecols' must either be list-like of all strings, all unicode, "
        "all integers or a callable."
    )

    if not is_list_like(usecols):
        # see gh-20529
        #
        # Ensure it is iterable container but not string.
        raise ValueError(msg)

    if isinstance(usecols, Iterator):
        # A one-shot iterator would be exhausted by dtype inference below,
        # leaving nothing for set(); materialize it exactly once.
        usecols = list(usecols)

    usecols_dtype = lib.infer_dtype(usecols, skipna=False)

    if usecols_dtype not in ("empty", "integer", "string"):
        raise ValueError(msg)

    return set(usecols), usecols_dtype
828 |
| - |
829 | 696 | @final
|
830 | 697 | def _clean_index_names(self, columns, index_col) -> tuple[list | None, list, list]:
|
831 | 698 | if not is_index_col(index_col):
|
@@ -1071,3 +938,53 @@ def validate_parse_dates_presence(
|
1071 | 938 | missing_cols = ", ".join(sorted(missing))
|
1072 | 939 | raise ValueError(f"Missing column provided to 'parse_dates': '{missing_cols}'")
|
1073 | 940 | return unique_cols
|
| 941 | + |
| 942 | + |
def _validate_usecols_arg(usecols):
    """
    Validate the 'usecols' parameter.

    Checks whether or not the 'usecols' parameter contains all integers
    (column selection by index), strings (column by name) or is a callable.
    Raises a ValueError if that is not the case.

    Parameters
    ----------
    usecols : list-like, callable, or None
        List of columns to use when parsing or a callable that can be used
        to filter a list of table columns.

    Returns
    -------
    usecols_tuple : tuple
        A tuple of (verified_usecols, usecols_dtype).

        'verified_usecols' is either a set if an array-like is passed in or
        'usecols' if a callable or None is passed in.

        'usecols_dtype' is the inferred dtype of 'usecols' if an array-like
        is passed in or None if a callable or None is passed in.

    Raises
    ------
    ValueError
        If 'usecols' is not list-like (e.g. a plain string), or its inferred
        dtype is not one of "empty", "integer" or "string".
    """
    from collections.abc import Iterator

    if usecols is None or callable(usecols):
        return usecols, None

    msg = (
        "'usecols' must either be list-like of all strings, all unicode, "
        "all integers or a callable."
    )

    if not is_list_like(usecols):
        # see gh-20529
        #
        # Ensure it is iterable container but not string.
        raise ValueError(msg)

    if isinstance(usecols, Iterator):
        # A one-shot iterator would be exhausted by dtype inference below,
        # leaving nothing for set(); materialize it exactly once.
        usecols = list(usecols)

    usecols_dtype = lib.infer_dtype(usecols, skipna=False)

    if usecols_dtype not in ("empty", "integer", "string"):
        raise ValueError(msg)

    return set(usecols), usecols_dtype
0 commit comments