|
20 | 20 |
|
21 | 21 | import numpy as np
|
22 | 22 |
|
23 |
| -from pandas._libs import ( |
24 |
| - lib, |
25 |
| - parsers, |
26 |
| -) |
27 |
| -import pandas._libs.ops as libops |
28 |
| -from pandas.compat._optional import import_optional_dependency |
| 23 | +from pandas._libs import lib |
29 | 24 | from pandas.errors import (
|
30 | 25 | EmptyDataError,
|
31 | 26 | ParserError,
|
|
38 | 33 | from pandas.core.dtypes.common import (
|
39 | 34 | is_bool_dtype,
|
40 | 35 | is_extension_array_dtype,
|
41 |
| - is_float_dtype, |
42 | 36 | is_integer,
|
43 |
| - is_integer_dtype, |
44 | 37 | is_numeric_dtype,
|
45 | 38 | is_object_dtype,
|
46 | 39 | is_string_dtype,
|
|
51 | 44 | ExtensionDtype,
|
52 | 45 | )
|
53 | 46 | from pandas.core.dtypes.inference import is_dict_like
|
54 |
| -from pandas.core.dtypes.missing import isna |
55 | 47 |
|
56 | 48 | from pandas.core import algorithms
|
57 | 49 | from pandas.core.arrays import (
|
58 |
| - ArrowExtensionArray, |
59 |
| - BaseMaskedArray, |
60 |
| - BooleanArray, |
61 | 50 | Categorical,
|
62 | 51 | ExtensionArray,
|
63 |
| - FloatingArray, |
64 |
| - IntegerArray, |
65 | 52 | )
|
66 | 53 | from pandas.core.arrays.boolean import BooleanDtype
|
67 |
| -from pandas.core.arrays.string_ import StringDtype |
68 | 54 | from pandas.core.indexes.api import Index
|
69 | 55 |
|
70 | 56 | from pandas.io.common import (
|
@@ -563,119 +549,6 @@ def _cast_types(self, values: ArrayLike, cast_type: DtypeObj, column) -> ArrayLi
|
563 | 549 | ) from err
|
564 | 550 | return values
|
565 | 551 |
|
566 |
| - @final |
567 |
| - def _infer_types( |
568 |
| - self, values, na_values, no_dtype_specified, try_num_bool: bool = True |
569 |
| - ) -> tuple[ArrayLike, int]: |
570 |
| - """ |
571 |
| - Infer types of values, possibly casting |
572 |
| -
|
573 |
| - Parameters |
574 |
| - ---------- |
575 |
| - values : ndarray |
576 |
| - na_values : set |
577 |
| - no_dtype_specified: Specifies if we want to cast explicitly |
578 |
| - try_num_bool : bool, default try |
579 |
| - try to cast values to numeric (first preference) or boolean |
580 |
| -
|
581 |
| - Returns |
582 |
| - ------- |
583 |
| - converted : ndarray or ExtensionArray |
584 |
| - na_count : int |
585 |
| - """ |
586 |
| - na_count = 0 |
587 |
| - if issubclass(values.dtype.type, (np.number, np.bool_)): |
588 |
| - # If our array has numeric dtype, we don't have to check for strings in isin |
589 |
| - na_values = np.array([val for val in na_values if not isinstance(val, str)]) |
590 |
| - mask = algorithms.isin(values, na_values) |
591 |
| - na_count = mask.astype("uint8", copy=False).sum() |
592 |
| - if na_count > 0: |
593 |
| - if is_integer_dtype(values): |
594 |
| - values = values.astype(np.float64) |
595 |
| - np.putmask(values, mask, np.nan) |
596 |
| - return values, na_count |
597 |
| - |
598 |
| - dtype_backend = self.dtype_backend |
599 |
| - non_default_dtype_backend = ( |
600 |
| - no_dtype_specified and dtype_backend is not lib.no_default |
601 |
| - ) |
602 |
| - result: ArrayLike |
603 |
| - |
604 |
| - if try_num_bool and is_object_dtype(values.dtype): |
605 |
| - # exclude e.g DatetimeIndex here |
606 |
| - try: |
607 |
| - result, result_mask = lib.maybe_convert_numeric( |
608 |
| - values, |
609 |
| - na_values, |
610 |
| - False, |
611 |
| - convert_to_masked_nullable=non_default_dtype_backend, # type: ignore[arg-type] |
612 |
| - ) |
613 |
| - except (ValueError, TypeError): |
614 |
| - # e.g. encountering datetime string gets ValueError |
615 |
| - # TypeError can be raised in floatify |
616 |
| - na_count = parsers.sanitize_objects(values, na_values) |
617 |
| - result = values |
618 |
| - else: |
619 |
| - if non_default_dtype_backend: |
620 |
| - if result_mask is None: |
621 |
| - result_mask = np.zeros(result.shape, dtype=np.bool_) |
622 |
| - |
623 |
| - if result_mask.all(): |
624 |
| - result = IntegerArray( |
625 |
| - np.ones(result_mask.shape, dtype=np.int64), result_mask |
626 |
| - ) |
627 |
| - elif is_integer_dtype(result): |
628 |
| - result = IntegerArray(result, result_mask) |
629 |
| - elif is_bool_dtype(result): |
630 |
| - result = BooleanArray(result, result_mask) |
631 |
| - elif is_float_dtype(result): |
632 |
| - result = FloatingArray(result, result_mask) |
633 |
| - |
634 |
| - na_count = result_mask.sum() |
635 |
| - else: |
636 |
| - na_count = isna(result).sum() |
637 |
| - else: |
638 |
| - result = values |
639 |
| - if values.dtype == np.object_: |
640 |
| - na_count = parsers.sanitize_objects(values, na_values) |
641 |
| - |
642 |
| - if result.dtype == np.object_ and try_num_bool: |
643 |
| - result, bool_mask = libops.maybe_convert_bool( |
644 |
| - np.asarray(values), |
645 |
| - true_values=self.true_values, |
646 |
| - false_values=self.false_values, |
647 |
| - convert_to_masked_nullable=non_default_dtype_backend, # type: ignore[arg-type] |
648 |
| - ) |
649 |
| - if result.dtype == np.bool_ and non_default_dtype_backend: |
650 |
| - if bool_mask is None: |
651 |
| - bool_mask = np.zeros(result.shape, dtype=np.bool_) |
652 |
| - result = BooleanArray(result, bool_mask) |
653 |
| - elif result.dtype == np.object_ and non_default_dtype_backend: |
654 |
| - # read_excel sends array of datetime objects |
655 |
| - if not lib.is_datetime_array(result, skipna=True): |
656 |
| - dtype = StringDtype() |
657 |
| - cls = dtype.construct_array_type() |
658 |
| - result = cls._from_sequence(values, dtype=dtype) |
659 |
| - |
660 |
| - if dtype_backend == "pyarrow": |
661 |
| - pa = import_optional_dependency("pyarrow") |
662 |
| - if isinstance(result, np.ndarray): |
663 |
| - result = ArrowExtensionArray(pa.array(result, from_pandas=True)) |
664 |
| - elif isinstance(result, BaseMaskedArray): |
665 |
| - if result._mask.all(): |
666 |
| - # We want an arrow null array here |
667 |
| - result = ArrowExtensionArray(pa.array([None] * len(result))) |
668 |
| - else: |
669 |
| - result = ArrowExtensionArray( |
670 |
| - pa.array(result._data, mask=result._mask) |
671 |
| - ) |
672 |
| - else: |
673 |
| - result = ArrowExtensionArray( |
674 |
| - pa.array(result.to_numpy(), from_pandas=True) |
675 |
| - ) |
676 |
| - |
677 |
| - return result, na_count |
678 |
| - |
679 | 552 | @cache_readonly
|
680 | 553 | def _have_mi_columns(self) -> bool:
|
681 | 554 | if self.header is None:
|
|
0 commit comments