|
1 | 1 | from __future__ import annotations
|
2 | 2 |
|
| 3 | + |
3 | 4 | from typing import (
|
4 | 5 | TYPE_CHECKING,
|
5 | 6 | Literal,
|
|
34 | 35 | from pandas.core.arrays import BaseMaskedArray
|
35 | 36 | from pandas.core.arrays.string_ import StringDtype
|
36 | 37 |
|
| 38 | +from pandas.core.dtypes.common import is_string_dtype |
| 39 | + |
37 | 40 | if TYPE_CHECKING:
|
38 | 41 | from pandas._typing import (
|
39 | 42 | DateTimeErrorChoices,
|
40 | 43 | DtypeBackend,
|
41 | 44 | npt,
|
42 | 45 | )
|
43 | 46 |
|
def parse_numeric(value):
    """
    Convert a single string into a Python number.

    Parameters
    ----------
    value : object
        The value to convert. Anything that is not a ``str`` is returned
        unchanged.

    Returns
    -------
    int, float, ``libmissing.NA`` or the original object
        * ``int`` when the string is a plain decimal integer (leading zeros
          allowed, e.g. ``"007"``) or a prefixed literal such as ``"0x1A"``,
          ``"0o17"`` or ``"0b101"``.
        * ``float`` when the string is not an integer but ``float()`` accepts
          it (e.g. ``"1.5"``, ``"1e3"``).
        * ``libmissing.NA`` when the string cannot be parsed at all.
        * ``value`` itself when it is not a string.
    """
    if not isinstance(value, str):
        return value

    # Try plain decimal first: base-0 parsing follows Python literal rules and
    # rejects non-zero decimals with leading zeros ("007"), which would then
    # be silently turned into a float by the fallback below.  Base 0 is kept
    # as the second attempt so that 0x / 0o / 0b prefixes are auto-detected.
    for base in (10, 0):
        try:
            return int(value, base)
        except ValueError:
            pass

    try:
        return float(value)
    except ValueError:
        return libmissing.NA
44 | 57 |
|
45 | 58 | def to_numeric(
|
46 | 59 | arg,
|
@@ -161,6 +174,7 @@ def to_numeric(
|
161 | 174 | 2 3.0
|
162 | 175 | dtype: Float32
|
163 | 176 | """
|
| 177 | + |
164 | 178 | if downcast not in (None, "integer", "signed", "unsigned", "float"):
|
165 | 179 | raise ValueError("invalid downcasting method provided")
|
166 | 180 |
|
@@ -214,15 +228,25 @@ def to_numeric(
|
214 | 228 | values = values.view(np.int64)
|
215 | 229 | else:
|
216 | 230 | values = ensure_object(values)
|
217 |
| - coerce_numeric = errors != "raise" |
218 |
| - values, new_mask = lib.maybe_convert_numeric( # type: ignore[call-overload] |
219 |
| - values, |
220 |
| - set(), |
221 |
| - coerce_numeric=coerce_numeric, |
222 |
| - convert_to_masked_nullable=dtype_backend is not lib.no_default |
223 |
| - or isinstance(values_dtype, StringDtype) |
224 |
| - and values_dtype.na_value is libmissing.NA, |
225 |
| - ) |
| 231 | + parsed_values = [] |
| 232 | + new_mask = [] |
| 233 | + |
| 234 | + for idx, x in enumerate(values): |
| 235 | + parsed_value = parse_numeric(x) |
| 236 | + if libmissing.checknull(parsed_value): |
| 237 | + if errors == 'raise': |
| 238 | + raise ValueError(f"Unable to parse string '{x}' at position {idx}") |
| 239 | + elif errors == 'coerce': |
| 240 | + parsed_values.append(libmissing.NA) |
| 241 | + new_mask.append(True) |
| 242 | + continue |
| 243 | + else: |
| 244 | + parsed_values.append(parsed_value) |
| 245 | + new_mask.append(False) |
| 246 | + |
| 247 | + values = np.array(parsed_values, dtype=object) |
| 248 | + new_mask = np.array(new_mask, dtype=bool) |
| 249 | + |
226 | 250 |
|
227 | 251 | if new_mask is not None:
|
228 | 252 | # Remove unnecessary values, is expected later anyway and enables
|
|
0 commit comments