|
42 | 42 | )
|
43 | 43 |
|
44 | 44 |
|
def parse_numeric(value):
    """
    Parse a string into an ``int`` or ``float``; pass non-strings through.

    Parameters
    ----------
    value : object
        The value to parse. Only ``str`` instances are parsed; any other
        object is returned unchanged.

    Returns
    -------
    int, float, libmissing.NA or object
        * ``int`` when the string is an integer literal, either with a
          radix prefix (``0x``, ``0o``, ``0b``) or in plain decimal,
          including zero-padded decimals such as ``"007"``.
        * ``float`` when the string is accepted by ``float()``.
        * ``libmissing.NA`` when the string cannot be parsed as a number.
        * ``value`` itself when it is not a string.
    """
    if not isinstance(value, str):
        return value

    try:
        # Base 0 makes int() pick the radix from a 0x / 0o / 0b prefix.
        return int(value, 0)
    except ValueError:
        pass

    try:
        # Base 0 rejects zero-padded decimals ("007"); without this attempt
        # they would fall through to float() and come back as 7.0.
        return int(value, 10)
    except ValueError:
        pass

    try:
        return float(value)
    except ValueError:
        # Not numeric in any supported notation.
        return libmissing.NA
| 55 | + |
| 56 | + |
45 | 57 | def to_numeric(
|
46 |
| - arg, |
47 |
| - errors: DateTimeErrorChoices = "raise", |
48 |
| - downcast: Literal["integer", "signed", "unsigned", "float"] | None = None, |
49 |
| - dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default, |
| 58 | + arg, |
| 59 | + errors: DateTimeErrorChoices = "raise", |
| 60 | + downcast: Literal["integer", "signed", "unsigned", "float"] | None = None, |
| 61 | + dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default, |
50 | 62 | ):
|
51 | 63 | """
|
52 | 64 | Convert argument to a numeric type.
|
@@ -214,25 +226,33 @@ def to_numeric(
|
214 | 226 | values = values.view(np.int64)
|
215 | 227 | else:
|
216 | 228 | values = ensure_object(values)
|
217 |
| - coerce_numeric = errors != "raise" |
218 |
| - values, new_mask = lib.maybe_convert_numeric( # type: ignore[call-overload] |
219 |
| - values, |
220 |
| - set(), |
221 |
| - coerce_numeric=coerce_numeric, |
222 |
| - convert_to_masked_nullable=dtype_backend is not lib.no_default |
223 |
| - or isinstance(values_dtype, StringDtype) |
224 |
| - and values_dtype.na_value is libmissing.NA, |
225 |
| - ) |
| 229 | + parsed_values = [] |
| 230 | + new_mask = [] |
| 231 | + for idx, x in enumerate(values): |
| 232 | + parsed_value = parse_numeric(x) |
| 233 | + if libmissing.checknull(parsed_values): |
| 234 | + if errors == 'raise': |
| 235 | + raise ValueError(f"Unable to parse string '{x}' at position{idx}") |
| 236 | + elif errors == 'coerce': |
| 237 | + parsed_values.append(libmissing.NA) |
| 238 | + new_mask.append(True) |
| 239 | + continue |
| 240 | + else: |
| 241 | + parsed_values.append(parsed_value) |
| 242 | + new_mask.append(False) |
| 243 | + |
| 244 | + values = np.array(parsed_values, dtype=object) |
| 245 | + new_mask = np.array(new_mask, dtype=bool) |
226 | 246 |
|
227 | 247 | if new_mask is not None:
|
228 | 248 | # Remove unnecessary values, is expected later anyway and enables
|
229 | 249 | # downcasting
|
230 | 250 | values = values[~new_mask]
|
231 | 251 | elif (
|
232 |
| - dtype_backend is not lib.no_default |
233 |
| - and new_mask is None |
234 |
| - or isinstance(values_dtype, StringDtype) |
235 |
| - and values_dtype.na_value is libmissing.NA |
| 252 | + dtype_backend is not lib.no_default |
| 253 | + and new_mask is None |
| 254 | + or isinstance(values_dtype, StringDtype) |
| 255 | + and values_dtype.na_value is libmissing.NA |
236 | 256 | ):
|
237 | 257 | new_mask = np.zeros(values.shape, dtype=np.bool_)
|
238 | 258 |
|
@@ -309,3 +329,12 @@ def to_numeric(
|
309 | 329 | return values[0]
|
310 | 330 | else:
|
311 | 331 | return values
|
| 332 | + |
| 333 | + |
if __name__ == "__main__":
    import numpy as np

    # Ad-hoc smoke run: hex, binary, octal, decimal, float and an
    # unparsable string, converted with errors='coerce'.
    samples = ['0x1A', '0b1010', '0o17', '25', '3.14', 'invalid']
    converted = to_numeric(samples, errors='coerce')

    for label, payload in (("Inputs:", samples), ("ParseResult:", converted)):
        print(label, payload)
0 commit comments