Skip to content

Commit 7c3aa3b

Browse files
author
Lzforevr
committed
Enhanced numeric.py to process hexadecimal, octal, and binary formats like 0x, 0o, 0b
1 parent fd823d2 commit 7c3aa3b

File tree

1 file changed

+46
-17
lines changed

1 file changed

+46
-17
lines changed

pandas/core/tools/numeric.py

Lines changed: 46 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,23 @@
4242
)
4343

4444

45+
def parse_numeric(value):
    """
    Convert a single string to a Python number, detecting the radix prefix.

    Parameters
    ----------
    value : object
        The value to convert. Anything that is not a ``str`` is returned
        unchanged.

    Returns
    -------
    int, float, NA or object
        * ``int`` when the string is an integer literal, either prefixed
          (``0x``, ``0o``, ``0b``) or plain decimal.
        * ``float`` when the string is not an integer but ``float()``
          accepts it.
        * ``libmissing.NA`` when the string cannot be parsed as a number.
        * ``value`` itself when it is not a string.

    Examples
    --------
    >>> parse_numeric("0x1A")
    26
    >>> parse_numeric("007")
    7
    >>> parse_numeric("3.14")
    3.14
    """
    if not isinstance(value, str):
        return value

    # Base 0 auto-detects the 0x/0o/0b prefixes but rejects decimal strings
    # with leading zeros ("007"). Retry in base 10 so such strings stay
    # integers instead of falling through to float and becoming 7.0.
    for base in (0, 10):
        try:
            return int(value, base)
        except ValueError:
            continue

    try:
        return float(value)
    except ValueError:
        return libmissing.NA
55+
56+
4557
def to_numeric(
46-
arg,
47-
errors: DateTimeErrorChoices = "raise",
48-
downcast: Literal["integer", "signed", "unsigned", "float"] | None = None,
49-
dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
58+
arg,
59+
errors: DateTimeErrorChoices = "raise",
60+
downcast: Literal["integer", "signed", "unsigned", "float"] | None = None,
61+
dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
5062
):
5163
"""
5264
Convert argument to a numeric type.
@@ -214,25 +226,33 @@ def to_numeric(
214226
values = values.view(np.int64)
215227
else:
216228
values = ensure_object(values)
217-
coerce_numeric = errors != "raise"
218-
values, new_mask = lib.maybe_convert_numeric( # type: ignore[call-overload]
219-
values,
220-
set(),
221-
coerce_numeric=coerce_numeric,
222-
convert_to_masked_nullable=dtype_backend is not lib.no_default
223-
or isinstance(values_dtype, StringDtype)
224-
and values_dtype.na_value is libmissing.NA,
225-
)
229+
parsed_values = []
230+
new_mask = []
231+
for idx, x in enumerate(values):
232+
parsed_value = parse_numeric(x)
233+
if libmissing.checknull(parsed_values):
234+
if errors == 'raise':
235+
raise ValueError(f"Unable to parse string '{x}' at position{idx}")
236+
elif errors == 'coerce':
237+
parsed_values.append(libmissing.NA)
238+
new_mask.append(True)
239+
continue
240+
else:
241+
parsed_values.append(parsed_value)
242+
new_mask.append(False)
243+
244+
values = np.array(parsed_values, dtype=object)
245+
new_mask = np.array(new_mask, dtype=bool)
226246

227247
if new_mask is not None:
228248
# Remove unnecessary values, is expected later anyway and enables
229249
# downcasting
230250
values = values[~new_mask]
231251
elif (
232-
dtype_backend is not lib.no_default
233-
and new_mask is None
234-
or isinstance(values_dtype, StringDtype)
235-
and values_dtype.na_value is libmissing.NA
252+
dtype_backend is not lib.no_default
253+
and new_mask is None
254+
or isinstance(values_dtype, StringDtype)
255+
and values_dtype.na_value is libmissing.NA
236256
):
237257
new_mask = np.zeros(values.shape, dtype=np.bool_)
238258

@@ -309,3 +329,12 @@ def to_numeric(
309329
return values[0]
310330
else:
311331
return values
332+
333+
334+
if __name__ == "__main__":
    # Ad-hoc smoke check: run to_numeric over prefixed (hex/binary/octal),
    # plain decimal, float and unparseable strings with errors="coerce" and
    # print the result. numpy is not used in this block, so it is not
    # imported here.
    test_data = ['0x1A', '0b1010', '0o17', '25', '3.14', 'invalid']
    result = to_numeric(test_data, errors='coerce')
    print("Inputs:", test_data)
    print("ParseResult:", result)

0 commit comments

Comments
 (0)