@@ -62,116 +62,7 @@ def to_numeric(
62
62
):
63
63
"""
64
64
Convert argument to a numeric type.
65
-
66
- The default return dtype is `float64` or `int64`
67
- depending on the data supplied. Use the `downcast` parameter
68
- to obtain other dtypes.
69
-
70
- Please note that precision loss may occur if really large numbers
71
- are passed in. Due to the internal limitations of `ndarray`, if
72
- numbers smaller than `-9223372036854775808` (np.iinfo(np.int64).min)
73
- or larger than `18446744073709551615` (np.iinfo(np.uint64).max) are
74
- passed in, it is very likely they will be converted to float so that
75
- they can be stored in an `ndarray`. These warnings apply similarly to
76
- `Series` since it internally leverages `ndarray`.
77
-
78
- Parameters
79
- ----------
80
- arg : scalar, list, tuple, 1-d array, or Series
81
- Argument to be converted.
82
-
83
- errors : {'raise', 'coerce'}, default 'raise'
84
- - If 'raise', then invalid parsing will raise an exception.
85
- - If 'coerce', then invalid parsing will be set as NaN.
86
-
87
- downcast : str, default None
88
- Can be 'integer', 'signed', 'unsigned', or 'float'.
89
- If not None, and if the data has been successfully cast to a
90
- numerical dtype (or if the data was numeric to begin with),
91
- downcast that resulting data to the smallest numerical dtype
92
- possible according to the following rules:
93
-
94
- - 'integer' or 'signed': smallest signed int dtype (min.: np.int8)
95
- - 'unsigned': smallest unsigned int dtype (min.: np.uint8)
96
- - 'float': smallest float dtype (min.: np.float32)
97
-
98
- As this behaviour is separate from the core conversion to
99
- numeric values, any errors raised during the downcasting
100
- will be surfaced regardless of the value of the 'errors' input.
101
-
102
- In addition, downcasting will only occur if the size
103
- of the resulting data's dtype is strictly larger than
104
- the dtype it is to be cast to, so if none of the dtypes
105
- checked satisfy that specification, no downcasting will be
106
- performed on the data.
107
-
108
- dtype_backend : {'numpy_nullable', 'pyarrow'}
109
- Back-end data type applied to the resultant :class:`DataFrame`
110
- (still experimental). If not specified, the default behavior
111
- is to not use nullable data types. If specified, the behavior
112
- is as follows:
113
-
114
- * ``"numpy_nullable"``: returns nullable-dtype-backed object
115
- * ``"pyarrow"``: returns with pyarrow-backed nullable object
116
-
117
- .. versionadded:: 2.0
118
-
119
- Returns
120
- -------
121
- ret
122
- Numeric if parsing succeeded.
123
- Return type depends on input. Series if Series, otherwise ndarray.
124
-
125
- See Also
126
- --------
127
- DataFrame.astype : Cast argument to a specified dtype.
128
- to_datetime : Convert argument to datetime.
129
- to_timedelta : Convert argument to timedelta.
130
- numpy.ndarray.astype : Cast a numpy array to a specified type.
131
- DataFrame.convert_dtypes : Convert dtypes.
132
-
133
- Examples
134
- --------
135
- Take separate series and convert to numeric, coercing when told to
136
-
137
- >>> s = pd.Series(["1.0", "2", -3])
138
- >>> pd.to_numeric(s)
139
- 0 1.0
140
- 1 2.0
141
- 2 -3.0
142
- dtype: float64
143
- >>> pd.to_numeric(s, downcast="float")
144
- 0 1.0
145
- 1 2.0
146
- 2 -3.0
147
- dtype: float32
148
- >>> pd.to_numeric(s, downcast="signed")
149
- 0 1
150
- 1 2
151
- 2 -3
152
- dtype: int8
153
- >>> s = pd.Series(["apple", "1.0", "2", -3])
154
- >>> pd.to_numeric(s, errors="coerce")
155
- 0 NaN
156
- 1 1.0
157
- 2 2.0
158
- 3 -3.0
159
- dtype: float64
160
-
161
- Downcasting of nullable integer and floating dtypes is supported:
162
-
163
- >>> s = pd.Series([1, 2, 3], dtype="Int64")
164
- >>> pd.to_numeric(s, downcast="integer")
165
- 0 1
166
- 1 2
167
- 2 3
168
- dtype: Int8
169
- >>> s = pd.Series([1.0, 2.1, 3.0], dtype="Float64")
170
- >>> pd.to_numeric(s, downcast="float")
171
- 0 1.0
172
- 1 2.1
173
- 2 3.0
174
- dtype: Float32
65
+ ...
175
66
"""
176
67
if downcast not in (None , "integer" , "signed" , "unsigned" , "float" ):
177
68
raise ValueError ("invalid downcasting method provided" )
@@ -208,8 +99,6 @@ def to_numeric(
208
99
else :
209
100
values = arg
210
101
211
- # GH33013: for IntegerArray & FloatingArray extract non-null values for casting
212
- # save mask to reconstruct the full array after casting
213
102
mask : npt .NDArray [np .bool_ ] | None = None
214
103
if isinstance (values , BaseMaskedArray ):
215
104
mask = values ._mask
@@ -220,6 +109,7 @@ def to_numeric(
220
109
mask = values .isna ()
221
110
values = values .dropna ().to_numpy ()
222
111
new_mask : np .ndarray | None = None
112
+
223
113
if is_numeric_dtype (values_dtype ):
224
114
pass
225
115
elif lib .is_np_dtype (values_dtype , "mM" ):
@@ -231,9 +121,9 @@ def to_numeric(
231
121
for idx , x in enumerate (values ):
232
122
parsed_value = parse_numeric (x )
233
123
if libmissing .checknull (parsed_values ):
234
- if errors == ' raise' :
235
- raise ValueError (f"Unable to parse string '{ x } ' at position{ idx } " )
236
- elif errors == ' coerce' :
124
+ if errors == " raise" :
125
+ raise ValueError (f"Unable to parse string '{ x } ' at position { idx } " )
126
+ elif errors == " coerce" :
237
127
parsed_values .append (libmissing .NA )
238
128
new_mask .append (True )
239
129
continue
@@ -245,8 +135,6 @@ def to_numeric(
245
135
new_mask = np .array (new_mask , dtype = bool )
246
136
247
137
if new_mask is not None :
248
- # Remove unnecessary values, is expected later anyway and enables
249
- # downcasting
250
138
values = values [~ new_mask ]
251
139
elif (
252
140
dtype_backend is not lib .no_default
@@ -256,8 +144,6 @@ def to_numeric(
256
144
):
257
145
new_mask = np .zeros (values .shape , dtype = np .bool_ )
258
146
259
- # attempt downcast only if the data has been successfully converted
260
- # to a numerical dtype and if a downcast method has been specified
261
147
if downcast is not None and is_numeric_dtype (values .dtype ):
262
148
typecodes : str | None = None
263
149
@@ -267,30 +153,23 @@ def to_numeric(
267
153
typecodes = np .typecodes ["UnsignedInteger" ]
268
154
elif downcast == "float" :
269
155
typecodes = np .typecodes ["Float" ]
270
-
271
- # pandas support goes only to np.float32,
272
- # as float dtypes smaller than that are
273
- # extremely rare and not well supported
274
156
float_32_char = np .dtype (np .float32 ).char
275
157
float_32_ind = typecodes .index (float_32_char )
276
158
typecodes = typecodes [float_32_ind :]
277
159
278
160
if typecodes is not None :
279
- # from smallest to largest
280
161
for typecode in typecodes :
281
162
dtype = np .dtype (typecode )
282
163
if dtype .itemsize <= values .dtype .itemsize :
164
+ # Only downcast if values are all integers
165
+ if downcast in ("integer" , "signed" , "unsigned" ) and not np .isin (np .mod (values , 1 ), 0 ).all ():
166
+ continue # Skip downcasting if there are any float values
283
167
values = maybe_downcast_numeric (values , dtype )
284
-
285
- # successful conversion
286
168
if values .dtype == dtype :
287
169
break
288
170
289
- # GH33013: for IntegerArray, BooleanArray & FloatingArray need to reconstruct
290
- # masked array
291
171
if (mask is not None or new_mask is not None ) and not is_string_dtype (values .dtype ):
292
172
if mask is None or (new_mask is not None and new_mask .shape == mask .shape ):
293
- # GH 52588
294
173
mask = new_mask
295
174
else :
296
175
mask = mask .copy ()
@@ -320,10 +199,7 @@ def to_numeric(
320
199
if is_series :
321
200
return arg ._constructor (values , index = arg .index , name = arg .name )
322
201
elif is_index :
323
- # because we want to coerce to numeric if possible,
324
- # do not use _shallow_copy
325
202
from pandas import Index
326
-
327
203
return Index (values , name = arg .name )
328
204
elif is_scalars :
329
205
return values [0 ]
@@ -334,7 +210,7 @@ def to_numeric(
334
210
if __name__ == "__main__" :
335
211
import numpy as np
336
212
337
- test_data = [' 0x1A' , ' 0b1010' , ' 0o17' , '25' , ' 3.14' , ' invalid' ]
338
- result = to_numeric (test_data , errors = ' coerce' )
213
+ test_data = [" 0x1A" , " 0b1010" , " 0o17" , "25" , " 3.14" , " invalid" ]
214
+ result = to_numeric (test_data , errors = " coerce" )
339
215
print ("Inputs:" , test_data )
340
216
print ("ParseResult:" , result )
0 commit comments