Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
378 changes: 188 additions & 190 deletions doc/source/user_guide/style.ipynb

Large diffs are not rendered by default.

6 changes: 2 additions & 4 deletions pandas/core/_numba/kernels/min_max_.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,11 +112,9 @@ def grouped_min_max(
continue

if is_max:
if val > output[lab]:
output[lab] = val
output[lab] = max(val, output[lab])
else:
if val < output[lab]:
output[lab] = val
output[lab] = min(val, output[lab])

# Set labels that don't satisfy min_periods as np.nan
for lab, count in enumerate(nobs):
Expand Down
8 changes: 2 additions & 6 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -683,9 +683,7 @@ def _maybe_promote(dtype: np.dtype, fill_value=np.nan):

elif dtype.kind == "f":
mst = np.min_scalar_type(fill_value)
if mst > dtype:
# e.g. mst is np.float64 and dtype is np.float32
dtype = mst
dtype = max(mst, dtype)

elif dtype.kind == "c":
mst = np.min_scalar_type(fill_value)
Expand Down Expand Up @@ -718,9 +716,7 @@ def _maybe_promote(dtype: np.dtype, fill_value=np.nan):

elif dtype.kind == "c":
mst = np.min_scalar_type(fill_value)
if mst > dtype:
# e.g. mst is np.complex128 and dtype is np.complex64
dtype = mst
dtype = max(mst, dtype)

else:
dtype = np.dtype(np.object_)
Expand Down
53 changes: 36 additions & 17 deletions pandas/core/tools/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,10 @@


def to_numeric(
arg,
errors: DateTimeErrorChoices = "raise",
downcast: Literal["integer", "signed", "unsigned", "float"] | None = None,
dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
arg,
errors: DateTimeErrorChoices = "raise",
downcast: Literal["integer", "signed", "unsigned", "float"] | None = None,
dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
):
"""
Convert argument to a numeric type.
Expand Down Expand Up @@ -220,25 +220,32 @@ def to_numeric(
values = values.view(np.int64)
else:
values = ensure_object(values)
coerce_numeric = errors != "raise"
values, new_mask = lib.maybe_convert_numeric( # type: ignore[call-overload]
values,
set(),
coerce_numeric=coerce_numeric,
convert_to_masked_nullable=dtype_backend is not lib.no_default
or isinstance(values_dtype, StringDtype)
and values_dtype.na_value is libmissing.NA,
)
parsed_values = []
new_mask = []
for idx, x in enumerate(values):
parsed_value = parse_numeric(x)
if libmissing.checknull(parsed_value):
if errors == "raise":
raise ValueError(f"Unable to parse string '{x}' at position {idx}")
elif errors == "coerce":
parsed_values.append(libmissing.NA)
new_mask.append(True)
continue
else:
parsed_values.append(parsed_value)
new_mask.append(False)
values = np.array(parsed_values, dtype=object)
new_mask = np.array(new_mask, dtype=bool)

if new_mask is not None:
# Remove unnecessary values, is expected later anyway and enables
# downcasting
values = values[~new_mask]
elif (
dtype_backend is not lib.no_default
and new_mask is None
or isinstance(values_dtype, StringDtype)
and values_dtype.na_value is libmissing.NA
dtype_backend is not lib.no_default
and new_mask is None
or isinstance(values_dtype, StringDtype)
and values_dtype.na_value is libmissing.NA
):
new_mask = np.zeros(values.shape, dtype=np.bool_)

Expand Down Expand Up @@ -315,3 +322,15 @@ def to_numeric(
return values[0]
else:
return values


def parse_numeric(value):
    """
    Parse a single string into an ``int`` or ``float``.

    Non-string inputs are returned unchanged.

    Parameters
    ----------
    value : object
        The value to parse. Only ``str`` instances are converted.

    Returns
    -------
    int, float, libmissing.NA or object
        * ``int`` when the string is an integer literal, either with a
          radix prefix (``0x``, ``0o``, ``0b``) or in plain decimal form.
        * ``float`` when the string is not an integer but is accepted by
          ``float()``.
        * ``libmissing.NA`` when the string cannot be parsed as a number.
        * ``value`` itself when it is not a string.
    """
    if not isinstance(value, str):
        return value

    # Base 0 lets int() infer the radix from a ``0x``/``0o``/``0b`` prefix.
    try:
        return int(value, 0)
    except ValueError:
        pass

    # Base 0 rejects non-zero decimals with leading zeros (e.g. "010").
    # Retry in base 10 so they stay integers instead of silently falling
    # through to float() and coming back as 10.0.
    try:
        return int(value)
    except ValueError:
        pass

    try:
        return float(value)
    except ValueError:
        # Not numeric at all; signal this with the missing-value sentinel.
        return libmissing.NA
3 changes: 1 addition & 2 deletions pandas/io/excel/_odfreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,7 @@ def get_sheet_data(
empty_cells = 0
table_row.extend([value] * column_repeat)

if max_row_len < len(table_row):
max_row_len = len(table_row)
max_row_len = max(max_row_len, len(table_row))

row_repeat = self._get_row_repeat(sheet_row)
if len(table_row) == 0:
Expand Down
4 changes: 1 addition & 3 deletions pandas/tests/arithmetic/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,9 +274,7 @@ def test_numeric_arr_rdiv_tdscalar(self, three_days, numeric_idx, box_with_array
expected = TimedeltaIndex(["3 Days", "36 Hours"])
if isinstance(three_days, np.timedelta64):
dtype = three_days.dtype
if dtype < np.dtype("m8[s]"):
# i.e. resolution is lower -> use lowest supported resolution
dtype = np.dtype("m8[s]")
dtype = max(dtype, np.dtype("m8[s]"))
expected = expected.astype(dtype)
elif type(three_days) is timedelta:
expected = expected.astype("m8[us]")
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/test_query_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -1388,7 +1388,7 @@ def test_expr_with_column_name_with_backtick_and_hash(self):
def test_expr_with_column_name_with_backtick(self):
# GH 59285
df = DataFrame({"a`b": (1, 2, 3), "ab": (4, 5, 6)})
result = df.query("`a``b` < 2") # noqa
result = df.query("`a``b` < 2")
# Note: Formatting checks may wrongly consider the above ``inline code``.
expected = df[df["a`b"] < 2]
tm.assert_frame_equal(result, expected)
Expand Down
Loading