Skip to content

Commit a499400

Browse files
committed
Merge remote-tracking branch 'upstream/main' into ref/index_equiv
2 parents 1e8f4a4 + 04487b3 commit a499400

File tree

30 files changed

+220
-192
lines changed

30 files changed

+220
-192
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -272,11 +272,11 @@ Performance improvements
272272
- Performance improvement in :meth:`Index.join` by propagating cached attributes in cases where the result matches one of the inputs (:issue:`57023`)
273273
- Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`)
274274
- Performance improvement in :meth:`MultiIndex.equals` for equal length indexes (:issue:`56990`)
275-
- Performance improvement in :meth:`RangeIndex.__getitem__` with a boolean mask returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57588`)
275+
- Performance improvement in :meth:`RangeIndex.__getitem__` with a boolean mask or integers returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57588`)
276276
- Performance improvement in :meth:`RangeIndex.append` when appending the same index (:issue:`57252`)
277-
- Performance improvement in :meth:`RangeIndex.join` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57651`)
278-
- Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57647`)
279-
- Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`)
277+
- Performance improvement in :meth:`RangeIndex.join` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57651`, :issue:`57752`)
278+
- Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57647`, :issue:`57752`)
279+
- Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`, :issue:`57752`)
280280
- Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`)
281281
- Performance improvement in indexing operations for string dtypes (:issue:`56997`)
282282

@@ -289,6 +289,7 @@ Bug fixes
289289
- Fixed bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
290290
- Fixed bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
291291
- Fixed bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`)
292+
- Fixed bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`)
292293
- Fixed bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)
293294
- Fixed bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
294295

@@ -393,6 +394,7 @@ Other
393394
^^^^^
394395
- Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`)
395396
- Bug in :func:`tseries.api.guess_datetime_format` would fail to infer time format when "%Y" == "%H%M" (:issue:`57452`)
397+
- Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`)
396398
- Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning a :class:`RangeIndex` columns (:issue:`57293`)
397399
- Bug in :meth:`DataFrame.where` where using a non-bool type array in the function would raise a ``ValueError`` instead of a ``TypeError`` (:issue:`56330`)
398400
- Bug in the DataFrame Interchange Protocol implementation that was returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`)

environment.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ dependencies:
6262
# downstream packages
6363
- dask-core
6464
- seaborn-base
65+
- dask-expr
6566

6667
# local testing dependencies
6768
- moto

pandas/core/algorithms.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -439,6 +439,10 @@ def unique_with_mask(values, mask: npt.NDArray[np.bool_] | None = None):
439439
# Dispatch to extension dtype's unique.
440440
return values.unique()
441441

442+
if isinstance(values, ABCIndex):
443+
# Dispatch to Index's unique.
444+
return values.unique()
445+
442446
original = values
443447
hashtable, values = _get_hashtable_algo(values)
444448

pandas/core/groupby/generic.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1642,8 +1642,11 @@ def _wrap_applied_output(
16421642
first_not_none = next(com.not_none(*values), None)
16431643

16441644
if first_not_none is None:
1645-
# GH9684 - All values are None, return an empty frame.
1646-
return self.obj._constructor()
1645+
# GH9684 - All values are None, return an empty frame
1646+
# GH57775 - Ensure that columns and dtypes from original frame are kept.
1647+
result = self.obj._constructor(columns=data.columns)
1648+
result = result.astype(data.dtypes)
1649+
return result
16471650
elif isinstance(first_not_none, DataFrame):
16481651
return self._concat_objects(
16491652
values,

pandas/core/groupby/groupby.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1636,6 +1636,14 @@ def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT:
16361636
a 5
16371637
b 2
16381638
dtype: int64
1639+
1640+
Example 4: The function passed to ``apply`` returns ``None`` for one of the
1641+
groups. This group is filtered from the result:
1642+
1643+
>>> g1.apply(lambda x: None if x.iloc[0, 0] == 3 else x, include_groups=False)
1644+
B C
1645+
0 1 4
1646+
1 2 6
16391647
"""
16401648
if isinstance(func, str):
16411649
if hasattr(self, func):

pandas/core/indexes/base.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4235,7 +4235,6 @@ def join(
42354235

42364236
return self._join_via_get_indexer(other, how, sort)
42374237

4238-
@final
42394238
def _join_empty(
42404239
self, other: Index, how: JoinHow, sort: bool
42414240
) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:

pandas/core/indexes/range.py

Lines changed: 39 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -472,18 +472,31 @@ def _shallow_copy(self, values, name: Hashable = no_default):
472472

473473
if values.dtype.kind == "f":
474474
return Index(values, name=name, dtype=np.float64)
475-
if values.dtype.kind == "i" and values.ndim == 1 and len(values) > 1:
475+
if values.dtype.kind == "i" and values.ndim == 1:
476476
# GH 46675 & 43885: If values is equally spaced, return a
477477
# more memory-compact RangeIndex instead of Index with 64-bit dtype
478+
if len(values) == 0:
479+
return type(self)._simple_new(_empty_range, name=name)
480+
elif len(values) == 1:
481+
start = values[0]
482+
new_range = range(start, start + self.step, self.step)
483+
return type(self)._simple_new(new_range, name=name)
478484
diff = values[1] - values[0]
479485
if not missing.isna(diff) and diff != 0:
480-
maybe_range_indexer, remainder = np.divmod(values - values[0], diff)
481-
if (
482-
lib.is_range_indexer(maybe_range_indexer, len(maybe_range_indexer))
483-
and not remainder.any()
484-
):
486+
if len(values) == 2:
487+
# Can skip is_range_indexer check
485488
new_range = range(values[0], values[-1] + diff, diff)
486489
return type(self)._simple_new(new_range, name=name)
490+
else:
491+
maybe_range_indexer, remainder = np.divmod(values - values[0], diff)
492+
if (
493+
lib.is_range_indexer(
494+
maybe_range_indexer, len(maybe_range_indexer)
495+
)
496+
and not remainder.any()
497+
):
498+
new_range = range(values[0], values[-1] + diff, diff)
499+
return type(self)._simple_new(new_range, name=name)
487500
return self._constructor._simple_new(values, name=name)
488501

489502
def _view(self) -> Self:
@@ -894,12 +907,19 @@ def symmetric_difference(
894907
result = result.rename(result_name)
895908
return result
896909

910+
def _join_empty(
911+
self, other: Index, how: JoinHow, sort: bool
912+
) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
913+
if other.dtype.kind == "i":
914+
other = self._shallow_copy(other._values, name=other.name)
915+
return super()._join_empty(other, how=how, sort=sort)
916+
897917
def _join_monotonic(
898918
self, other: Index, how: JoinHow = "left"
899919
) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
900920
# This currently only gets called for the monotonic increasing case
901921
if not isinstance(other, type(self)):
902-
maybe_ri = self._shallow_copy(other._values)
922+
maybe_ri = self._shallow_copy(other._values, name=other.name)
903923
if not isinstance(maybe_ri, type(self)):
904924
return super()._join_monotonic(other, how=how)
905925
other = maybe_ri
@@ -1075,6 +1095,8 @@ def __getitem__(self, key):
10751095
"""
10761096
Conserve RangeIndex type for scalar and slice keys.
10771097
"""
1098+
if key is Ellipsis:
1099+
key = slice(None)
10781100
if isinstance(key, slice):
10791101
return self._getitem_slice(key)
10801102
elif is_integer(key):
@@ -1094,17 +1116,20 @@ def __getitem__(self, key):
10941116
)
10951117
elif com.is_bool_indexer(key):
10961118
if isinstance(getattr(key, "dtype", None), ExtensionDtype):
1097-
np_key = key.to_numpy(dtype=bool, na_value=False)
1119+
key = key.to_numpy(dtype=bool, na_value=False)
10981120
else:
1099-
np_key = np.asarray(key, dtype=bool)
1100-
check_array_indexer(self._range, np_key) # type: ignore[arg-type]
1121+
key = np.asarray(key, dtype=bool)
1122+
check_array_indexer(self._range, key) # type: ignore[arg-type]
11011123
# Short circuit potential _shallow_copy check
1102-
if np_key.all():
1124+
if key.all():
11031125
return self._simple_new(self._range, name=self.name)
1104-
elif not np_key.any():
1126+
elif not key.any():
11051127
return self._simple_new(_empty_range, name=self.name)
1106-
return self.take(np.flatnonzero(np_key))
1107-
return super().__getitem__(key)
1128+
key = np.flatnonzero(key)
1129+
try:
1130+
return self.take(key)
1131+
except (TypeError, ValueError):
1132+
return super().__getitem__(key)
11081133

11091134
def _getitem_slice(self, slobj: slice) -> Self:
11101135
"""

pandas/core/internals/__init__.py

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,6 @@ def __getattr__(name: str):
3535
return create_block_manager_from_blocks
3636

3737
if name in [
38-
"NumericBlock",
39-
"ObjectBlock",
4038
"Block",
4139
"ExtensionBlock",
4240
"DatetimeTZBlock",
@@ -49,25 +47,17 @@ def __getattr__(name: str):
4947
# on hard-coding stacklevel
5048
stacklevel=2,
5149
)
52-
if name == "NumericBlock":
53-
from pandas.core.internals.blocks import NumericBlock
54-
55-
return NumericBlock
56-
elif name == "DatetimeTZBlock":
50+
if name == "DatetimeTZBlock":
5751
from pandas.core.internals.blocks import DatetimeTZBlock
5852

5953
return DatetimeTZBlock
6054
elif name == "ExtensionBlock":
6155
from pandas.core.internals.blocks import ExtensionBlock
6256

6357
return ExtensionBlock
64-
elif name == "Block":
58+
else:
6559
from pandas.core.internals.blocks import Block
6660

6761
return Block
68-
else:
69-
from pandas.core.internals.blocks import ObjectBlock
70-
71-
return ObjectBlock
7262

7363
raise AttributeError(f"module 'pandas.core.internals' has no attribute '{name}'")

pandas/core/internals/blocks.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2148,18 +2148,6 @@ def is_numeric(self) -> bool: # type: ignore[override]
21482148
return kind in "fciub"
21492149

21502150

2151-
class NumericBlock(NumpyBlock):
2152-
# this Block type is kept for backwards-compatibility
2153-
# TODO(3.0): delete and remove deprecation in __init__.py.
2154-
__slots__ = ()
2155-
2156-
2157-
class ObjectBlock(NumpyBlock):
2158-
# this Block type is kept for backwards-compatibility
2159-
# TODO(3.0): delete and remove deprecation in __init__.py.
2160-
__slots__ = ()
2161-
2162-
21632151
class NDArrayBackedExtensionBlock(EABackedBlock):
21642152
"""
21652153
Block backed by an NDArrayBackedExtensionArray

pandas/core/methods/selectn.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ def compute(self, method: str) -> DataFrame:
213213
f"cannot use method {method!r} with this dtype"
214214
)
215215

216-
def get_indexer(current_indexer, other_indexer):
216+
def get_indexer(current_indexer: Index, other_indexer: Index) -> Index:
217217
"""
218218
Helper function to concat `current_indexer` and `other_indexer`
219219
depending on `method`

0 commit comments

Comments
 (0)