Skip to content

Commit b3d4747

Browse files
committed
Merge branch 'main' into mutation
2 parents 0638f30 + 8fa78ec commit b3d4747

File tree

21 files changed

+349
-114
lines changed

21 files changed

+349
-114
lines changed

ci/code_checks.sh

Lines changed: 0 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -71,7 +71,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
7171
-i ES01 `# For now it is ok if docstrings are missing the extended summary` \
7272
-i "pandas.Series.dt PR01" `# Accessors are implemented as classes, but we do not document the Parameters section` \
7373
-i "pandas.MultiIndex.reorder_levels RT03,SA01" \
74-
-i "pandas.MultiIndex.to_frame RT03" \
7574
-i "pandas.NA SA01" \
7675
-i "pandas.NaT SA01" \
7776
-i "pandas.Period.freq GL08" \
@@ -81,27 +80,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
8180
-i "pandas.Period.to_timestamp SA01" \
8281
-i "pandas.PeriodDtype SA01" \
8382
-i "pandas.PeriodDtype.freq SA01" \
84-
-i "pandas.PeriodIndex.day SA01" \
85-
-i "pandas.PeriodIndex.day_of_week SA01" \
86-
-i "pandas.PeriodIndex.day_of_year SA01" \
87-
-i "pandas.PeriodIndex.dayofweek SA01" \
88-
-i "pandas.PeriodIndex.dayofyear SA01" \
89-
-i "pandas.PeriodIndex.days_in_month SA01" \
90-
-i "pandas.PeriodIndex.daysinmonth SA01" \
91-
-i "pandas.PeriodIndex.from_fields PR07,SA01" \
92-
-i "pandas.PeriodIndex.from_ordinals SA01" \
93-
-i "pandas.PeriodIndex.hour SA01" \
94-
-i "pandas.PeriodIndex.is_leap_year SA01" \
95-
-i "pandas.PeriodIndex.minute SA01" \
96-
-i "pandas.PeriodIndex.month SA01" \
97-
-i "pandas.PeriodIndex.quarter SA01" \
98-
-i "pandas.PeriodIndex.qyear GL08" \
99-
-i "pandas.PeriodIndex.second SA01" \
100-
-i "pandas.PeriodIndex.to_timestamp RT03,SA01" \
101-
-i "pandas.PeriodIndex.week SA01" \
102-
-i "pandas.PeriodIndex.weekday SA01" \
103-
-i "pandas.PeriodIndex.weekofyear SA01" \
104-
-i "pandas.PeriodIndex.year SA01" \
10583
-i "pandas.RangeIndex PR07" \
10684
-i "pandas.RangeIndex.from_range PR01,SA01" \
10785
-i "pandas.RangeIndex.start SA01" \
@@ -124,7 +102,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
124102
-i "pandas.Series.dt.month_name PR01,PR02" \
125103
-i "pandas.Series.dt.nanoseconds SA01" \
126104
-i "pandas.Series.dt.normalize PR01" \
127-
-i "pandas.Series.dt.qyear GL08" \
128105
-i "pandas.Series.dt.round PR01,PR02" \
129106
-i "pandas.Series.dt.seconds SA01" \
130107
-i "pandas.Series.dt.strftime PR01,PR02" \
@@ -134,12 +111,10 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
134111
-i "pandas.Series.dt.tz_localize PR01,PR02" \
135112
-i "pandas.Series.dt.unit GL08" \
136113
-i "pandas.Series.pad PR01,SA01" \
137-
-i "pandas.Series.sparse PR01,SA01" \
138114
-i "pandas.Series.sparse.fill_value SA01" \
139115
-i "pandas.Series.sparse.from_coo PR07,SA01" \
140116
-i "pandas.Series.sparse.npoints SA01" \
141117
-i "pandas.Series.sparse.sp_values SA01" \
142-
-i "pandas.Series.sparse.to_coo PR07,RT03,SA01" \
143118
-i "pandas.Timedelta.asm8 SA01" \
144119
-i "pandas.Timedelta.ceil SA01" \
145120
-i "pandas.Timedelta.components SA01" \
@@ -152,7 +127,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
152127
-i "pandas.Timedelta.to_timedelta64 SA01" \
153128
-i "pandas.Timedelta.total_seconds SA01" \
154129
-i "pandas.Timedelta.view SA01" \
155-
-i "pandas.TimedeltaIndex.as_unit RT03,SA01" \
156130
-i "pandas.TimedeltaIndex.components SA01" \
157131
-i "pandas.TimedeltaIndex.microseconds SA01" \
158132
-i "pandas.TimedeltaIndex.nanoseconds SA01" \
@@ -253,11 +227,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
253227
-i "pandas.core.resample.Resampler.sum SA01" \
254228
-i "pandas.core.resample.Resampler.transform PR01,RT03,SA01" \
255229
-i "pandas.core.resample.Resampler.var SA01" \
256-
-i "pandas.core.window.expanding.Expanding.corr PR01" \
257-
-i "pandas.core.window.expanding.Expanding.count PR01" \
258-
-i "pandas.core.window.rolling.Rolling.max PR01" \
259-
-i "pandas.core.window.rolling.Window.std PR01" \
260-
-i "pandas.core.window.rolling.Window.var PR01" \
261230
-i "pandas.date_range RT03" \
262231
-i "pandas.errors.AbstractMethodError PR01,SA01" \
263232
-i "pandas.errors.AttributeConflictWarning SA01" \

doc/source/whatsnew/v2.3.0.rst

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -102,7 +102,7 @@ Conversion
102102

103103
Strings
104104
^^^^^^^
105-
-
105+
- Bug in :meth:`Series.str.replace` when ``n < 0`` for :class:`StringDtype` with ``storage="pyarrow"`` (:issue:`59628`)
106106
-
107107

108108
Interval

pandas/_libs/internals.pyx

Lines changed: 17 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,13 @@
11
from collections import defaultdict
2-
import weakref
32

43
cimport cython
4+
from cpython.object cimport PyObject
55
from cpython.pyport cimport PY_SSIZE_T_MAX
66
from cpython.slice cimport PySlice_GetIndicesEx
7+
from cpython.weakref cimport (
8+
PyWeakref_GetObject,
9+
PyWeakref_NewRef,
10+
)
711
from cython cimport Py_ssize_t
812

913
import numpy as np
@@ -26,6 +30,10 @@ from pandas._libs.util cimport (
2630
)
2731

2832

33+
cdef extern from "Python.h":
34+
PyObject* Py_None
35+
36+
2937
@cython.final
3038
@cython.freelist(32)
3139
cdef class BlockPlacement:
@@ -746,7 +754,7 @@ cdef class BlockManager:
746754
# -------------------------------------------------------------------
747755
# Block Placement
748756

749-
def _rebuild_blknos_and_blklocs(self) -> None:
757+
cpdef _rebuild_blknos_and_blklocs(self):
750758
"""
751759
Update mgr._blknos / mgr._blklocs.
752760
"""
@@ -890,27 +898,28 @@ cdef class BlockValuesRefs:
890898

891899
def __cinit__(self, blk: Block | None = None) -> None:
892900
if blk is not None:
893-
self.referenced_blocks = [weakref.ref(blk)]
901+
self.referenced_blocks = [PyWeakref_NewRef(blk, None)]
894902
else:
895903
self.referenced_blocks = []
896904
self.clear_counter = 500 # set reasonably high
897905

898-
def _clear_dead_references(self, force=False) -> None:
906+
cdef _clear_dead_references(self, bint force=False):
899907
# Use exponential backoff to decide when we want to clear references
900908
# if force=False. Clearing for every insertion causes slowdowns if
901909
# all these objects stay alive, e.g. df.items() for wide DataFrames
902910
# see GH#55245 and GH#55008
903911
if force or len(self.referenced_blocks) > self.clear_counter:
904912
self.referenced_blocks = [
905-
ref for ref in self.referenced_blocks if ref() is not None
913+
ref for ref in self.referenced_blocks
914+
if PyWeakref_GetObject(ref) != Py_None
906915
]
907916
nr_of_refs = len(self.referenced_blocks)
908917
if nr_of_refs < self.clear_counter // 2:
909918
self.clear_counter = max(self.clear_counter // 2, 500)
910919
elif nr_of_refs > self.clear_counter:
911920
self.clear_counter = max(self.clear_counter * 2, nr_of_refs)
912921

913-
def add_reference(self, blk: Block) -> None:
922+
cpdef add_reference(self, Block blk):
914923
"""Adds a new reference to our reference collection.
915924
916925
Parameters
@@ -919,7 +928,7 @@ cdef class BlockValuesRefs:
919928
The block that the new references should point to.
920929
"""
921930
self._clear_dead_references()
922-
self.referenced_blocks.append(weakref.ref(blk))
931+
self.referenced_blocks.append(PyWeakref_NewRef(blk, None))
923932

924933
def add_index_reference(self, index: object) -> None:
925934
"""Adds a new reference to our reference collection when creating an index.
@@ -930,7 +939,7 @@ cdef class BlockValuesRefs:
930939
The index that the new reference should point to.
931940
"""
932941
self._clear_dead_references()
933-
self.referenced_blocks.append(weakref.ref(index))
942+
self.referenced_blocks.append(PyWeakref_NewRef(index, None))
934943

935944
def has_reference(self) -> bool:
936945
"""Checks if block has foreign references.

pandas/core/arrays/_arrow_string_mixins.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,14 @@ class ArrowStringArrayMixin:
2323
def __init__(self, *args, **kwargs) -> None:
2424
raise NotImplementedError
2525

26+
def _convert_bool_result(self, result):
27+
# Convert a bool-dtype result to the appropriate result type
28+
raise NotImplementedError
29+
30+
def _convert_int_result(self, result):
31+
# Convert an integer-dtype result to the appropriate result type
32+
raise NotImplementedError
33+
2634
def _str_pad(
2735
self,
2836
width: int,

pandas/core/arrays/arrow/array.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2311,6 +2311,12 @@ def _apply_elementwise(self, func: Callable) -> list[list[Any]]:
23112311
for chunk in self._pa_array.iterchunks()
23122312
]
23132313

2314+
def _convert_bool_result(self, result):
2315+
return type(self)(result)
2316+
2317+
def _convert_int_result(self, result):
2318+
return type(self)(result)
2319+
23142320
def _str_count(self, pat: str, flags: int = 0) -> Self:
23152321
if flags:
23162322
raise NotImplementedError(f"count not implemented with {flags=}")

0 commit comments

Comments
 (0)