
Commit 85093a6

Merge branch 'main' into enh_cumsum_for_np_str
2 parents: 2fd9779 + d4dff29


43 files changed: +272 / -198 lines

doc/source/development/contributing_gitpod.rst

Lines changed: 1 addition & 1 deletion
@@ -109,7 +109,7 @@ development experience:

 * `VSCode rst extension <https://marketplace.visualstudio.com/items?itemName=lextudio.restructuredtext>`_
 * `Markdown All in One <https://marketplace.visualstudio.com/items?itemName=yzhang.markdown-all-in-one>`_
-* `VSCode Gitlens extension <https://marketplace.visualstudio.com/items?itemName=eamodio.gitlens>`_
+* `VSCode GitLens extension <https://marketplace.visualstudio.com/items?itemName=eamodio.gitlens>`_
 * `VSCode Git Graph extension <https://marketplace.visualstudio.com/items?itemName=mhutchie.git-graph>`_

 Development workflow with Gitpod

doc/source/whatsnew/v2.3.0.rst

Lines changed: 12 additions & 1 deletion
@@ -37,7 +37,8 @@ Other enhancements
   updated to work correctly with NumPy >= 2 (:issue:`57739`)
 - :meth:`Series.str.decode` result now has ``StringDtype`` when ``future.infer_string`` is True (:issue:`60709`)
 - :meth:`~Series.to_hdf` and :meth:`~DataFrame.to_hdf` now round-trip with ``StringDtype`` (:issue:`60663`)
-- The :meth:`~Series.cumsum`, :meth:`~Series.cummin`, and :meth:`~Series.cummax` reductions are now implemented for ``StringDtype`` columns (:issue:`60633`, :issue:`60633`)
+- The :meth:`Series.str.decode` has gained the argument ``dtype`` to control the dtype of the result (:issue:`60940`)
+- The :meth:`~Series.cumsum`, :meth:`~Series.cummin`, and :meth:`~Series.cummax` reductions are now implemented for ``StringDtype`` columns (:issue:`60633`)
 - The :meth:`~Series.sum` reduction is now implemented for ``StringDtype`` columns (:issue:`59853`)

 .. ---------------------------------------------------------------------------
@@ -53,6 +54,16 @@ These are bug fixes that might have notable behavior changes.
 notable_bug_fix1
 ^^^^^^^^^^^^^^^^

+.. _whatsnew_230.api_changes:
+
+API changes
+~~~~~~~~~~~
+
+- When enabling the ``future.infer_string`` option: Index set operations (like
+  union or intersection) will now ignore the dtype of an empty ``RangeIndex`` or
+  empty ``Index`` with object dtype when determining the dtype of the resulting
+  Index (:issue:`60797`)
+
 .. ---------------------------------------------------------------------------
 .. _whatsnew_230.deprecations:
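The enhancements listed above are user-facing. A minimal, hedged sketch of how the new ``StringDtype`` accumulations and the new ``dtype`` argument of ``Series.str.decode`` could be exercised (exact reprs depend on the pandas 2.3 build and on PyArrow being available; the sample values are invented):

    import pandas as pd

    s = pd.Series(["ab", "a", "c"], dtype="string")
    s.cumsum()   # running concatenation: "ab", "aba", "abac"
    s.cummin()   # running lexicographic minimum: "ab", "a", "a"
    s.cummax()   # running lexicographic maximum: "ab", "ab", "c"

    # Series.str.decode gained a dtype argument to control the result dtype
    raw = pd.Series([b"abc", b"xyz"])
    raw.str.decode("utf-8", dtype="string")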

doc/source/whatsnew/v3.0.0.rst

Lines changed: 4 additions & 0 deletions
@@ -361,6 +361,9 @@ Other API changes
 - pickle and HDF (``.h5``) files created with Python 2 are no longer explicitly supported (:issue:`57387`)
 - pickled objects from pandas version less than ``1.0.0`` are no longer supported (:issue:`57155`)
 - when comparing the indexes in :func:`testing.assert_series_equal`, check_exact defaults to True if an :class:`Index` is of integer dtypes. (:issue:`57386`)
+- Index set operations (like union or intersection) will now ignore the dtype of
+  an empty ``RangeIndex`` or empty ``Index`` with object dtype when determining
+  the dtype of the resulting Index (:issue:`60797`)

 .. ---------------------------------------------------------------------------
 .. _whatsnew_300.deprecations:
@@ -667,6 +670,7 @@ Conversion
 - Bug in :meth:`DataFrame.astype` not casting ``values`` for Arrow-based dictionary dtype correctly (:issue:`58479`)
 - Bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`)
 - Bug in :meth:`Series.astype` might modify read-only array inplace when casting to a string dtype (:issue:`57212`)
+- Bug in :meth:`Series.convert_dtypes` and :meth:`DataFrame.convert_dtypes` removing timezone information for objects with :class:`ArrowDtype` (:issue:`60237`)
 - Bug in :meth:`Series.reindex` not maintaining ``float32`` type when a ``reindex`` introduces a missing value (:issue:`45857`)

 Strings

pandas/_libs/algos.pyx

Lines changed: 1 addition & 27 deletions
@@ -818,33 +818,7 @@ def is_monotonic(const numeric_object_t[:] arr, bint timelike):
     if timelike and <int64_t>arr[0] == NPY_NAT:
         return False, False, False

-    if numeric_object_t is not object:
-        with nogil:
-            prev = arr[0]
-            for i in range(1, n):
-                cur = arr[i]
-                if timelike and <int64_t>cur == NPY_NAT:
-                    is_monotonic_inc = 0
-                    is_monotonic_dec = 0
-                    break
-                if cur < prev:
-                    is_monotonic_inc = 0
-                elif cur > prev:
-                    is_monotonic_dec = 0
-                elif cur == prev:
-                    is_unique = 0
-                else:
-                    # cur or prev is NaN
-                    is_monotonic_inc = 0
-                    is_monotonic_dec = 0
-                    break
-                if not is_monotonic_inc and not is_monotonic_dec:
-                    is_monotonic_inc = 0
-                    is_monotonic_dec = 0
-                    break
-                prev = cur
-    else:
-        # object-dtype, identical to above except we cannot use `with nogil`
+    with nogil(numeric_object_t is not object):
         prev = arr[0]
         for i in range(1, n):
             cur = arr[i]

pandas/_libs/hashtable_func_helper.pxi.in

Lines changed: 1 addition & 14 deletions
@@ -415,20 +415,7 @@ def mode(ndarray[htfunc_t] values, bint dropna, const uint8_t[:] mask=None):

     modes = np.empty(nkeys, dtype=values.dtype)

-    if htfunc_t is not object:
-        with nogil:
-            for k in range(nkeys):
-                count = counts[k]
-                if count == max_count:
-                    j += 1
-                elif count > max_count:
-                    max_count = count
-                    j = 0
-                else:
-                    continue
-
-                modes[j] = keys[k]
-    else:
+    with nogil(htfunc_t is not object):
         for k in range(nkeys):
             count = counts[k]
             if count == max_count:

pandas/_libs/internals.pyx

Lines changed: 3 additions & 3 deletions
@@ -502,7 +502,7 @@ def get_concat_blkno_indexers(list blknos_list not None):
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def get_blkno_indexers(
-    int64_t[:] blknos, bint group=True
+    const int64_t[:] blknos, bint group=True
 ) -> list[tuple[int, slice | np.ndarray]]:
     """
     Enumerate contiguous runs of integers in ndarray.
@@ -596,8 +596,8 @@ def get_blkno_placements(blknos, group: bool = True):
 @cython.boundscheck(False)
 @cython.wraparound(False)
 cpdef update_blklocs_and_blknos(
-    ndarray[intp_t, ndim=1] blklocs,
-    ndarray[intp_t, ndim=1] blknos,
+    const intp_t[:] blklocs,
+    const intp_t[:] blknos,
     Py_ssize_t loc,
     intp_t nblocks,
 ):

pandas/_libs/join.pyx

Lines changed: 10 additions & 7 deletions
@@ -225,7 +225,10 @@ def full_outer_join(const intp_t[:] left, const intp_t[:] right,

 @cython.wraparound(False)
 @cython.boundscheck(False)
-cdef void _get_result_indexer(intp_t[::1] sorter, intp_t[::1] indexer) noexcept nogil:
+cdef void _get_result_indexer(
+    const intp_t[::1] sorter,
+    intp_t[::1] indexer,
+) noexcept nogil:
     """NOTE: overwrites indexer with the result to avoid allocating another array"""
     cdef:
         Py_ssize_t i, n, idx
@@ -681,8 +684,8 @@ def outer_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
 from pandas._libs.hashtable cimport Int64HashTable


-def asof_join_backward_on_X_by_Y(ndarray[numeric_t] left_values,
-                                 ndarray[numeric_t] right_values,
+def asof_join_backward_on_X_by_Y(const numeric_t[:] left_values,
+                                 const numeric_t[:] right_values,
                                  const int64_t[:] left_by_values,
                                  const int64_t[:] right_by_values,
                                  bint allow_exact_matches=True,
@@ -752,8 +755,8 @@ def asof_join_backward_on_X_by_Y(ndarray[numeric_t] left_values,
     return left_indexer, right_indexer


-def asof_join_forward_on_X_by_Y(ndarray[numeric_t] left_values,
-                                ndarray[numeric_t] right_values,
+def asof_join_forward_on_X_by_Y(const numeric_t[:] left_values,
+                                const numeric_t[:] right_values,
                                 const int64_t[:] left_by_values,
                                 const int64_t[:] right_by_values,
                                 bint allow_exact_matches=1,
@@ -824,8 +827,8 @@ def asof_join_forward_on_X_by_Y(ndarray[numeric_t] left_values,
     return left_indexer, right_indexer


-def asof_join_nearest_on_X_by_Y(ndarray[numeric_t] left_values,
-                                ndarray[numeric_t] right_values,
+def asof_join_nearest_on_X_by_Y(const numeric_t[:] left_values,
+                                const numeric_t[:] right_values,
                                 const int64_t[:] left_by_values,
                                 const int64_t[:] right_by_values,
                                 bint allow_exact_matches=True,

pandas/_libs/lib.pyx

Lines changed: 2 additions & 4 deletions
@@ -981,16 +981,14 @@ def get_level_sorter(

 @cython.boundscheck(False)
 @cython.wraparound(False)
-def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask,
+def count_level_2d(const uint8_t[:, :] mask,
                    const intp_t[:] labels,
                    Py_ssize_t max_bin,
                    ):
     cdef:
-        Py_ssize_t i, j, k, n
+        Py_ssize_t i, j, k = mask.shape[1], n = mask.shape[0]
         ndarray[int64_t, ndim=2] counts

-    n, k = (<object>mask).shape
-
     counts = np.zeros((n, max_bin), dtype="i8")
     with nogil:
         for i in range(n):

pandas/_libs/reshape.pyx

Lines changed: 1 addition & 21 deletions
@@ -40,27 +40,7 @@ def unstack(const numeric_object_t[:, :] values, const uint8_t[:] mask,
     cdef:
         Py_ssize_t i, j, w, nulls, s, offset

-    if numeric_object_t is not object:
-        # evaluated at compile-time
-        with nogil:
-            for i in range(stride):
-
-                nulls = 0
-                for j in range(length):
-
-                    for w in range(width):
-
-                        offset = j * width + w
-
-                        if mask[offset]:
-                            s = i * width + w
-                            new_values[j, s] = values[offset - nulls, i]
-                            new_mask[j, s] = 1
-                        else:
-                            nulls += 1
-
-    else:
-        # object-dtype, identical to above but we cannot use nogil
+    with nogil(numeric_object_t is not object):
         for i in range(stride):

             nulls = 0

pandas/core/dtypes/cast.py

Lines changed: 1 addition & 1 deletion
@@ -1113,7 +1113,7 @@ def convert_dtypes(
     else:
         inferred_dtype = input_array.dtype

-    if dtype_backend == "pyarrow":
+    if dtype_backend == "pyarrow" and not isinstance(inferred_dtype, ArrowDtype):
         from pandas.core.arrays.arrow.array import to_pyarrow_type
         from pandas.core.arrays.string_ import StringDtype
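A hedged sketch of what the one-line guard above fixes (GH 60237): a Series that already carries an ArrowDtype should pass through convert_dtypes(dtype_backend="pyarrow") unchanged instead of being re-inferred and losing its timezone. Assumes PyArrow is installed; the sample data is invented:

    import pandas as pd

    ser = pd.Series(
        pd.to_datetime(["2024-01-01", "2024-01-02"]).tz_localize("UTC")
    ).convert_dtypes(dtype_backend="pyarrow")   # timestamp[ns, tz=UTC][pyarrow]

    # Converting a second time should keep the ArrowDtype, timezone included.
    ser.convert_dtypes(dtype_backend="pyarrow").dtype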
