Skip to content

Commit ee5ff56

Browse files
committed
Merge branch 'issue-60550-fix-v2' of https://github.com/Abhibhav2003/pandas-Abhibhav into issue-60550-fix-v2
2 parents d7f4177 + 0007ab4 commit ee5ff56

File tree

16 files changed

+157
-52
lines changed

16 files changed

+157
-52
lines changed

doc/source/getting_started/overview.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,3 +174,4 @@ License
174174
-------
175175

176176
.. literalinclude:: ../../../LICENSE
177+
:language: none

doc/source/whatsnew/v3.0.0.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -694,8 +694,10 @@ Interval
694694
Indexing
695695
^^^^^^^^
696696
- Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`)
697+
- Bug in :meth:`DataFrame.__getitem__` raising an ``OverflowError`` when slicing a :class:`DataFrame` with many rows (:issue:`59531`)
697698
- Bug in :meth:`DataFrame.from_records` throwing a ``ValueError`` when passed an empty list in ``index`` (:issue:`58594`)
698699
- Bug in :meth:`DataFrame.loc` with inconsistent behavior of loc-set with 2 given indexes to Series (:issue:`59933`)
700+
- Bug in :meth:`Index.get_indexer` and similar methods when ``NaN`` is located at or after position 128 (:issue:`58924`)
699701
- Bug in :meth:`MultiIndex.insert` when a new value inserted to a datetime-like level gets cast to ``NaT`` and fails indexing (:issue:`60388`)
700702
- Bug in printing :attr:`Index.names` and :attr:`MultiIndex.levels` would not escape single quotes (:issue:`60190`)
701703

@@ -712,7 +714,7 @@ MultiIndex
712714
- :func:`MultiIndex.get_level_values` accessing a :class:`DatetimeIndex` does not carry the frequency attribute along (:issue:`58327`, :issue:`57949`)
713715
- Bug in :class:`DataFrame` arithmetic operations in case of unaligned MultiIndex columns (:issue:`60498`)
714716
- Bug in :class:`DataFrame` arithmetic operations with :class:`Series` in case of unaligned MultiIndex (:issue:`61009`)
715-
-
717+
- Bug in :meth:`MultiIndex.from_tuples` producing incorrect output when the input tuples contain ``NaN`` values (:issue:`60695`, :issue:`60988`)
716718

717719
I/O
718720
^^^
@@ -787,6 +789,7 @@ Reshaping
787789
- Bug in :meth:`DataFrame.pivot_table` incorrectly subaggregating results when called without an ``index`` argument (:issue:`58722`)
788790
- Bug in :meth:`DataFrame.stack` with the new implementation where ``ValueError`` is raised when ``level=[]`` (:issue:`60740`)
789791
- Bug in :meth:`DataFrame.unstack` producing incorrect results when manipulating empty :class:`DataFrame` with an :class:`ExtensionDtype` (:issue:`59123`)
792+
- Bug in :func:`concat` where concatenating a :class:`DataFrame` and a :class:`Series` with ``ignore_index=True`` drops the :class:`Series` name (:issue:`60723`, :issue:`56257`)
790793

791794
Sparse
792795
^^^^^^

pandas/_libs/hashtable.pxd

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ cdef class HashTable:
4141

4242
cdef class UInt64HashTable(HashTable):
4343
cdef kh_uint64_t *table
44-
cdef int64_t na_position
44+
cdef Py_ssize_t na_position
4545
cdef bint uses_mask
4646

4747
cpdef get_item(self, uint64_t val)
@@ -51,7 +51,7 @@ cdef class UInt64HashTable(HashTable):
5151

5252
cdef class Int64HashTable(HashTable):
5353
cdef kh_int64_t *table
54-
cdef int64_t na_position
54+
cdef Py_ssize_t na_position
5555
cdef bint uses_mask
5656

5757
cpdef get_item(self, int64_t val)
@@ -61,7 +61,7 @@ cdef class Int64HashTable(HashTable):
6161

6262
cdef class UInt32HashTable(HashTable):
6363
cdef kh_uint32_t *table
64-
cdef int64_t na_position
64+
cdef Py_ssize_t na_position
6565
cdef bint uses_mask
6666

6767
cpdef get_item(self, uint32_t val)
@@ -71,7 +71,7 @@ cdef class UInt32HashTable(HashTable):
7171

7272
cdef class Int32HashTable(HashTable):
7373
cdef kh_int32_t *table
74-
cdef int64_t na_position
74+
cdef Py_ssize_t na_position
7575
cdef bint uses_mask
7676

7777
cpdef get_item(self, int32_t val)
@@ -81,7 +81,7 @@ cdef class Int32HashTable(HashTable):
8181

8282
cdef class UInt16HashTable(HashTable):
8383
cdef kh_uint16_t *table
84-
cdef int64_t na_position
84+
cdef Py_ssize_t na_position
8585
cdef bint uses_mask
8686

8787
cpdef get_item(self, uint16_t val)
@@ -91,7 +91,7 @@ cdef class UInt16HashTable(HashTable):
9191

9292
cdef class Int16HashTable(HashTable):
9393
cdef kh_int16_t *table
94-
cdef int64_t na_position
94+
cdef Py_ssize_t na_position
9595
cdef bint uses_mask
9696

9797
cpdef get_item(self, int16_t val)
@@ -101,7 +101,7 @@ cdef class Int16HashTable(HashTable):
101101

102102
cdef class UInt8HashTable(HashTable):
103103
cdef kh_uint8_t *table
104-
cdef int64_t na_position
104+
cdef Py_ssize_t na_position
105105
cdef bint uses_mask
106106

107107
cpdef get_item(self, uint8_t val)
@@ -111,7 +111,7 @@ cdef class UInt8HashTable(HashTable):
111111

112112
cdef class Int8HashTable(HashTable):
113113
cdef kh_int8_t *table
114-
cdef int64_t na_position
114+
cdef Py_ssize_t na_position
115115
cdef bint uses_mask
116116

117117
cpdef get_item(self, int8_t val)
@@ -121,7 +121,7 @@ cdef class Int8HashTable(HashTable):
121121

122122
cdef class Float64HashTable(HashTable):
123123
cdef kh_float64_t *table
124-
cdef int64_t na_position
124+
cdef Py_ssize_t na_position
125125
cdef bint uses_mask
126126

127127
cpdef get_item(self, float64_t val)
@@ -131,7 +131,7 @@ cdef class Float64HashTable(HashTable):
131131

132132
cdef class Float32HashTable(HashTable):
133133
cdef kh_float32_t *table
134-
cdef int64_t na_position
134+
cdef Py_ssize_t na_position
135135
cdef bint uses_mask
136136

137137
cpdef get_item(self, float32_t val)
@@ -141,7 +141,7 @@ cdef class Float32HashTable(HashTable):
141141

142142
cdef class Complex64HashTable(HashTable):
143143
cdef kh_complex64_t *table
144-
cdef int64_t na_position
144+
cdef Py_ssize_t na_position
145145
cdef bint uses_mask
146146

147147
cpdef get_item(self, complex64_t val)
@@ -151,7 +151,7 @@ cdef class Complex64HashTable(HashTable):
151151

152152
cdef class Complex128HashTable(HashTable):
153153
cdef kh_complex128_t *table
154-
cdef int64_t na_position
154+
cdef Py_ssize_t na_position
155155
cdef bint uses_mask
156156

157157
cpdef get_item(self, complex128_t val)

pandas/_libs/hashtable_class_helper.pxi.in

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -535,7 +535,7 @@ cdef class {{name}}HashTable(HashTable):
535535
int ret = 0
536536
{{c_type}} val
537537
khiter_t k
538-
int8_t na_position = self.na_position
538+
Py_ssize_t na_position = self.na_position
539539

540540
if self.uses_mask and mask is None:
541541
raise NotImplementedError # pragma: no cover
@@ -567,7 +567,7 @@ cdef class {{name}}HashTable(HashTable):
567567
Int64Vector self_locs = Int64Vector()
568568
Int64VectorData *l
569569
Int64VectorData *sl
570-
int8_t na_position = self.na_position
570+
Py_ssize_t na_position = self.na_position
571571

572572
l = &locs.data
573573
sl = &self_locs.data
@@ -609,7 +609,7 @@ cdef class {{name}}HashTable(HashTable):
609609
{{c_type}} val
610610
khiter_t k
611611
intp_t[::1] locs = np.empty(n, dtype=np.intp)
612-
int8_t na_position = self.na_position
612+
Py_ssize_t na_position = self.na_position
613613

614614
if self.uses_mask and mask is None:
615615
raise NotImplementedError # pragma: no cover

pandas/_libs/lib.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -502,7 +502,7 @@ def has_only_ints_or_nan(const floating[:] arr) -> bool:
502502
return True
503503

504504

505-
def maybe_indices_to_slice(ndarray[intp_t, ndim=1] indices, int max_len):
505+
def maybe_indices_to_slice(ndarray[intp_t, ndim=1] indices, intp_t max_len):
506506
cdef:
507507
Py_ssize_t i, n = len(indices)
508508
intp_t k, vstart, vlast, v

pandas/_libs/tslibs/timedeltas.pyx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1740,7 +1740,8 @@ cdef class _Timedelta(timedelta):
17401740
Format the Timedelta as ISO 8601 Duration.
17411741

17421742
``P[n]Y[n]M[n]DT[n]H[n]M[n]S``, where the ``[n]`` s are replaced by the
1743-
values. See https://en.wikipedia.org/wiki/ISO_8601#Durations.
1743+
values. See Wikipedia:
1744+
`ISO 8601 § Durations <https://en.wikipedia.org/wiki/ISO_8601#Durations>`_.
17441745

17451746
Returns
17461747
-------

pandas/_libs/tslibs/timestamps.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1309,7 +1309,7 @@ cdef class _Timestamp(ABCTimestamp):
13091309
By default, the fractional part is omitted if self.microsecond == 0
13101310
and self._nanosecond == 0.
13111311

1312-
If self.tzinfo is not None, the UTC offset is also attached, giving
1312+
If self.tzinfo is not None, the UTC offset is also attached,
13131313
giving a full format of 'YYYY-MM-DD HH:MM:SS.mmmmmmnnn+HH:MM'.
13141314

13151315
Parameters

pandas/core/algorithms.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1647,6 +1647,8 @@ def map_array(
16471647
If the function returns a tuple with more than one element
16481648
a MultiIndex will be returned.
16491649
"""
1650+
from pandas import Index
1651+
16501652
if na_action not in (None, "ignore"):
16511653
msg = f"na_action must either be 'ignore' or None, {na_action} was passed"
16521654
raise ValueError(msg)
@@ -1676,6 +1678,10 @@ def map_array(
16761678

16771679
if len(mapper) == 0:
16781680
mapper = Series(mapper, dtype=np.float64)
1681+
elif isinstance(mapper, dict):
1682+
mapper = Series(
1683+
mapper.values(), index=Index(mapper.keys(), tupleize_cols=False)
1684+
)
16791685
else:
16801686
mapper = Series(mapper)
16811687

pandas/core/frame.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10286,7 +10286,9 @@ def apply(
1028610286
either the DataFrame's index (``axis=0``) or the DataFrame's columns
1028710287
(``axis=1``). By default (``result_type=None``), the final return type
1028810288
is inferred from the return type of the applied function. Otherwise,
10289-
it depends on the `result_type` argument.
10289+
it depends on the `result_type` argument. The return type of the applied
10290+
function is inferred based on the first computed result obtained after
10291+
applying the function to a Series object.
1029010292
1029110293
Parameters
1029210294
----------

pandas/core/indexes/multi.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
Sequence,
1010
)
1111
from functools import wraps
12+
from itertools import zip_longest
1213
from sys import getsizeof
1314
from typing import (
1415
TYPE_CHECKING,
@@ -588,7 +589,7 @@ def from_tuples(
588589
elif isinstance(tuples, list):
589590
arrays = list(lib.to_object_array_tuples(tuples).T)
590591
else:
591-
arrs = zip(*tuples)
592+
arrs = zip_longest(*tuples, fillvalue=np.nan)
592593
arrays = cast(list[Sequence[Hashable]], arrs)
593594

594595
return cls.from_arrays(arrays, sortorder=sortorder, names=names)

0 commit comments

Comments
 (0)