Skip to content

Commit dadf6fc

Browse files
authored
Merge branch 'main' into BUG-56994/pyarrow-assignment-unexpected-dtypes
2 parents 1805626 + d11ed2f commit dadf6fc

File tree

6 files changed

+73
-12
lines changed

6 files changed

+73
-12
lines changed

ci/code_checks.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
8989
-i "pandas.api.types.pandas_dtype PR07,RT03,SA01" \
9090
-i "pandas.arrays.ArrowExtensionArray PR07,SA01" \
9191
-i "pandas.arrays.IntegerArray SA01" \
92-
-i "pandas.arrays.IntervalArray.left SA01" \
9392
-i "pandas.arrays.IntervalArray.length SA01" \
9493
-i "pandas.arrays.IntervalArray.right SA01" \
9594
-i "pandas.arrays.NumpyExtensionArray SA01" \

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -740,6 +740,7 @@ Reshaping
740740
- Bug in :meth:`DataFrame.join` when a :class:`DataFrame` with a :class:`MultiIndex` would raise an ``AssertionError`` when :attr:`MultiIndex.names` contained ``None``. (:issue:`58721`)
741741
- Bug in :meth:`DataFrame.merge` where merging on a column containing only ``NaN`` values resulted in an out-of-bounds array access (:issue:`59421`)
742742
- Bug in :meth:`DataFrame.unstack` producing incorrect results when ``sort=False`` (:issue:`54987`, :issue:`55516`)
743+
- Bug in :meth:`DataFrame.merge` when merging two :class:`DataFrame` on ``intc`` or ``uintc`` types on Windows (:issue:`60091`, :issue:`58713`)
743744
- Bug in :meth:`DataFrame.pivot_table` incorrectly subaggregating results when called without an ``index`` argument (:issue:`58722`)
744745
- Bug in :meth:`DataFrame.unstack` producing incorrect results when manipulating empty :class:`DataFrame` with an :class:`ExtensionDtype` (:issue:`59123`)
745746

pandas/core/arrays/interval.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1233,6 +1233,22 @@ def left(self) -> Index:
12331233
"""
12341234
Return the left endpoints of each Interval in the IntervalArray as an Index.
12351235
1236+
This property provides access to the left endpoints of the intervals
1237+
contained within the IntervalArray. This can be useful for analyses where
1238+
the starting point of each interval is of interest, such as in histogram
1239+
creation, data aggregation, or any scenario requiring the identification
1240+
of the beginning of defined ranges. This property returns a ``pandas.Index``
1241+
object containing the left endpoint for each interval.
1242+
1243+
See Also
1244+
--------
1245+
arrays.IntervalArray.right : Return the right endpoints of each Interval in
1246+
the IntervalArray as an Index.
1247+
arrays.IntervalArray.mid : Return the midpoint of each Interval in the
1248+
IntervalArray as an Index.
1249+
arrays.IntervalArray.contains : Check elementwise if the Intervals contain
1250+
the value.
1251+
12361252
Examples
12371253
--------
12381254

pandas/core/generic.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3324,9 +3324,9 @@ def to_latex(
33243324
r"""
33253325
Render object to a LaTeX tabular, longtable, or nested table.
33263326
3327-
Requires ``\usepackage{{booktabs}}``. The output can be copy/pasted
3327+
Requires ``\usepackage{booktabs}``. The output can be copy/pasted
33283328
into a main LaTeX document or read from an external file
3329-
with ``\input{{table.tex}}``.
3329+
with ``\input{table.tex}``.
33303330
33313331
.. versionchanged:: 2.0.0
33323332
Refactored to use the Styler implementation via jinja2 templating.
@@ -3344,13 +3344,13 @@ def to_latex(
33443344
Write row names (index).
33453345
na_rep : str, default 'NaN'
33463346
Missing data representation.
3347-
formatters : list of functions or dict of {{str: function}}, optional
3347+
formatters : list of functions or dict of {str: function}, optional
33483348
Formatter functions to apply to columns' elements by position or
33493349
name. The result of each function must be a unicode string.
33503350
List must be of length equal to the number of columns.
33513351
float_format : one-parameter function or str, optional, default None
33523352
Formatter for floating point numbers. For example
3353-
``float_format="%.2f"`` and ``float_format="{{:0.2f}}".format`` will
3353+
``float_format="%.2f"`` and ``float_format="{:0.2f}".format`` will
33543354
both result in 0.1234 being formatted as 0.12.
33553355
sparsify : bool, optional
33563356
Set to False for a DataFrame with a hierarchical index to print
@@ -3367,7 +3367,7 @@ def to_latex(
33673367
columns of numbers, which default to 'r'.
33683368
longtable : bool, optional
33693369
Use a longtable environment instead of tabular. Requires
3370-
adding a \usepackage{{longtable}} to your LaTeX preamble.
3370+
adding a \usepackage{longtable} to your LaTeX preamble.
33713371
By default, the value will be read from the pandas config
33723372
module, and set to `True` if the option ``styler.latex.environment`` is
33733373
`"longtable"`.
@@ -3405,7 +3405,7 @@ def to_latex(
34053405
default value to "r".
34063406
multirow : bool, default True
34073407
Use \multirow to enhance MultiIndex rows. Requires adding a
3408-
\usepackage{{multirow}} to your LaTeX preamble. Will print
3408+
\usepackage{multirow} to your LaTeX preamble. Will print
34093409
centered labels (instead of top-aligned) across the contained
34103410
rows, separating groups via clines. The default will be read
34113411
from the pandas config module, and is set as the option
@@ -3416,15 +3416,15 @@ def to_latex(
34163416
default value to `True`.
34173417
caption : str or tuple, optional
34183418
Tuple (full_caption, short_caption),
3419-
which results in ``\caption[short_caption]{{full_caption}}``;
3419+
which results in ``\caption[short_caption]{full_caption}``;
34203420
if a single string is passed, no short caption will be set.
34213421
label : str, optional
3422-
The LaTeX label to be placed inside ``\label{{}}`` in the output.
3423-
This is used with ``\ref{{}}`` in the main ``.tex`` file.
3422+
The LaTeX label to be placed inside ``\label{}`` in the output.
3423+
This is used with ``\ref{}`` in the main ``.tex`` file.
34243424
34253425
position : str, optional
34263426
The LaTeX positional argument for tables, to be placed after
3427-
``\begin{{}}`` in the output.
3427+
``\begin{}`` in the output.
34283428
34293429
Returns
34303430
-------

pandas/core/reshape/merge.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,17 @@
123123

124124
# See https://github.com/pandas-dev/pandas/issues/52451
125125
if np.intc is not np.int32:
126-
_factorizers[np.intc] = libhashtable.Int64Factorizer
126+
if np.dtype(np.intc).itemsize == 4:
127+
_factorizers[np.intc] = libhashtable.Int32Factorizer
128+
else:
129+
_factorizers[np.intc] = libhashtable.Int64Factorizer
130+
131+
if np.uintc is not np.uint32:
132+
if np.dtype(np.uintc).itemsize == 4:
133+
_factorizers[np.uintc] = libhashtable.UInt32Factorizer
134+
else:
135+
_factorizers[np.uintc] = libhashtable.UInt64Factorizer
136+
127137

128138
_known = (np.ndarray, ExtensionArray, Index, ABCSeries)
129139

pandas/tests/reshape/merge/test_merge.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1843,6 +1843,41 @@ def test_merge_empty(self, left_empty, how, exp):
18431843

18441844
tm.assert_frame_equal(result, expected)
18451845

1846+
def test_merge_with_uintc_columns(self):
1847+
df1 = DataFrame({"a": ["foo", "bar"], "b": np.array([1, 2], dtype=np.uintc)})
1848+
df2 = DataFrame({"a": ["foo", "baz"], "b": np.array([3, 4], dtype=np.uintc)})
1849+
result = df1.merge(df2, how="outer")
1850+
expected = DataFrame(
1851+
{
1852+
"a": ["bar", "baz", "foo", "foo"],
1853+
"b": np.array([2, 4, 1, 3], dtype=np.uintc),
1854+
}
1855+
)
1856+
tm.assert_frame_equal(result.reset_index(drop=True), expected)
1857+
1858+
def test_merge_with_intc_columns(self):
1859+
df1 = DataFrame({"a": ["foo", "bar"], "b": np.array([1, 2], dtype=np.intc)})
1860+
df2 = DataFrame({"a": ["foo", "baz"], "b": np.array([3, 4], dtype=np.intc)})
1861+
result = df1.merge(df2, how="outer")
1862+
expected = DataFrame(
1863+
{
1864+
"a": ["bar", "baz", "foo", "foo"],
1865+
"b": np.array([2, 4, 1, 3], dtype=np.intc),
1866+
}
1867+
)
1868+
tm.assert_frame_equal(result.reset_index(drop=True), expected)
1869+
1870+
def test_merge_intc_non_monotonic(self):
1871+
df = DataFrame({"join_key": Series([0, 2, 1], dtype=np.intc)})
1872+
df_details = DataFrame(
1873+
{"join_key": Series([0, 1, 2], dtype=np.intc), "value": ["a", "b", "c"]}
1874+
)
1875+
merged = df.merge(df_details, on="join_key", how="left")
1876+
expected = DataFrame(
1877+
{"join_key": np.array([0, 2, 1], dtype=np.intc), "value": ["a", "c", "b"]}
1878+
)
1879+
tm.assert_frame_equal(merged.reset_index(drop=True), expected)
1880+
18461881

18471882
@pytest.fixture
18481883
def left():

0 commit comments

Comments
 (0)