Skip to content

Commit 37b175a

Browse files
committed
TST: Add regression test for pyarrow datetime merge with duplicates
Add a test for GH#61926 to ensure merge operations work correctly with pyarrow datetime columns when there are duplicate values on the right side. This was fixed by PR#62276, which improved `Index._get_join_target` handling for pyarrow datetime types.
1 parent b20d6ab commit 37b175a

File tree

1 file changed

+25
-0
lines changed

1 file changed

+25
-0
lines changed

pandas/tests/reshape/merge/test_merge.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3097,3 +3097,28 @@ def test_merge_categorical_key_recursion():
30973097
right.astype("float64"), on="key", how="outer"
30983098
)
30993099
tm.assert_frame_equal(result, expected)
3100+
3101+
3102+
def test_merge_pyarrow_datetime_duplicates():
    # GH#61926
    # Regression test: a left merge on a pyarrow-backed datetime key must not
    # raise ValueError when the right frame holds duplicate key values.
    pytest.importorskip("pyarrow")

    t = pd.date_range("2025-07-06", periods=3, freq="h")

    # Left frame: exactly one row per timestamp.
    df1 = DataFrame({"time": t, "val1": [1, 2, 3]})
    df1 = df1.convert_dtypes(dtype_backend="pyarrow")

    # Right frame: every timestamp appears twice (duplicate keys).
    df2 = DataFrame({"time": t.repeat(2), "val2": [10, 20, 30, 40, 50, 60]})
    df2 = df2.convert_dtypes(dtype_backend="pyarrow")

    result = merge(df1, df2, on="time", how="left")

    # Each of the 3 left rows matches 2 right rows, so 6 rows are expected.
    # Comparing the whole frame (instead of only len() and two columns) also
    # verifies the merged "time" key, the resulting dtypes and the index.
    expected = DataFrame(
        {
            "time": t.repeat(2),
            "val1": [1, 1, 2, 2, 3, 3],
            "val2": [10, 20, 30, 40, 50, 60],
        }
    )
    expected = expected.convert_dtypes(dtype_backend="pyarrow")
    tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)