@@ -3101,24 +3101,23 @@ def test_merge_categorical_key_recursion():
3101
3101
3102
3102
def test_merge_pyarrow_datetime_duplicates():
    # GH#61926
    # Regression test: a left merge on a pyarrow-backed datetime key must not
    # raise ValueError when the right-hand key contains duplicates.
    pytest.importorskip("pyarrow")

    t = pd.date_range("2025-07-06", periods=3, freq="h")

    # Left frame: one row per timestamp.
    df1 = DataFrame({"time": t, "val1": [1, 2, 3]})
    df1 = df1.convert_dtypes(dtype_backend="pyarrow")

    # Right frame: every timestamp appears twice.
    df2 = DataFrame({"time": t.repeat(2), "val2": [10, 20, 30, 40, 50, 60]})
    df2 = df2.convert_dtypes(dtype_backend="pyarrow")

    result = merge(df1, df2, on="time", how="left")

    # Each of the 3 left rows matches 2 right rows, giving 6 rows in key order.
    expected = DataFrame(
        {
            "time": t.repeat(2),
            "val1": [1, 1, 2, 2, 3, 3],
            "val2": [10, 20, 30, 40, 50, 60],
        }
    )
    expected = expected.convert_dtypes(dtype_backend="pyarrow")
    tm.assert_frame_equal(result, expected)
0 commit comments