@@ -162,21 +162,28 @@ def test_merge_index_singlekey_inner(self):
162162 {
163163 "key" : ["a" , "b" , "c" , "d" , "e" , "e" , "a" ],
164164 "v1" : np .random .default_rng (2 ).standard_normal (7 ),
165+ "left_index" : range (7 ),
165166 }
166167 )
168+
167169 right = DataFrame (
168170 {"v2" : np .random .default_rng (2 ).standard_normal (4 )},
169171 index = ["d" , "b" , "c" , "a" ],
170172 )
171173
172174 # inner join
173175 result = merge (left , right , left_on = "key" , right_index = True , how = "inner" )
174- expected = left .join (right , on = "key" ).loc [result .index ]
175- tm .assert_frame_equal (result , expected )
176+ expected = left .join (right , on = "key" ).loc [result ["left_index" ]]
177+ tm .assert_frame_equal (
178+ result .reset_index (drop = True ), expected .reset_index (drop = True )
179+ )
176180
177181 result = merge (right , left , right_on = "key" , left_index = True , how = "inner" )
178- expected = left .join (right , on = "key" ).loc [result .index ]
179- tm .assert_frame_equal (result , expected .loc [:, result .columns ])
182+ expected = left .join (right , on = "key" ).loc [result ["left_index" ]]
183+ tm .assert_frame_equal (
184+ result .reset_index (drop = True ),
185+ expected .loc [:, result .columns ].reset_index (drop = True ),
186+ )
180187
181188 def test_merge_misspecified (self , df , df2 , left ):
182189 right = DataFrame (
@@ -349,8 +356,9 @@ def test_handle_join_key_pass_array(self):
349356 right = DataFrame ({"rvalue" : np .arange (6 )})
350357
351358 key = np .array ([0 , 1 , 1 , 2 , 2 , 3 ], dtype = np .int64 )
359+ index = np .array ([0 , 1 , 1 , 2 , 2 , np .nan ], dtype = np .float64 )
352360 merged = merge (left , right , left_index = True , right_on = key , how = "outer" )
353- tm .assert_series_equal (merged ["key_0" ], Series (key , name = "key_0" ))
361+ tm .assert_series_equal (merged ["key_0" ], Series (key , index = index , name = "key_0" ))
354362
355363 def test_no_overlap_more_informative_error (self ):
356364 dt = datetime .now ()
@@ -453,6 +461,9 @@ def test_merge_left_empty_right_notempty(self):
453461 )
454462 exp_in = exp_out [0 :0 ] # make empty DataFrame keeping dtype
455463
464+ exp_nan = exp_out .copy ()
465+ exp_nan .index = [np .nan ] * 3
466+
456467 def check1 (exp , kwarg ):
457468 result = merge (left , right , how = "inner" , ** kwarg )
458469 tm .assert_frame_equal (result , exp )
@@ -465,12 +476,13 @@ def check2(exp, kwarg):
465476 result = merge (left , right , how = "outer" , ** kwarg )
466477 tm .assert_frame_equal (result , exp )
467478
468- for kwarg in [
469- {"left_index" : True , "right_index" : True },
470- {"left_index" : True , "right_on" : "x" },
471- ]:
472- check1 (exp_in , kwarg )
473- check2 (exp_out , kwarg )
479+ kwarg = {"left_index" : True , "right_on" : "x" }
480+ check1 (exp_in , kwarg )
481+ check2 (exp_nan , kwarg )
482+
483+ kwarg = {"left_index" : True , "right_index" : True }
484+ check1 (exp_in , kwarg )
485+ check2 (exp_out , kwarg )
474486
475487 kwarg = {"left_on" : "a" , "right_index" : True }
476488 check1 (exp_in , kwarg )
@@ -762,6 +774,7 @@ def test_other_datetime_unit(self, unit):
762774 "days" : days ,
763775 },
764776 columns = ["entity_id" , "days" ],
777+ index = [101 , 102 ],
765778 )
766779 assert exp ["days" ].dtype == exp_dtype
767780 tm .assert_frame_equal (result , exp )
@@ -789,6 +802,7 @@ def test_other_timedelta_unit(self, unit):
789802 exp = DataFrame (
790803 {"entity_id" : [101 , 102 ], "days" : np .array (["nat" , "nat" ], dtype = dtype )},
791804 columns = ["entity_id" , "days" ],
805+ index = [101 , 102 ],
792806 )
793807 tm .assert_frame_equal (result , exp )
794808
@@ -1190,7 +1204,7 @@ def test_validation(self):
11901204 "c" : ["meow" , "bark" , "um... weasel noise?" , "nay" ],
11911205 },
11921206 columns = ["b" , "a" , "c" ],
1193- index = range ( 4 ),
1207+ index = Index ([ "a" , "b" , "c" , "d" ], name = "a" ),
11941208 )
11951209
11961210 left_index_reset = left .set_index ("a" )
@@ -1331,48 +1345,17 @@ def test_merge_two_empty_df_no_division_error(self):
13311345
13321346 @pytest .mark .parametrize ("how" , ["right" , "outer" ])
13331347 @pytest .mark .parametrize (
1334- "index,expected_index " ,
1348+ "index" ,
13351349 [
1336- (
1337- CategoricalIndex ([1 , 2 , 4 ]),
1338- CategoricalIndex ([1 , 2 , 4 , None , None , None ]),
1339- ),
1340- (
1341- DatetimeIndex (
1342- ["2001-01-01" , "2002-02-02" , "2003-03-03" ], dtype = "M8[ns]"
1343- ),
1344- DatetimeIndex (
1345- ["2001-01-01" , "2002-02-02" , "2003-03-03" , pd .NaT , pd .NaT , pd .NaT ],
1346- dtype = "M8[ns]" ,
1347- ),
1348- ),
1349- * [
1350- (
1351- Index ([1 , 2 , 3 ], dtype = dtyp ),
1352- Index ([1 , 2 , 3 , None , None , None ], dtype = np .float64 ),
1353- )
1354- for dtyp in tm .ALL_REAL_NUMPY_DTYPES
1355- ],
1356- (
1357- IntervalIndex .from_tuples ([(1 , 2 ), (2 , 3 ), (3 , 4 )]),
1358- IntervalIndex .from_tuples (
1359- [(1 , 2 ), (2 , 3 ), (3 , 4 ), np .nan , np .nan , np .nan ]
1360- ),
1361- ),
1362- (
1363- PeriodIndex (["2001-01-01" , "2001-01-02" , "2001-01-03" ], freq = "D" ),
1364- PeriodIndex (
1365- ["2001-01-01" , "2001-01-02" , "2001-01-03" , pd .NaT , pd .NaT , pd .NaT ],
1366- freq = "D" ,
1367- ),
1368- ),
1369- (
1370- TimedeltaIndex (["1D" , "2D" , "3D" ]),
1371- TimedeltaIndex (["1D" , "2D" , "3D" , pd .NaT , pd .NaT , pd .NaT ]),
1372- ),
1350+ CategoricalIndex ([1 , 2 , 4 ]),
1351+ DatetimeIndex (["2001-01-01" , "2002-02-02" , "2003-03-03" ], dtype = "M8[ns]" ),
1352+ * [Index ([1 , 2 , 3 ], dtype = dtyp ) for dtyp in tm .ALL_REAL_NUMPY_DTYPES ],
1353+ IntervalIndex .from_tuples ([(1 , 2 ), (2 , 3 ), (3 , 4 )]),
1354+ PeriodIndex (["2001-01-01" , "2001-01-02" , "2001-01-03" ], freq = "D" ),
1355+ TimedeltaIndex (["1D" , "2D" , "3D" ]),
13731356 ],
13741357 )
1375- def test_merge_on_index_with_more_values (self , how , index , expected_index ):
1358+ def test_merge_on_index_with_more_values (self , how , index ):
13761359 # GH 24212
13771360 # pd.merge gets [0, 1, 2, -1, -1, -1] as left_indexer, ensure that
13781361 # -1 is interpreted as a missing value instead of the last element
@@ -1390,20 +1373,17 @@ def test_merge_on_index_with_more_values(self, how, index, expected_index):
13901373 ],
13911374 columns = ["a" , "key" , "b" ],
13921375 )
1393- expected .set_index (expected_index , inplace = True )
13941376 tm .assert_frame_equal (result , expected )
13951377
# Review note: this hunk rewrites the expected result of a right merge that
# joins the left frame's "key" column against the right frame's index.
13961378 def test_merge_right_index_right (self ):
1397- # Note: the expected output here is probably incorrect.
1398- # See https://github.com/pandas-dev/pandas/issues/17257 for more.
1399- # We include this as a regression test for GH-24897.
1379+ # Regression test for GH-24897.
# `left` has three rows whose "key" values are 0, 1, 1; `right` has three rows
# and no explicit index argument.
14001380 left = DataFrame ({"a" : [1 , 2 , 3 ], "key" : [0 , 1 , 1 ]})
14011381 right = DataFrame ({"b" : [1 , 2 , 3 ]})
14021382
# Four expected rows: the last one has a=None with key=2 and b=3.
14031383 expected = DataFrame (
14041384 {"a" : [1 , 2 , 3 , None ], "key" : [0 , 1 , 1 , 2 ], "b" : [1 , 2 , 2 , 3 ]},
14051385 columns = ["a" , "key" , "b" ],
# The patch replaces the old expected index (which ended in NaN) with
# [0, 1, 1, 2], i.e. the same values as the expected "key" column above.
1406- index = [0 , 1 , 2 , np . nan ],
1386+ index = [0 , 1 , 1 , 2 ],
14071387 )
# how="right" with left_on="key" / right_index=True is the call under test;
# assert_frame_equal compares the result against `expected`, including the index set above.
14081388 result = left .merge (right , left_on = "key" , right_index = True , how = "right" )
14091389 tm .assert_frame_equal (result , expected )
@@ -1436,7 +1416,7 @@ def test_merge_take_missing_values_from_index_of_other_dtype(self):
14361416 "key" : Categorical (["a" , "a" , "b" , "c" ]),
14371417 "b" : [1 , 1 , 2 , 3 ],
14381418 },
1439- index = [ 0 , 1 , 2 , np . nan ] ,
1419+ index = Categorical ([ "a" , "a" , "b" , "c" ], categories = list ( "abc" )) ,
14401420 )
14411421 expected = expected .reindex (columns = ["a" , "key" , "b" ])
14421422 tm .assert_frame_equal (result , expected )
@@ -2661,7 +2641,8 @@ def test_merge_right_left_index():
26612641 "z_x" : ["foo" , "foo" ],
26622642 "x_y" : [1 , 1 ],
26632643 "z_y" : ["foo" , "foo" ],
2664- }
2644+ },
2645+ index = [1 , 1 ],
26652646 )
26662647 tm .assert_frame_equal (result , expected )
26672648
@@ -2670,7 +2651,7 @@ def test_merge_result_empty_index_and_on():
26702651 # GH#33814
26712652 df1 = DataFrame ({"a" : [1 ], "b" : [2 ]}).set_index (["a" , "b" ])
26722653 df2 = DataFrame ({"b" : [1 ]}).set_index (["b" ])
2673- expected = DataFrame ({"a" : [], " b" : []}, dtype = np .int64 ).set_index (["a" , "b" ])
2654+ expected = DataFrame ({"b" : []}, dtype = np .int64 ).set_index (["b" ])
26742655 result = merge (df1 , df2 , left_on = ["b" ], right_index = True )
26752656 tm .assert_frame_equal (result , expected )
26762657
@@ -2850,7 +2831,9 @@ def test_merge_multiindex_single_level():
28502831 data = {"b" : [100 ]},
28512832 index = MultiIndex .from_tuples ([("A" ,), ("C" ,)], names = ["col" ]),
28522833 )
2853- expected = DataFrame ({"col" : ["A" , "B" ], "b" : [100 , np .nan ]})
2834+ expected = DataFrame (
2835+ {"col" : ["A" , "B" ], "b" : [100 , np .nan ]}, index = Index ([("A" ,), np .nan ])
2836+ )
28542837
28552838 result = df .merge (df2 , left_on = ["col" ], right_index = True , how = "left" )
28562839 tm .assert_frame_equal (result , expected )
@@ -2957,14 +2940,20 @@ def test_merge_ea_int_and_float_numpy():
29572940 tm .assert_frame_equal (result , expected .astype ("float64" ))
29582941
29592942
2943+ from pandas .core .dtypes .missing import na_value_for_dtype
2944+
2945+
# Review note: left merge of a string column against a string-typed index,
# parametrized through the `any_string_dtype` fixture argument.
# NOTE(review): the patch imports `na_value_for_dtype` in the middle of the
# module, directly above this function; consider moving that import to the
# module's top-level import block.
29602946def test_merge_arrow_string_index (any_string_dtype ):
29612947 # GH#54894
# Skips the test when pyarrow cannot be imported.
29622948 pytest .importorskip ("pyarrow" )
# `left` holds "a"/"b" in column "a"; `right` is indexed by "a"/"c", so only
# the "a" row of `left` has a matching label in `right`.
29632949 left = DataFrame ({"a" : ["a" , "b" ]}, dtype = any_string_dtype )
29642950 right = DataFrame ({"b" : 1 }, index = Index (["a" , "c" ], dtype = any_string_dtype ))
29652951 result = left .merge (right , left_on = "a" , right_index = True , how = "left" )
29662952 expected = DataFrame (
# The patch spells the matched value as 1.0 instead of 1 next to the NaN.
2967- {"a" : Series (["a" , "b" ], dtype = any_string_dtype ), "b" : [1 , np .nan ]}
2953+ {"a" : Series (["a" , "b" ], dtype = any_string_dtype ), "b" : [1.0 , np .nan ]},
2954+ )
# New in the patch: the expected index is the label "a" followed by the value
# returned by na_value_for_dtype(any_string_dtype) for the unmatched row.
2955+ expected .index = Index (["a" ], dtype = any_string_dtype ).append (
2956+ Index ([na_value_for_dtype (any_string_dtype )])
29682957 )
29692958 tm .assert_frame_equal (result , expected )
29702959
@@ -3022,3 +3011,12 @@ def test_merge_on_all_nan_column():
30223011 {"x" : [1 , 2 , 3 ], "y" : [np .nan , np .nan , np .nan ], "z" : [4 , 5 , 6 ], "zz" : [4 , 5 , 6 ]}
30233012 )
30243013 tm .assert_frame_equal (result , expected )
3014+
3015+
# Review note: test added by this patch. It checks that a left merge using
# left_index=True / right_on="key" returns a frame whose index equals the
# left frame's index.
3016+ def test_merge_index ():
3017+ # GH 57291
# `dfa`: one column "a" built from range(10), no explicit index passed.
3018+ dfa = DataFrame (range (10 ), columns = ["a" ])
# `dfb`: five rows, "b" = 0..4 and "key" = 5..9, so only some of dfa's index
# labels appear among the "key" values.
3019+ dfb = DataFrame ({"b" : range (5 ), "key" : [5 + x for x in range (5 )]})
3020+
3021+ result = dfa .merge (dfb , left_index = True , right_on = "key" , how = "left" )
# Only the index is compared here; column contents are not asserted.
3022+ tm .assert_index_equal (result .index , dfa .index )
0 commit comments