@@ -347,6 +347,51 @@ def merge(
347347 2 bar 7
348348 3 bar 8
349349 """
350+ return merge_pick_index (
351+ left ,
352+ right ,
353+ how ,
354+ on ,
355+ left_on ,
356+ right_on ,
357+ left_index ,
358+ right_index ,
359+ sort ,
360+ suffixes ,
361+ copy ,
362+ indicator ,
363+ validate ,
364+ )
365+
366+
367+ def merge_pick_index (
368+ left : DataFrame | Series ,
369+ right : DataFrame | Series ,
370+ how : MergeHow = "inner" ,
371+ on : IndexLabel | AnyArrayLike | None = None ,
372+ left_on : IndexLabel | AnyArrayLike | None = None ,
373+ right_on : IndexLabel | AnyArrayLike | None = None ,
374+ left_index : bool = False ,
375+ right_index : bool = False ,
376+ sort : bool = False ,
377+ suffixes : Suffixes = ("_x" , "_y" ),
378+ copy : bool | lib .NoDefault = lib .no_default ,
379+ indicator : str | bool = False ,
380+ validate : str | None = None ,
381+ index : Literal ["left" , "right" , "reset" ] | None = None ,
382+ ) -> DataFrame :
383+ """A helper function for merge that returns a DataFrame with a specified index.
384+
385+ If index is "left" or "right" then the returned DataFrame will
386+ use the index from the left or right DataFrames respectively.
387+
388+ If index is "reset" then the DataFrame will have the default
389+ index: zero for the first row, one for the second, etc.
390+
391+ If index is None then the value will be inferred based on the
392+ merge. If merging on both indexes then None is the only accepted
393+ value.
394+ """
350395 left_df = _validate_operand (left )
351396 left ._check_copy_deprecation (copy )
352397 right_df = _validate_operand (right )
@@ -378,6 +423,7 @@ def merge(
378423 suffixes = suffixes ,
379424 indicator = indicator ,
380425 validate = validate ,
426+ index = index ,
381427 )
382428 return op .get_result ()
383429
@@ -932,6 +978,7 @@ class _MergeOperation:
932978 join_names : list [Hashable ]
933979 right_join_keys : list [ArrayLike ]
934980 left_join_keys : list [ArrayLike ]
981+ index : Literal ["left" , "right" , "reset" ] | None
935982
936983 def __init__ (
937984 self ,
@@ -947,6 +994,7 @@ def __init__(
947994 suffixes : Suffixes = ("_x" , "_y" ),
948995 indicator : str | bool = False ,
949996 validate : str | None = None ,
997+ index : Literal ["left" , "right" , "reset" ] | None = None ,
950998 ) -> None :
951999 _left = _validate_operand (left )
9521000 _right = _validate_operand (right )
@@ -964,6 +1012,29 @@ def __init__(
9641012
9651013 self .indicator = indicator
9661014
1015+ # Identify which index will be used for the output
1016+ if self .left_index and self .right_index and self .how != "asof" :
1017+ if index is not None :
1018+ raise ValueError (
1019+ f'Index "{ index } " is not supported for merges on both indexes.'
1020+ )
1021+ elif self .right_index :
1022+ if len (self .left ) > 0 :
1023+ index = "left"
1024+ else :
1025+ index = "right"
1026+ elif self .left_index :
1027+ if self .how == "asof" :
1028+ index = "left"
1029+ elif len (self .right ) > 0 :
1030+ index = "right"
1031+ else :
1032+ index = "left"
1033+ else :
1034+ index = "reset"
1035+
1036+ self .index = index
1037+
9671038 if not is_bool (left_index ):
9681039 raise ValueError (
9691040 f"left_index parameter must be of type bool, not { type (left_index )} "
@@ -1341,53 +1412,32 @@ def _get_join_info(
13411412 )
13421413
13431414 elif self .right_index and self .how == "left" :
1344- join_index , left_indexer , right_indexer = _left_join_on_index (
1415+ left_indexer , right_indexer = _left_join_on_index (
13451416 left_ax , right_ax , self .left_join_keys , sort = self .sort
13461417 )
13471418
13481419 elif self .left_index and self .how == "right" :
1349- join_index , right_indexer , left_indexer = _left_join_on_index (
1420+ right_indexer , left_indexer = _left_join_on_index (
13501421 right_ax , left_ax , self .right_join_keys , sort = self .sort
13511422 )
13521423 else :
1353- ( left_indexer , right_indexer ) = self ._get_join_indexers ()
1424+ left_indexer , right_indexer = self ._get_join_indexers ()
13541425
1355- if self .right_index :
1356- if len (self .left ) > 0 :
1357- join_index = self ._create_join_index (
1358- left_ax ,
1359- right_ax ,
1360- left_indexer ,
1361- how = "right" ,
1362- )
1363- elif right_indexer is None :
1364- join_index = right_ax .copy ()
1365- else :
1366- join_index = right_ax .take (right_indexer )
1367- elif self .left_index :
1368- if self .how == "asof" :
1369- # GH#33463 asof should always behave like a left merge
1370- join_index = self ._create_join_index (
1371- left_ax ,
1372- right_ax ,
1373- left_indexer ,
1374- how = "left" ,
1375- )
1376-
1377- elif len (self .right ) > 0 :
1378- join_index = self ._create_join_index (
1379- right_ax ,
1380- left_ax ,
1381- right_indexer ,
1382- how = "left" ,
1383- )
1384- elif left_indexer is None :
1385- join_index = left_ax .copy ()
1386- else :
1387- join_index = left_ax .take (left_indexer )
1388- else :
1389- n = len (left_ax ) if left_indexer is None else len (left_indexer )
1390- join_index = default_index (n )
1426+ if self .index == "left" :
1427+ join_index = self ._create_join_index (
1428+ left_ax ,
1429+ right_ax ,
1430+ left_indexer ,
1431+ )
1432+ elif self .index == "right" :
1433+ join_index = self ._create_join_index (
1434+ right_ax ,
1435+ left_ax ,
1436+ right_indexer ,
1437+ )
1438+ elif self .index == "reset" :
1439+ n = len (left_ax ) if left_indexer is None else len (left_indexer )
1440+ join_index = default_index (n )
13911441
13921442 return join_index , left_indexer , right_indexer
13931443
@@ -1397,7 +1447,6 @@ def _create_join_index(
13971447 index : Index ,
13981448 other_index : Index ,
13991449 indexer : npt .NDArray [np .intp ] | None ,
1400- how : JoinHow = "left" ,
14011450 ) -> Index :
14021451 """
14031452 Create a join index by rearranging one index to match another
@@ -1407,17 +1456,15 @@ def _create_join_index(
14071456 index : Index
14081457 index being rearranged
14091458 other_index : Index
1410- used to supply values not found in index
1410+ index being matched against; missing entries are not filled with nulls if other_index is a MultiIndex
14111460 indexer : np.ndarray[np.intp] or None
14121461 how to rearrange index
1413- how : str
1414- Replacement is only necessary if indexer based on other_index.
14151462
14161463 Returns
14171464 -------
14181465 Index
14191466 """
1420- if self . how in ( how , "outer" ) and not isinstance (other_index , MultiIndex ):
1467+ if not isinstance (other_index , MultiIndex ):
14211468 # if final index requires values in other_index but not target
14221469 # index, indexer may hold missing (-1) values, causing Index.take
14231470 # to take the final value in target index. So, we set the last
@@ -2574,7 +2621,7 @@ def _get_no_sort_one_missing_indexer(
25742621
25752622def _left_join_on_index (
25762623 left_ax : Index , right_ax : Index , join_keys : list [ArrayLike ], sort : bool = False
2577- ) -> tuple [Index , npt .NDArray [np .intp ] | None , npt .NDArray [np .intp ]]:
2624+ ) -> tuple [npt .NDArray [np .intp ] | None , npt .NDArray [np .intp ]]:
25782625 if isinstance (right_ax , MultiIndex ):
25792626 lkey , rkey = _get_multiindex_indexer (join_keys , right_ax , sort = sort )
25802627 else :
@@ -2593,11 +2640,10 @@ def _left_join_on_index(
25932640
25942641 if sort or len (left_ax ) != len (left_indexer ):
25952642 # if asked to sort or there are 1-to-many matches
2596- join_index = left_ax .take (left_indexer )
2597- return join_index , left_indexer , right_indexer
2643+ return left_indexer , right_indexer
25982644
25992645 # left frame preserves order & length of its index
2600- return left_ax , None , right_indexer
2646+ return None , right_indexer
26012647
26022648
26032649def _factorize_keys (
0 commit comments