@@ -347,6 +347,51 @@ def merge(
347
347
2 bar 7
348
348
3 bar 8
349
349
"""
350
+ return merge_pick_index (
351
+ left ,
352
+ right ,
353
+ how ,
354
+ on ,
355
+ left_on ,
356
+ right_on ,
357
+ left_index ,
358
+ right_index ,
359
+ sort ,
360
+ suffixes ,
361
+ copy ,
362
+ indicator ,
363
+ validate ,
364
+ )
365
+
366
+
367
+ def merge_pick_index (
368
+ left : DataFrame | Series ,
369
+ right : DataFrame | Series ,
370
+ how : MergeHow = "inner" ,
371
+ on : IndexLabel | AnyArrayLike | None = None ,
372
+ left_on : IndexLabel | AnyArrayLike | None = None ,
373
+ right_on : IndexLabel | AnyArrayLike | None = None ,
374
+ left_index : bool = False ,
375
+ right_index : bool = False ,
376
+ sort : bool = False ,
377
+ suffixes : Suffixes = ("_x" , "_y" ),
378
+ copy : bool | lib .NoDefault = lib .no_default ,
379
+ indicator : str | bool = False ,
380
+ validate : str | None = None ,
381
+ index : Literal ["left" , "right" , "reset" ] | None = None ,
382
+ ) -> DataFrame :
383
+ """A helper function for merge that returns a specified index.
384
+
385
+ If index is "left" or "right" then the returned DataFrame will
386
+ use the index from the left or right DataFrames respectively.
387
+
388
+ If index is "reset" then the DataFrame will have the default
389
+ index: zero for the first row, one for the second, etc.
390
+
391
+ If index is None then the value will be inferred based on the
392
+ merge. If merging on both indexes then None is the only accepted
393
+ value.
394
+ """
350
395
left_df = _validate_operand (left )
351
396
left ._check_copy_deprecation (copy )
352
397
right_df = _validate_operand (right )
@@ -378,6 +423,7 @@ def merge(
378
423
suffixes = suffixes ,
379
424
indicator = indicator ,
380
425
validate = validate ,
426
+ index = index ,
381
427
)
382
428
return op .get_result ()
383
429
@@ -932,6 +978,7 @@ class _MergeOperation:
932
978
join_names : list [Hashable ]
933
979
right_join_keys : list [ArrayLike ]
934
980
left_join_keys : list [ArrayLike ]
981
+ index : Literal ["left" , "right" , "reset" ] | None
935
982
936
983
def __init__ (
937
984
self ,
@@ -947,6 +994,7 @@ def __init__(
947
994
suffixes : Suffixes = ("_x" , "_y" ),
948
995
indicator : str | bool = False ,
949
996
validate : str | None = None ,
997
+ index : Literal ["left" , "right" , "reset" ] | None = None ,
950
998
) -> None :
951
999
_left = _validate_operand (left )
952
1000
_right = _validate_operand (right )
@@ -964,6 +1012,29 @@ def __init__(
964
1012
965
1013
self .indicator = indicator
966
1014
1015
# Decide which index the merged result will carry and record it on
# ``self.index``.
#
# * Merging on both indexes (non-asof): ``None`` is the only accepted
#   value; any explicit request raises ValueError.
# * ``index`` given explicitly ("left", "right" or "reset"): honoured as-is.
#   Inference must not overwrite it, otherwise the argument is silently
#   ignored.
# * ``index`` is None: inferred from ``left_index`` / ``right_index`` /
#   ``how`` and from whether the frame supplying the index is empty.
merging_on_both_indexes = (
    self.left_index and self.right_index and self.how != "asof"
)

if merging_on_both_indexes:
    if index is not None:
        raise ValueError(
            f'Index "{index}" is not supported for merges on both indexes.'
        )
elif index is None:
    if self.right_index:
        # Keys come from the left frame, so keep its index unless the
        # left frame is empty.
        index = "left" if len(self.left) > 0 else "right"
    elif self.left_index:
        # asof always uses the left index; otherwise use the right
        # frame's index unless the right frame is empty.
        if self.how == "asof" or len(self.right) == 0:
            index = "left"
        else:
            index = "right"
    else:
        # Column-on-column merge: the result gets a fresh default index.
        index = "reset"
elif index not in ("left", "right", "reset"):
    # Reject unknown values here; the later dispatch on ``self.index``
    # has no fallback branch for them.
    raise ValueError(
        f'index must be "left", "right", "reset" or None, not {index!r}.'
    )

self.index = index
1037
+
967
1038
if not is_bool (left_index ):
968
1039
raise ValueError (
969
1040
f"left_index parameter must be of type bool, not { type (left_index )} "
@@ -1341,53 +1412,32 @@ def _get_join_info(
1341
1412
)
1342
1413
1343
1414
elif self .right_index and self .how == "left" :
1344
- join_index , left_indexer , right_indexer = _left_join_on_index (
1415
+ left_indexer , right_indexer = _left_join_on_index (
1345
1416
left_ax , right_ax , self .left_join_keys , sort = self .sort
1346
1417
)
1347
1418
1348
1419
elif self .left_index and self .how == "right" :
1349
- join_index , right_indexer , left_indexer = _left_join_on_index (
1420
+ right_indexer , left_indexer = _left_join_on_index (
1350
1421
right_ax , left_ax , self .right_join_keys , sort = self .sort
1351
1422
)
1352
1423
else :
1353
- ( left_indexer , right_indexer ) = self ._get_join_indexers ()
1424
+ left_indexer , right_indexer = self ._get_join_indexers ()
1354
1425
1355
- if self .right_index :
1356
- if len (self .left ) > 0 :
1357
- join_index = self ._create_join_index (
1358
- left_ax ,
1359
- right_ax ,
1360
- left_indexer ,
1361
- how = "right" ,
1362
- )
1363
- elif right_indexer is None :
1364
- join_index = right_ax .copy ()
1365
- else :
1366
- join_index = right_ax .take (right_indexer )
1367
- elif self .left_index :
1368
- if self .how == "asof" :
1369
- # GH#33463 asof should always behave like a left merge
1370
- join_index = self ._create_join_index (
1371
- left_ax ,
1372
- right_ax ,
1373
- left_indexer ,
1374
- how = "left" ,
1375
- )
1376
-
1377
- elif len (self .right ) > 0 :
1378
- join_index = self ._create_join_index (
1379
- right_ax ,
1380
- left_ax ,
1381
- right_indexer ,
1382
- how = "left" ,
1383
- )
1384
- elif left_indexer is None :
1385
- join_index = left_ax .copy ()
1386
- else :
1387
- join_index = left_ax .take (left_indexer )
1388
- else :
1389
- n = len (left_ax ) if left_indexer is None else len (left_indexer )
1390
- join_index = default_index (n )
1426
+ if self .index == "left" :
1427
+ join_index = self ._create_join_index (
1428
+ left_ax ,
1429
+ right_ax ,
1430
+ left_indexer ,
1431
+ )
1432
+ elif self .index == "right" :
1433
+ join_index = self ._create_join_index (
1434
+ right_ax ,
1435
+ left_ax ,
1436
+ right_indexer ,
1437
+ )
1438
+ elif self .index == "reset" :
1439
+ n = len (left_ax ) if left_indexer is None else len (left_indexer )
1440
+ join_index = default_index (n )
1391
1441
1392
1442
return join_index , left_indexer , right_indexer
1393
1443
@@ -1397,7 +1447,6 @@ def _create_join_index(
1397
1447
index : Index ,
1398
1448
other_index : Index ,
1399
1449
indexer : npt .NDArray [np .intp ] | None ,
1400
- how : JoinHow = "left" ,
1401
1450
) -> Index :
1402
1451
"""
1403
1452
Create a join index by rearranging one index to match another
@@ -1407,17 +1456,15 @@ def _create_join_index(
1407
1456
index : Index
1408
1457
index being rearranged
1409
1458
other_index : Index
1410
- used to supply values not found in index
1459
+ do not fill with nulls if the other_index is a MultiIndex
1411
1460
indexer : np.ndarray[np.intp] or None
1412
1461
how to rearrange index
1413
- how : str
1414
- Replacement is only necessary if indexer based on other_index.
1415
1462
1416
1463
Returns
1417
1464
-------
1418
1465
Index
1419
1466
"""
1420
- if self . how in ( how , "outer" ) and not isinstance (other_index , MultiIndex ):
1467
+ if not isinstance (other_index , MultiIndex ):
1421
1468
# if final index requires values in other_index but not target
1422
1469
# index, indexer may hold missing (-1) values, causing Index.take
1423
1470
# to take the final value in target index. So, we set the last
@@ -2574,7 +2621,7 @@ def _get_no_sort_one_missing_indexer(
2574
2621
2575
2622
def _left_join_on_index (
2576
2623
left_ax : Index , right_ax : Index , join_keys : list [ArrayLike ], sort : bool = False
2577
- ) -> tuple [Index , npt .NDArray [np .intp ] | None , npt .NDArray [np .intp ]]:
2624
+ ) -> tuple [npt .NDArray [np .intp ] | None , npt .NDArray [np .intp ]]:
2578
2625
if isinstance (right_ax , MultiIndex ):
2579
2626
lkey , rkey = _get_multiindex_indexer (join_keys , right_ax , sort = sort )
2580
2627
else :
@@ -2593,11 +2640,10 @@ def _left_join_on_index(
2593
2640
2594
2641
if sort or len (left_ax ) != len (left_indexer ):
2595
2642
# if asked to sort or there are 1-to-many matches
2596
- join_index = left_ax .take (left_indexer )
2597
- return join_index , left_indexer , right_indexer
2643
+ return left_indexer , right_indexer
2598
2644
2599
2645
# left frame preserves order & length of its index
2600
- return left_ax , None , right_indexer
2646
+ return None , right_indexer
2601
2647
2602
2648
2603
2649
def _factorize_keys (
0 commit comments