diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index da0d85b7bb529..1d6f401b26e4e 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -664,6 +664,7 @@ Reshaping
 - Bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
 - Bug in :meth:`DataFrame.join` when a :class:`DataFrame` with a :class:`MultiIndex` would raise an ``AssertionError`` when :attr:`MultiIndex.names` contained ``None``. (:issue:`58721`)
 - Bug in :meth:`DataFrame.merge` where merging on a column containing only ``NaN`` values resulted in an out-of-bounds array access (:issue:`59421`)
+- Bug in :meth:`DataFrame.merge` where merging on an index could return a dataframe with the wrong index. (:issue:`57291`)
 - Bug in :meth:`DataFrame.unstack` producing incorrect results when ``sort=False`` (:issue:`54987`, :issue:`55516`)
 - Bug in :meth:`DataFrame.unstack` producing incorrect results when manipulating empty :class:`DataFrame` with an :class:`ExtentionDtype` (:issue:`59123`)
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index b84fb33af26e5..41da5d041c38c 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -10700,7 +10700,7 @@ def join(
         5  K1  A5   B1
         """
         from pandas.core.reshape.concat import concat
-        from pandas.core.reshape.merge import merge
+        from pandas.core.reshape.merge import merge_pick_index
 
         if isinstance(other, Series):
             if other.name is None:
@@ -10709,7 +10709,7 @@ def join(
 
         if isinstance(other, DataFrame):
             if how == "cross":
-                return merge(
+                return merge_pick_index(
                     self,
                     other,
                     how=how,
@@ -10717,8 +10717,9 @@ def join(
                     suffixes=(lsuffix, rsuffix),
                     sort=sort,
                     validate=validate,
+                    index="left",
                 )
-            return merge(
+            return merge_pick_index(
                 self,
                 other,
                 left_on=on,
@@ -10728,6 +10729,7 @@ def join(
                 suffixes=(lsuffix, rsuffix),
                 sort=sort,
                 validate=validate,
+                index=None if on is None else "left",
             )
         else:
             if on is not None:
@@ -10763,7 +10765,7 @@ def join(
             joined = frames[0]
 
             for frame in frames[1:]:
-                joined = merge(
+                joined = merge_pick_index(
                     joined,
                     frame,
                     how=how,
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index 07e8fa4841c04..563f61e4d5662 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -347,6 +347,51 @@ def merge(
     2   bar      7
     3   bar      8
     """
+    return merge_pick_index(
+        left,
+        right,
+        how,
+        on,
+        left_on,
+        right_on,
+        left_index,
+        right_index,
+        sort,
+        suffixes,
+        copy,
+        indicator,
+        validate,
+    )
+
+
+def merge_pick_index(
+    left: DataFrame | Series,
+    right: DataFrame | Series,
+    how: MergeHow = "inner",
+    on: IndexLabel | AnyArrayLike | None = None,
+    left_on: IndexLabel | AnyArrayLike | None = None,
+    right_on: IndexLabel | AnyArrayLike | None = None,
+    left_index: bool = False,
+    right_index: bool = False,
+    sort: bool = False,
+    suffixes: Suffixes = ("_x", "_y"),
+    copy: bool | lib.NoDefault = lib.no_default,
+    indicator: str | bool = False,
+    validate: str | None = None,
+    index: Literal["left", "right", "reset"] | None = None,
+) -> DataFrame:
+    """A helper function for merge that returns a specified index.
+
+    If index is "left" or "right" then the returned DataFrame will
+    use the index from the left or right DataFrames respectively.
+
+    If index is "reset" then the DataFrame will have the default
+    index: zero for the first row, one for the second, etc.
+
+    If index is None then the value will be inferred based on the
+    merge. If merging on both indexes then None is the only accepted
+    value.
+    """
     left_df = _validate_operand(left)
     left._check_copy_deprecation(copy)
     right_df = _validate_operand(right)
@@ -378,6 +423,7 @@ def merge(
             suffixes=suffixes,
             indicator=indicator,
             validate=validate,
+            index=index,
         )
         return op.get_result()
 
@@ -932,6 +978,7 @@ class _MergeOperation:
     join_names: list[Hashable]
     right_join_keys: list[ArrayLike]
     left_join_keys: list[ArrayLike]
+    index: Literal["left", "right", "reset"] | None
 
     def __init__(
         self,
@@ -947,6 +994,7 @@ def __init__(
         suffixes: Suffixes = ("_x", "_y"),
         indicator: str | bool = False,
         validate: str | None = None,
+        index: Literal["left", "right", "reset"] | None = None,
     ) -> None:
         _left = _validate_operand(left)
         _right = _validate_operand(right)
@@ -964,6 +1012,23 @@ def __init__(
 
         self.indicator = indicator
 
+        # Identify which index will be used for the output
+        if self.left_index and self.right_index and self.how != "asof":
+            if index is not None:
+                raise ValueError(
+                    f'Index "{index}" is not supported for merges on both indexes.'
+                )
+        elif index is not None:
+            pass
+        elif self.right_index:
+            index = "right"
+        elif self.left_index:
+            index = "left"
+        else:
+            index = "reset"
+
+        self.index = index
+
         if not is_bool(left_index):
             raise ValueError(
                 f"left_index parameter must be of type bool, not {type(left_index)}"
@@ -1341,53 +1406,32 @@ def _get_join_info(
             )
 
         elif self.right_index and self.how == "left":
-            join_index, left_indexer, right_indexer = _left_join_on_index(
+            left_indexer, right_indexer = _left_join_on_index(
                 left_ax, right_ax, self.left_join_keys, sort=self.sort
             )
 
         elif self.left_index and self.how == "right":
-            join_index, right_indexer, left_indexer = _left_join_on_index(
+            right_indexer, left_indexer = _left_join_on_index(
                 right_ax, left_ax, self.right_join_keys, sort=self.sort
             )
         else:
-            (left_indexer, right_indexer) = self._get_join_indexers()
+            left_indexer, right_indexer = self._get_join_indexers()
 
-            if self.right_index:
-                if len(self.left) > 0:
-                    join_index = self._create_join_index(
-                        left_ax,
-                        right_ax,
-                        left_indexer,
-                        how="right",
-                    )
-                elif right_indexer is None:
-                    join_index = right_ax.copy()
-                else:
-                    join_index = right_ax.take(right_indexer)
-            elif self.left_index:
-                if self.how == "asof":
-                    # GH#33463 asof should always behave like a left merge
-                    join_index = self._create_join_index(
-                        left_ax,
-                        right_ax,
-                        left_indexer,
-                        how="left",
-                    )
-
-                elif len(self.right) > 0:
-                    join_index = self._create_join_index(
-                        right_ax,
-                        left_ax,
-                        right_indexer,
-                        how="left",
-                    )
-                elif left_indexer is None:
-                    join_index = left_ax.copy()
-                else:
-                    join_index = left_ax.take(left_indexer)
-            else:
-                n = len(left_ax) if left_indexer is None else len(left_indexer)
-                join_index = default_index(n)
+        if self.index == "left":
+            join_index = self._create_join_index(
+                left_ax,
+                right_ax,
+                left_indexer,
+            )
+        elif self.index == "right":
+            join_index = self._create_join_index(
+                right_ax,
+                left_ax,
+                right_indexer,
+            )
+        elif self.index == "reset":
+            n = len(left_ax) if left_indexer is None else len(left_indexer)
+            join_index = default_index(n)
 
         return join_index, left_indexer, right_indexer
 
@@ -1397,7 +1441,6 @@ def _create_join_index(
         index: Index,
         other_index: Index,
         indexer: npt.NDArray[np.intp] | None,
-        how: JoinHow = "left",
     ) -> Index:
         """
         Create a join index by rearranging one index to match another
@@ -1407,17 +1450,15 @@ def _create_join_index(
         index : Index
             index being rearranged
         other_index : Index
-            used to supply values not found in index
+            do not fill with nulls if the other_index is a MultiIndex
         indexer : np.ndarray[np.intp] or None
             how to rearrange index
-        how : str
-            Replacement is only necessary if indexer based on other_index.
 
         Returns
         -------
         Index
         """
-        if self.how in (how, "outer") and not isinstance(other_index, MultiIndex):
+        if not isinstance(other_index, MultiIndex):
             # if final index requires values in other_index but not target
             # index, indexer may hold missing (-1) values, causing Index.take
             # to take the final value in target index. So, we set the last
@@ -2125,6 +2166,7 @@ def __init__(
             how=how,
             suffixes=suffixes,
             sort=True,  # factorize sorts
+            index="left" if left_index or right_index else "reset",
         )
 
     def get_result(self) -> DataFrame:
@@ -2574,7 +2616,7 @@ def _get_no_sort_one_missing_indexer(
 
 def _left_join_on_index(
     left_ax: Index, right_ax: Index, join_keys: list[ArrayLike], sort: bool = False
-) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp]]:
+) -> tuple[npt.NDArray[np.intp] | None, npt.NDArray[np.intp]]:
     if isinstance(right_ax, MultiIndex):
         lkey, rkey = _get_multiindex_indexer(join_keys, right_ax, sort=sort)
     else:
@@ -2593,11 +2635,10 @@ def _left_join_on_index(
 
     if sort or len(left_ax) != len(left_indexer):
         # if asked to sort or there are 1-to-many matches
-        join_index = left_ax.take(left_indexer)
-        return join_index, left_indexer, right_indexer
+        return left_indexer, right_indexer
 
     # left frame preserves order & length of its index
-    return left_ax, None, right_indexer
+    return None, right_indexer
 
 
 def _factorize_keys(
diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py
index 0f743332acbbe..1afeac051d66c 100644
--- a/pandas/tests/reshape/merge/test_join.py
+++ b/pandas/tests/reshape/merge/test_join.py
@@ -436,24 +436,7 @@ def test_join_inner_multiindex(self, lexsorted_two_level_string_multiindex):
             sort=False,
         )
 
-        expected2 = merge(
-            to_join,
-            data,
-            right_on=["key1", "key2"],
-            left_index=True,
-            how="inner",
-            sort=False,
-        )
-        tm.assert_frame_equal(joined, expected2.reindex_like(joined))
-
-        expected2 = merge(
-            to_join,
-            data,
-            right_on=["key1", "key2"],
-            left_index=True,
-            how="inner",
-            sort=False,
-        )
+        tm.assert_frame_equal(joined.reset_index(drop=True), expected[joined.columns])
 
         expected = expected.drop(["first", "second"], axis=1)
         expected.index = joined.index
diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
index d4766242b8460..d8cad84c52d5b 100644
--- a/pandas/tests/reshape/merge/test_merge.py
+++ b/pandas/tests/reshape/merge/test_merge.py
@@ -162,8 +162,10 @@ def test_merge_index_singlekey_inner(self):
             {
                 "key": ["a", "b", "c", "d", "e", "e", "a"],
                 "v1": np.random.default_rng(2).standard_normal(7),
+                "left_index": range(7),
             }
         )
+
         right = DataFrame(
             {"v2": np.random.default_rng(2).standard_normal(4)},
             index=["d", "b", "c", "a"],
@@ -171,12 +173,17 @@ def test_merge_index_singlekey_inner(self):
 
         # inner join
         result = merge(left, right, left_on="key", right_index=True, how="inner")
-        expected = left.join(right, on="key").loc[result.index]
-        tm.assert_frame_equal(result, expected)
+        expected = left.join(right, on="key").loc[result["left_index"]]
+        tm.assert_frame_equal(
+            result.reset_index(drop=True), expected.reset_index(drop=True)
+        )
 
         result = merge(right, left, right_on="key", left_index=True, how="inner")
-        expected = left.join(right, on="key").loc[result.index]
-        tm.assert_frame_equal(result, expected.loc[:, result.columns])
+        expected = left.join(right, on="key").loc[result["left_index"]]
+        tm.assert_frame_equal(
+            result.reset_index(drop=True),
+            expected.loc[:, result.columns].reset_index(drop=True),
+        )
 
     def test_merge_misspecified(self, df, df2, left):
         right = DataFrame(
@@ -349,8 +356,9 @@ def test_handle_join_key_pass_array(self):
         right = DataFrame({"rvalue": np.arange(6)})
 
         key = np.array([0, 1, 1, 2, 2, 3], dtype=np.int64)
+        index = np.array([0, 1, 1, 2, 2, np.nan], dtype=np.float64)
         merged = merge(left, right, left_index=True, right_on=key, how="outer")
-        tm.assert_series_equal(merged["key_0"], Series(key, name="key_0"))
+        tm.assert_series_equal(merged["key_0"], Series(key, index=index, name="key_0"))
 
     def test_no_overlap_more_informative_error(self):
         dt = datetime.now()
@@ -453,6 +461,9 @@ def test_merge_left_empty_right_notempty(self):
         )
         exp_in = exp_out[0:0]  # make empty DataFrame keeping dtype
 
+        exp_nan = exp_out.copy()
+        exp_nan.index = [np.nan] * 3
+
         def check1(exp, kwarg):
             result = merge(left, right, how="inner", **kwarg)
             tm.assert_frame_equal(result, exp)
@@ -465,12 +476,13 @@ def check2(exp, kwarg):
             result = merge(left, right, how="outer", **kwarg)
             tm.assert_frame_equal(result, exp)
 
-        for kwarg in [
-            {"left_index": True, "right_index": True},
-            {"left_index": True, "right_on": "x"},
-        ]:
-            check1(exp_in, kwarg)
-            check2(exp_out, kwarg)
+        kwarg = {"left_index": True, "right_on": "x"}
+        check1(exp_in, kwarg)
+        check2(exp_nan, kwarg)
+
+        kwarg = {"left_index": True, "right_index": True}
+        check1(exp_in, kwarg)
+        check2(exp_out, kwarg)
 
         kwarg = {"left_on": "a", "right_index": True}
         check1(exp_in, kwarg)
@@ -762,6 +774,7 @@ def test_other_datetime_unit(self, unit):
                 "days": days,
             },
             columns=["entity_id", "days"],
+            index=[101, 102],
         )
         assert exp["days"].dtype == exp_dtype
         tm.assert_frame_equal(result, exp)
@@ -789,6 +802,7 @@ def test_other_timedelta_unit(self, unit):
         exp = DataFrame(
             {"entity_id": [101, 102], "days": np.array(["nat", "nat"], dtype=dtype)},
             columns=["entity_id", "days"],
+            index=[101, 102],
         )
         tm.assert_frame_equal(result, exp)
 
@@ -1190,7 +1204,7 @@ def test_validation(self):
                 "c": ["meow", "bark", "um... weasel noise?", "nay"],
             },
             columns=["b", "a", "c"],
-            index=range(4),
+            index=Index(["a", "b", "c", "d"], name="a"),
         )
 
         left_index_reset = left.set_index("a")
@@ -1331,48 +1345,17 @@ def test_merge_two_empty_df_no_division_error(self):
 
     @pytest.mark.parametrize("how", ["right", "outer"])
     @pytest.mark.parametrize(
-        "index,expected_index",
+        "index",
         [
-            (
-                CategoricalIndex([1, 2, 4]),
-                CategoricalIndex([1, 2, 4, None, None, None]),
-            ),
-            (
-                DatetimeIndex(
-                    ["2001-01-01", "2002-02-02", "2003-03-03"], dtype="M8[ns]"
-                ),
-                DatetimeIndex(
-                    ["2001-01-01", "2002-02-02", "2003-03-03", pd.NaT, pd.NaT, pd.NaT],
-                    dtype="M8[ns]",
-                ),
-            ),
-            *[
-                (
-                    Index([1, 2, 3], dtype=dtyp),
-                    Index([1, 2, 3, None, None, None], dtype=np.float64),
-                )
-                for dtyp in tm.ALL_REAL_NUMPY_DTYPES
-            ],
-            (
-                IntervalIndex.from_tuples([(1, 2), (2, 3), (3, 4)]),
-                IntervalIndex.from_tuples(
-                    [(1, 2), (2, 3), (3, 4), np.nan, np.nan, np.nan]
-                ),
-            ),
-            (
-                PeriodIndex(["2001-01-01", "2001-01-02", "2001-01-03"], freq="D"),
-                PeriodIndex(
-                    ["2001-01-01", "2001-01-02", "2001-01-03", pd.NaT, pd.NaT, pd.NaT],
-                    freq="D",
-                ),
-            ),
-            (
-                TimedeltaIndex(["1D", "2D", "3D"]),
-                TimedeltaIndex(["1D", "2D", "3D", pd.NaT, pd.NaT, pd.NaT]),
-            ),
+            CategoricalIndex([1, 2, 4]),
+            DatetimeIndex(["2001-01-01", "2002-02-02", "2003-03-03"], dtype="M8[ns]"),
+            *[Index([1, 2, 3], dtype=dtyp) for dtyp in tm.ALL_REAL_NUMPY_DTYPES],
+            IntervalIndex.from_tuples([(1, 2), (2, 3), (3, 4)]),
+            PeriodIndex(["2001-01-01", "2001-01-02", "2001-01-03"], freq="D"),
+            TimedeltaIndex(["1D", "2D", "3D"]),
         ],
     )
-    def test_merge_on_index_with_more_values(self, how, index, expected_index):
+    def test_merge_on_index_with_more_values(self, how, index):
         # GH 24212
         # pd.merge gets [0, 1, 2, -1, -1, -1] as left_indexer, ensure that
         # -1 is interpreted as a missing value instead of the last element
@@ -1390,20 +1373,17 @@ def test_merge_on_index_with_more_values(self, how, index, expected_index):
             ],
             columns=["a", "key", "b"],
         )
-        expected.set_index(expected_index, inplace=True)
         tm.assert_frame_equal(result, expected)
 
     def test_merge_right_index_right(self):
-        # Note: the expected output here is probably incorrect.
-        # See https://github.com/pandas-dev/pandas/issues/17257 for more.
-        # We include this as a regression test for GH-24897.
+        # Regression test for GH-24897.
         left = DataFrame({"a": [1, 2, 3], "key": [0, 1, 1]})
         right = DataFrame({"b": [1, 2, 3]})
 
         expected = DataFrame(
             {"a": [1, 2, 3, None], "key": [0, 1, 1, 2], "b": [1, 2, 2, 3]},
             columns=["a", "key", "b"],
-            index=[0, 1, 2, np.nan],
+            index=[0, 1, 1, 2],
         )
         result = left.merge(right, left_on="key", right_index=True, how="right")
         tm.assert_frame_equal(result, expected)
@@ -1436,7 +1416,7 @@ def test_merge_take_missing_values_from_index_of_other_dtype(self):
                 "key": Categorical(["a", "a", "b", "c"]),
                 "b": [1, 1, 2, 3],
             },
-            index=[0, 1, 2, np.nan],
+            index=Categorical(["a", "a", "b", "c"], categories=list("abc")),
         )
         expected = expected.reindex(columns=["a", "key", "b"])
         tm.assert_frame_equal(result, expected)
@@ -2661,7 +2641,8 @@ def test_merge_right_left_index():
             "z_x": ["foo", "foo"],
             "x_y": [1, 1],
             "z_y": ["foo", "foo"],
-        }
+        },
+        index=[1, 1],
     )
     tm.assert_frame_equal(result, expected)
 
@@ -2670,7 +2651,7 @@ def test_merge_result_empty_index_and_on():
     # GH#33814
     df1 = DataFrame({"a": [1], "b": [2]}).set_index(["a", "b"])
     df2 = DataFrame({"b": [1]}).set_index(["b"])
-    expected = DataFrame({"a": [], "b": []}, dtype=np.int64).set_index(["a", "b"])
+    expected = DataFrame({"b": []}, dtype=np.int64).set_index(["b"])
     result = merge(df1, df2, left_on=["b"], right_index=True)
     tm.assert_frame_equal(result, expected)
 
@@ -2850,7 +2831,9 @@ def test_merge_multiindex_single_level():
         data={"b": [100]},
         index=MultiIndex.from_tuples([("A",), ("C",)], names=["col"]),
     )
-    expected = DataFrame({"col": ["A", "B"], "b": [100, np.nan]})
+    expected = DataFrame(
+        {"col": ["A", "B"], "b": [100, np.nan]}, index=Index([("A",), np.nan])
+    )
 
     result = df.merge(df2, left_on=["col"], right_index=True, how="left")
     tm.assert_frame_equal(result, expected)
@@ -2957,6 +2940,9 @@ def test_merge_ea_int_and_float_numpy():
     tm.assert_frame_equal(result, expected.astype("float64"))
 
 
+from pandas.core.dtypes.missing import na_value_for_dtype
+
+
 def test_merge_arrow_string_index(any_string_dtype):
     # GH#54894
     pytest.importorskip("pyarrow")
@@ -2964,7 +2950,10 @@ def test_merge_arrow_string_index(any_string_dtype):
     right = DataFrame({"b": 1}, index=Index(["a", "c"], dtype=any_string_dtype))
     result = left.merge(right, left_on="a", right_index=True, how="left")
     expected = DataFrame(
-        {"a": Series(["a", "b"], dtype=any_string_dtype), "b": [1, np.nan]}
+        {"a": Series(["a", "b"], dtype=any_string_dtype), "b": [1.0, np.nan]},
+    )
+    expected.index = Index(["a"], dtype=any_string_dtype).append(
+        Index([na_value_for_dtype(any_string_dtype)])
     )
     tm.assert_frame_equal(result, expected)
 
@@ -3022,3 +3011,12 @@ def test_merge_on_all_nan_column():
         {"x": [1, 2, 3], "y": [np.nan, np.nan, np.nan], "z": [4, 5, 6], "zz": [4, 5, 6]}
     )
     tm.assert_frame_equal(result, expected)
+
+
+def test_merge_index():
+    # GH 57291
+    dfa = DataFrame(range(10), columns=["a"])
+    dfb = DataFrame({"b": range(5), "key": [5 + x for x in range(5)]})
+
+    result = dfa.merge(dfb, left_index=True, right_on="key", how="left")
+    tm.assert_index_equal(result.index, dfa.index)