diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index e048015298461..320b79203c0b3 100644 --- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -235,8 +235,10 @@ class ConstantOffsetExtractor { /// \p GEP The given GEP /// \p UserChainTail Outputs the tail of UserChain so that we can /// garbage-collect unused instructions in UserChain. + /// \p PreservesNUW Outputs whether the extraction allows preserving the + /// GEP's nuw flag, if it has one. static Value *Extract(Value *Idx, GetElementPtrInst *GEP, - User *&UserChainTail); + User *&UserChainTail, bool &PreservesNUW); /// Looks for a constant offset from the given GEP index without extracting /// it. It returns the numeric value of the extracted constant offset (0 if @@ -778,8 +780,32 @@ Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) { return NewBO; } +/// A helper function to check if reassociating through an entry in the user +/// chain would invalidate the GEP's nuw flag. +static bool allowsPreservingNUW(const User *U) { + if (const BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) { + // Binary operations need to be effectively add nuw. + auto Opcode = BO->getOpcode(); + if (Opcode == BinaryOperator::Or) { + // Ors are only considered here if they are disjoint. The addition that + // they represent in this case is NUW. + assert(cast<PossiblyDisjointInst>(BO)->isDisjoint()); + return true; + } + return Opcode == BinaryOperator::Add && BO->hasNoUnsignedWrap(); + } + // UserChain can only contain ConstantInt, CastInst, or BinaryOperator. + // Among the possible CastInsts, only trunc without nuw is a problem: If it + // is distributed through an add nuw, wrapping may occur: + // "add nuw trunc(a), trunc(b)" is more poisonous than "trunc(add nuw a, b)" + if (const TruncInst *TI = dyn_cast<TruncInst>(U)) + return TI->hasNoUnsignedWrap(); + return isa<ConstantInt>(U) || isa<CastInst>(U); +} + Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP, - User *&UserChainTail) { + User *&UserChainTail, + bool &PreservesNUW) { ConstantOffsetExtractor Extractor(GEP->getIterator()); // Find a non-zero constant offset first. APInt ConstantOffset = @@ -787,8 +813,12 @@ Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP, GEP->isInBounds()); if (ConstantOffset == 0) { UserChainTail = nullptr; + PreservesNUW = true; return nullptr; } + + PreservesNUW = all_of(Extractor.UserChain, allowsPreservingNUW); + // Separates the constant offset from the GEP index. Value *IdxWithoutConstOffset = Extractor.rebuildWithoutConstOffset(); UserChainTail = Extractor.UserChain.back(); @@ -1052,6 +1082,10 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { } } + // Track information for preserving GEP flags. + bool AllOffsetsNonNegative = AccumulativeByteOffset >= 0; + bool AllNUWPreserved = true; + // Remove the constant offset in each sequential index. The resultant GEP // computes the variadic base. // Notice that we don't remove struct field indices here. If LowerGEP is @@ -1070,8 +1104,9 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { // uses the variadic part as the new index.
Value *OldIdx = GEP->getOperand(I); User *UserChainTail; - Value *NewIdx = - ConstantOffsetExtractor::Extract(OldIdx, GEP, UserChainTail); + bool PreservesNUW; + Value *NewIdx = ConstantOffsetExtractor::Extract( + OldIdx, GEP, UserChainTail, PreservesNUW); if (NewIdx != nullptr) { // Switches to the index with the constant offset removed. GEP->setOperand(I, NewIdx); @@ -1079,6 +1114,9 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { // and the old index if they are not used. RecursivelyDeleteTriviallyDeadInstructions(UserChainTail); RecursivelyDeleteTriviallyDeadInstructions(OldIdx); + AllOffsetsNonNegative = + AllOffsetsNonNegative && isKnownNonNegative(NewIdx, *DL); + AllNUWPreserved &= PreservesNUW; } } } @@ -1099,12 +1137,35 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { // inbounds keyword is not present, the offsets are added to the base // address with silently-wrapping two's complement arithmetic". // Therefore, the final code will be a semantically equivalent. - // - // TODO(jingyue): do some range analysis to keep as many inbounds as - // possible. GEPs with inbounds are more friendly to alias analysis. - // TODO(gep_nowrap): Preserve nuw at least. GEPNoWrapFlags NewGEPFlags = GEPNoWrapFlags::none(); - GEP->setNoWrapFlags(GEPNoWrapFlags::none()); + + // If the initial GEP was inbounds/nusw and all variable indices and the + // accumulated offsets are non-negative, they can be added in any order and + // the intermediate results are in bounds and don't overflow in a nusw sense. + // So, we can preserve the inbounds/nusw flag for both GEPs. + bool CanPreserveInBoundsNUSW = AllOffsetsNonNegative; + + // If the initial GEP was NUW and all operations that we reassociate were NUW + // additions, the resulting GEPs are also NUW. + if (GEP->hasNoUnsignedWrap() && AllNUWPreserved) { + NewGEPFlags |= GEPNoWrapFlags::noUnsignedWrap(); + // If the initial GEP additionally had NUSW (or inbounds, which implies + // NUSW), we know that the indices in the initial GEP must all have their + // signbit not set. For indices that are the result of NUW adds, the + // add-operands therefore also don't have their signbit set. Therefore, all + // indices of the resulting GEPs are non-negative -> we can preserve + // the inbounds/nusw flag. + CanPreserveInBoundsNUSW |= GEP->hasNoUnsignedSignedWrap(); + } + + if (CanPreserveInBoundsNUSW) { + if (GEP->isInBounds()) + NewGEPFlags |= GEPNoWrapFlags::inBounds(); + else if (GEP->hasNoUnsignedSignedWrap()) + NewGEPFlags |= GEPNoWrapFlags::noUnsignedSignedWrap(); + } + + GEP->setNoWrapFlags(NewGEPFlags); // Lowers a GEP to either GEPs with a single index or arithmetic operations. 
if (LowerGEP) { diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll index 422e5d8215502..2a5b678e91fd8 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll @@ -1,5 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -passes=separate-const-offset-from-gep -S | FileCheck %s +; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=separate-const-offset-from-gep -S | FileCheck %s + +; gfx1200 is particularly interesting since it allows negative immediate offsets +; in flat instructions, so the transformation is applied in more cases. ; The inbounds flags cannot be preserved here: If the pointers point to the ; beginning of an object and %i is 1, the intermediate GEPs are out of bounds. @@ -16,3 +19,524 @@ entry: %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idx ret ptr %arrayidx } + +; All indices must be non-negative, so inbounds can be preserved. +define ptr @must_be_inbounds(ptr %p, i32 %i) { +; CHECK-LABEL: @must_be_inbounds( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[I_PROM:%.*]] = zext i32 [[I:%.*]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[I_PROM]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %i.prom = zext i32 %i to i64 + %idx = add nsw i64 %i.prom, 1 + %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idx + ret ptr %arrayidx +} + +; idx must be non-negative -> preserve inbounds +define ptr @sign_bit_clear(ptr %p, i64 %i) { +; CHECK-LABEL: @sign_bit_clear( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IDX:%.*]] = and i64 [[I:%.*]], 9223372036854775807 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[IDX]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX]] +; +entry: + %idx = and i64 %i, u0x7fffffffffffffff + %idx.add = add i64 %idx, 1 + %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idx.add + ret ptr %arrayidx +} + +; idx may be negative -> don't preserve inbounds +define ptr @sign_bit_not_clear(ptr %p, i64 %i) { +; CHECK-LABEL: @sign_bit_not_clear( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IDX:%.*]] = and i64 [[I:%.*]], -256 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[IDX]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = and i64 %i, u0xffffffffffffff00 + %idx.add = add i64 %idx, 1 + %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idx.add + ret ptr %arrayidx +} + +; idx may be 0 or very negative -> don't preserve inbounds +define ptr @only_sign_bit_not_clear(ptr %p, i64 %i) { +; CHECK-LABEL: @only_sign_bit_not_clear( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IDX:%.*]] = and i64 [[I:%.*]], -9223372036854775808 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[IDX]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = and i64 %i, u0x8000000000000000 + %idx.add = add i64 %idx, 1 + %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idx.add + ret ptr %arrayidx +} + +; all indices non-negative -> preserve inbounds +define ptr 
@multi_level_nonnegative(ptr %p, i64 %idx1, i64 %idx2) { +; CHECK-LABEL: @multi_level_nonnegative( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MASKED_IDX1:%.*]] = and i64 [[IDX1:%.*]], 255 +; CHECK-NEXT: [[MASKED_IDX2:%.*]] = and i64 [[IDX2:%.*]], 65535 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [10 x [20 x i32]], ptr [[P:%.*]], i64 0, i64 [[MASKED_IDX1]], i64 [[MASKED_IDX2]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 180 +; CHECK-NEXT: ret ptr [[ARRAYIDX3]] +; +entry: + %masked.idx1 = and i64 %idx1, u0xff + %masked.idx2 = and i64 %idx2, u0xffff + %idx1.add = add i64 %masked.idx1, 2 + %idx2.add = add i64 %masked.idx2, 5 + %arrayidx = getelementptr inbounds [10 x [20 x i32]], ptr %p, i64 0, i64 %idx1.add, i64 %idx2.add + ret ptr %arrayidx +} + +; It doesn't matter that %idx2.add might be negative, the indices in the resulting GEPs are all non-negative -> preserve inbounds +define ptr @multi_level_mixed_okay(ptr %p, i64 %idx1, i64 %idx2) { +; CHECK-LABEL: @multi_level_mixed_okay( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MASKED_IDX1:%.*]] = and i64 [[IDX1:%.*]], 255 +; CHECK-NEXT: [[MASKED_IDX2:%.*]] = and i64 [[IDX2:%.*]], 65535 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [10 x [20 x i32]], ptr [[P:%.*]], i64 0, i64 [[MASKED_IDX1]], i64 [[MASKED_IDX2]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 156 +; CHECK-NEXT: ret ptr [[ARRAYIDX3]] +; +entry: + %masked.idx1 = and i64 %idx1, u0xff + %masked.idx2 = and i64 %idx2, u0xffff + %idx1.add = add i64 %masked.idx1, 2 + %idx2.add = add i64 %masked.idx2, -1 + %arrayidx = getelementptr inbounds [10 x [20 x i32]], ptr %p, i64 0, i64 %idx1.add, i64 %idx2.add + ret ptr %arrayidx +} + +; One index may be negative -> don't preserve inbounds +define ptr @multi_level_mixed_not_okay(ptr %p, i64 %idx1, i64 %idx2) { +; CHECK-LABEL: @multi_level_mixed_not_okay( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MASKED_IDX1:%.*]] = and i64 [[IDX1:%.*]], -256 +; CHECK-NEXT: [[MASKED_IDX2:%.*]] = and i64 [[IDX2:%.*]], 65535 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [10 x [20 x i32]], ptr [[P:%.*]], i64 0, i64 [[MASKED_IDX1]], i64 [[MASKED_IDX2]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr i8, ptr [[TMP0]], i64 156 +; CHECK-NEXT: ret ptr [[ARRAYIDX3]] +; +entry: + %masked.idx1 = and i64 %idx1, u0xffffffffffffff00 + %masked.idx2 = and i64 %idx2, u0xffff + %idx1.add = add i64 %masked.idx1, 2 + %idx2.add = add i64 %masked.idx2, -1 + %arrayidx = getelementptr inbounds [10 x [20 x i32]], ptr %p, i64 0, i64 %idx1.add, i64 %idx2.add + ret ptr %arrayidx +} + + +define ptr @nuw_implies_nuw(ptr %p, i64 %i) { +; CHECK-LABEL: @nuw_implies_nuw( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nuw i32, ptr [[P:%.*]], i64 [[I:%.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr nuw i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = add nuw i64 %i, 1 + %arrayidx = getelementptr nuw i32, ptr %p, i64 %idx + ret ptr %arrayidx +} + +define ptr @nuw_implies_nuw_negative(ptr %p, i64 %i) { +; CHECK-LABEL: @nuw_implies_nuw_negative( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nuw i32, ptr [[P:%.*]], i64 [[I:%.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr nuw i8, ptr [[TMP0]], i64 -64 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = add nuw i64 %i, -16 + %arrayidx = getelementptr nuw i32, ptr %p, i64 %idx + ret ptr %arrayidx +} + +define ptr @nuw_inbounds_implies_nuw_inbounds(ptr %p, i64 %i) { +; CHECK-LABEL: 
@nuw_inbounds_implies_nuw_inbounds( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[I:%.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = add nuw i64 %i, 1 + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx + ret ptr %arrayidx +} + +; This is poison anyway, so we can preserve the flags. +define ptr @nuw_inbounds_implies_nuw_inbounds_negative(ptr %p, i64 %i) { +; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_negative( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[I:%.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 -64 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = add nuw i64 %i, -16 + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx + ret ptr %arrayidx +} + +define ptr @nuw_nusw_implies_nuw_nusw(ptr %p, i64 %i) { +; CHECK-LABEL: @nuw_nusw_implies_nuw_nusw( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nusw nuw i32, ptr [[P:%.*]], i64 [[I:%.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr nusw nuw i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = add nuw i64 %i, 1 + %arrayidx = getelementptr nusw nuw i32, ptr %p, i64 %idx + ret ptr %arrayidx +} + +; Also poison. +define ptr @nuw_implies_nuw_nusw_negative(ptr %p, i64 %i) { +; CHECK-LABEL: @nuw_implies_nuw_nusw_negative( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nusw nuw i32, ptr [[P:%.*]], i64 [[I:%.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr nusw nuw i8, ptr [[TMP0]], i64 -64 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = add nuw i64 %i, -16 + %arrayidx = getelementptr nusw nuw i32, ptr %p, i64 %idx + ret ptr %arrayidx +} + + +define ptr @nuw_inbounds_implies_nuw_inbounds_ordisjoint(ptr %p, i64 %i) { +; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_ordisjoint( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[I:%.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = or disjoint i64 %i, 1 + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx + ret ptr %arrayidx +} + +; This is poison anyway, so we can do the transformation. 
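+; (Illustration of why: disjointness forces the high bits of %i to be clear, so
+; %i < 16 and %idx is 2^64 - 16 + %i as an unsigned number; scaling that by the
+; i32 size under the GEP's nuw wraps unsigned, so the source GEP is already
+; poison and keeping the flags is a refinement.)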
+define ptr @nuw_inbounds_implies_nuw_inbounds_ordisjoint_negative(ptr %p, i64 %i) { +; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_ordisjoint_negative( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[I:%.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 -64 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = or disjoint i64 %i, -16 + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx + ret ptr %arrayidx +} + +; Check that nothing happens for non-disjoint ors +define ptr @or_no_disjoint(ptr %p, i64 %i) { +; CHECK-LABEL: @or_no_disjoint( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IDX:%.*]] = or i64 [[I:%.*]], 1 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[IDX]] +; CHECK-NEXT: ret ptr [[ARRAYIDX]] +; +entry: + %idx = or i64 %i, 1 + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx + ret ptr %arrayidx +} + +define ptr @no_nuw_inbounds_for_sub(ptr %p, i64 %i) { +; CHECK-LABEL: @no_nuw_inbounds_for_sub( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[I:%.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 -4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = sub nuw i64 %i, 1 + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx + ret ptr %arrayidx +} + +define ptr @no_nuw_inbounds_for_sub_negative(ptr %p, i64 %i) { +; CHECK-LABEL: @no_nuw_inbounds_for_sub_negative( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[I:%.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 64 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = sub nuw i64 %i, -16 + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx + ret ptr %arrayidx +} + +; Can't preserve nuw and other flags here as distributing the trunc towards the +; leaves can introduce new wraps. +define ptr @nuw_inbounds_trunc(ptr %p, i128 %i) { +; CHECK-LABEL: @nuw_inbounds_trunc( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = trunc i128 [[I:%.*]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = add nuw i128 %i, 1 + %idx.conv = trunc i128 %idx to i64 + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx.conv + ret ptr %arrayidx +} + +; trunc nuw is not a problem.
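+; (Illustration: nuw on the trunc means the dropped bits are known zero, so
+; distributing it through an add nuw cannot create a wrap in the narrow type.
+; Without it, e.g. for i16 values a = 255 and b = 1, "trunc (add nuw a, b)" to
+; i8 is a well-defined 0, while "add nuw (trunc a), (trunc b)" wraps in i8 and
+; is poison.)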
+define ptr @nuw_inbounds_implies_nuw_inbounds_trunc_nuw(ptr %p, i128 %i) { +; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_trunc_nuw( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw i128 [[I:%.*]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = add nuw i128 %i, 1 + %idx.conv = trunc nuw i128 %idx to i64 + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx.conv + ret ptr %arrayidx +} + +define ptr @nuw_inbounds_implies_nuw_inbounds_sext(ptr %p, i32 %i) { +; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_sext( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[I:%.*]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = add nuw i32 %i, 1 + %idx.conv = sext i32 %idx to i64 + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx.conv + ret ptr %arrayidx +} + +define ptr @nuw_inbounds_implies_nuw_inbounds_zext(ptr %p, i32 %i) { +; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_zext( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[I:%.*]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = add nuw i32 %i, 1 + %idx.conv = zext i32 %idx to i64 + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx.conv + ret ptr %arrayidx +} + +define ptr @nuw_inbounds_implies_nuw_inbounds_zext_negative(ptr %p, i8 %i) { +; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_zext_negative( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IDX_CONV:%.*]] = zext i8 [[I:%.*]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[IDX_CONV]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 960 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = add nuw i8 %i, -16 + %idx.conv = zext i8 %idx to i64 + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx.conv + ret ptr %arrayidx +} + + +; This test and the following ones mask most bits of %v off to facilitate +; validation with alive2 while still allowing interesting values. 
+define ptr @nuw_inbounds_implies_nuw_inbounds_nested(ptr %p, i64 %i, i64 %v) { +; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_nested( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V_MASKED:%.*]] = and i64 [[V:%.*]], -1152921488500719601 +; CHECK-NEXT: [[IDX22:%.*]] = add i64 [[I:%.*]], [[V_MASKED]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[IDX22]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX3]] +; +entry: + %v.masked = and i64 %v, u0xf0000003c000000f + %idx1 = add nuw i64 %i, 1 + %idx2 = add nuw i64 %idx1, %v.masked + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx2 + ret ptr %arrayidx +} + +define ptr @nuw_inbounds_implies_nuw_inbounds_nested_negative(ptr %p, i64 %i, i64 %v) { +; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_nested_negative( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V:%.*]] = and i64 [[V1:%.*]], -1152921488500719601 +; CHECK-NEXT: [[IDX22:%.*]] = add i64 [[I:%.*]], [[V]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[IDX22]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 -64 +; CHECK-NEXT: ret ptr [[ARRAYIDX3]] +; +entry: + %v.masked = and i64 %v, u0xf0000003c000000f + %idx1 = add nuw i64 %i, -16 + %idx2 = add nuw i64 %idx1, %v.masked + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx2 + ret ptr %arrayidx +} + +define ptr @nuw_implies_nuw_nested(ptr %p, i64 %i, i64 %v) { +; CHECK-LABEL: @nuw_implies_nuw_nested( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V:%.*]] = and i64 [[V1:%.*]], -1152921488500719601 +; CHECK-NEXT: [[IDX22:%.*]] = add i64 [[I:%.*]], [[V]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nuw i32, ptr [[P:%.*]], i64 [[IDX22]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr nuw i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX3]] +; +entry: + %v.masked = and i64 %v, u0xf0000003c000000f + %idx1 = add nuw i64 %i, 1 + %idx2 = add nuw i64 %idx1, %v.masked + %arrayidx = getelementptr nuw i32, ptr %p, i64 %idx2 + ret ptr %arrayidx +} + +define ptr @nuw_implies_nuw_nested_negative(ptr %p, i64 %i, i64 %v) { +; CHECK-LABEL: @nuw_implies_nuw_nested_negative( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V:%.*]] = and i64 [[V1:%.*]], -1152921488500719601 +; CHECK-NEXT: [[IDX22:%.*]] = add i64 [[I:%.*]], [[V]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nuw i32, ptr [[P:%.*]], i64 [[IDX22]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr nuw i8, ptr [[TMP0]], i64 -64 +; CHECK-NEXT: ret ptr [[ARRAYIDX3]] +; +entry: + %v.masked = and i64 %v, u0xf0000003c000000f + %idx1 = add nuw i64 %i, -16 + %idx2 = add nuw i64 %idx1, %v.masked + %arrayidx = getelementptr nuw i32, ptr %p, i64 %idx2 + ret ptr %arrayidx +} + +define ptr @nuw_nusw_implies_nuw_nusw_nested(ptr %p, i64 %i, i64 %v) { +; CHECK-LABEL: @nuw_nusw_implies_nuw_nusw_nested( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V:%.*]] = and i64 [[V1:%.*]], -1152921488500719601 +; CHECK-NEXT: [[IDX22:%.*]] = add i64 [[I:%.*]], [[V]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nusw nuw i32, ptr [[P:%.*]], i64 [[IDX22]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr nusw nuw i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX3]] +; +entry: + %v.masked = and i64 %v, u0xf0000003c000000f + %idx1 = add nuw i64 %i, 1 + %idx2 = add nuw i64 %idx1, %v.masked + %arrayidx = getelementptr nusw nuw i32, ptr %p, i64 %idx2 + ret ptr %arrayidx +} + +define ptr @nuw_nusw_implies_nuw_nusw_nested_negative(ptr %p, i64 %i, i64 %v) { +; 
CHECK-LABEL: @nuw_nusw_implies_nuw_nusw_nested_negative( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V:%.*]] = and i64 [[V1:%.*]], -1152921488500719601 +; CHECK-NEXT: [[IDX22:%.*]] = add i64 [[I:%.*]], [[V]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nusw nuw i32, ptr [[P:%.*]], i64 [[IDX22]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr nusw nuw i8, ptr [[TMP0]], i64 -64 +; CHECK-NEXT: ret ptr [[ARRAYIDX3]] +; +entry: + %v.masked = and i64 %v, u0xf0000003c000000f + %idx1 = add nuw i64 %i, -16 + %idx2 = add nuw i64 %idx1, %v.masked + %arrayidx = getelementptr nusw nuw i32, ptr %p, i64 %idx2 + ret ptr %arrayidx +} + + +; Neither inbounds nor nuw can be preserved. +define ptr @nuw_inbounds_nested_not_all_nuw(ptr %p, i64 %i, i64 %v) { +; CHECK-LABEL: @nuw_inbounds_nested_not_all_nuw( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V:%.*]] = and i64 [[V1:%.*]], -1152921488500719601 +; CHECK-NEXT: [[IDX22:%.*]] = add i64 [[I:%.*]], [[V]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[IDX22]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX3]] +; +entry: + %v.masked = and i64 %v, u0xf0000003c000000f + %idx1 = add nuw i64 %i, 1 + %idx2 = add i64 %idx1, %v.masked + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx2 + ret ptr %arrayidx +} + + +define ptr @nuw_inbounds_implies_nuw_inbounds_multilevel(ptr %src, i64 %i1, i64 %i2) { +; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_multilevel( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[SRC:%.*]], i64 [[I1:%.*]], i64 [[I2:%.*]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 24 +; CHECK-NEXT: ret ptr [[ARRAYIDX3]] +; + %idx1 = add nuw i64 %i1, 1 + %idx2 = add nuw i64 2, %i2 + %arrayidx = getelementptr inbounds nuw [4 x i32], ptr %src, i64 %idx1, i64 %idx2 + ret ptr %arrayidx +} + +; Neither inbounds nor nuw can be preserved. +define ptr @nuw_inbounds_multilevel_not_all_nuw(ptr %src, i64 %i1, i64 %i2) { +; CHECK-LABEL: @nuw_inbounds_multilevel_not_all_nuw( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [4 x i32], ptr [[SRC:%.*]], i64 [[I1:%.*]], i64 [[I2:%.*]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 24 +; CHECK-NEXT: ret ptr [[ARRAYIDX3]] +; + %idx1 = add nuw i64 %i1, 1 + %idx2 = add i64 2, %i2 + %arrayidx = getelementptr inbounds nuw [4 x i32], ptr %src, i64 %idx1, i64 %idx2 + ret ptr %arrayidx +} + +; Missing information about non-extracted indices does not matter. 
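+; (An index from which no constant summand is extracted is left untouched, so
+; no reassociation happens through it that could invalidate the flags.)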
+define ptr @nuw_inbounds_implies_nuw_inbounds_multilevel_one_unfolded(ptr %src, i64 %i1, i64 %v) { +; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_multilevel_one_unfolded( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[SRC:%.*]], i64 [[I1:%.*]], i64 [[V:%.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; + %idx1 = add nuw i64 %i1, 1 + %arrayidx = getelementptr inbounds nuw [4 x i32], ptr %src, i64 %idx1, i64 %v + ret ptr %arrayidx +} + +define ptr @nuw_inbounds_implies_nuw_inbounds_multilevel_other_unfolded(ptr %src, i64 %i1, i64 %v) { +; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_multilevel_other_unfolded( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[SRC:%.*]], i64 [[V:%.*]], i64 [[I1:%.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; + %idx1 = add nuw i64 %i1, 1 + %arrayidx = getelementptr inbounds nuw [4 x i32], ptr %src, i64 %v, i64 %idx1 + ret ptr %arrayidx +} diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll index 9a73feb2c4b5c..4474585bf9b06 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll @@ -157,19 +157,19 @@ define void @sum_of_array3(i32 %x, i32 %y, ptr nocapture %output) { ; IR-NEXT: .preheader: ; IR-NEXT: [[TMP0:%.*]] = zext i32 [[Y]] to i64 ; IR-NEXT: [[TMP1:%.*]] = zext i32 [[X]] to i64 -; IR-NEXT: [[TMP2:%.*]] = getelementptr [32 x [32 x float]], ptr addrspace(3) @array, i64 0, i64 [[TMP1]], i64 [[TMP0]] +; IR-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x [32 x float]], ptr addrspace(3) @array, i64 0, i64 [[TMP1]], i64 [[TMP0]] ; IR-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(3) [[TMP2]] to ptr ; IR-NEXT: [[TMP4:%.*]] = load float, ptr [[TMP3]], align 4 ; IR-NEXT: [[TMP5:%.*]] = fadd float [[TMP4]], 0.000000e+00 -; IR-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 4 +; IR-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 4 ; IR-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(3) [[TMP6]] to ptr ; IR-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4 ; IR-NEXT: [[TMP9:%.*]] = fadd float [[TMP5]], [[TMP8]] -; IR-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 128 +; IR-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 128 ; IR-NEXT: [[TMP11:%.*]] = addrspacecast ptr addrspace(3) [[TMP10]] to ptr ; IR-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4 ; IR-NEXT: [[TMP13:%.*]] = fadd float [[TMP9]], [[TMP12]] -; IR-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 132 +; IR-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 132 ; IR-NEXT: [[TMP15:%.*]] = addrspacecast ptr addrspace(3) [[TMP14]] to ptr ; IR-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4 ; IR-NEXT: [[TMP17:%.*]] = fadd float [[TMP13]], [[TMP16]] @@ -224,19 +224,19 @@ define void @sum_of_array4(i32 %x, i32 %y, ptr nocapture %output) { ; IR-NEXT: .preheader: ; IR-NEXT: [[TMP0:%.*]] = zext i32 [[Y]] to i64 ; IR-NEXT: [[TMP1:%.*]] = zext i32 [[X]] to i64 -; IR-NEXT: [[TMP2:%.*]] = getelementptr [32 x [32 x float]], ptr addrspace(3) @array, i64 0, i64 [[TMP1]], i64 [[TMP0]] +; 
IR-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x [32 x float]], ptr addrspace(3) @array, i64 0, i64 [[TMP1]], i64 [[TMP0]] ; IR-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(3) [[TMP2]] to ptr ; IR-NEXT: [[TMP4:%.*]] = load float, ptr [[TMP3]], align 4 ; IR-NEXT: [[TMP5:%.*]] = fadd float [[TMP4]], 0.000000e+00 -; IR-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 4 +; IR-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 4 ; IR-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(3) [[TMP6]] to ptr ; IR-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4 ; IR-NEXT: [[TMP9:%.*]] = fadd float [[TMP5]], [[TMP8]] -; IR-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 128 +; IR-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 128 ; IR-NEXT: [[TMP11:%.*]] = addrspacecast ptr addrspace(3) [[TMP10]] to ptr ; IR-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4 ; IR-NEXT: [[TMP13:%.*]] = fadd float [[TMP9]], [[TMP12]] -; IR-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 132 +; IR-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 132 ; IR-NEXT: [[TMP15:%.*]] = addrspacecast ptr addrspace(3) [[TMP14]] to ptr ; IR-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4 ; IR-NEXT: [[TMP17:%.*]] = fadd float [[TMP13]], [[TMP16]] diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll index 77b3434f4f159..da04a6e979425 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll @@ -372,8 +372,8 @@ define ptr @trunk_explicit(ptr %ptr, i64 %idx) { ; CHECK-LABEL: define ptr @trunk_explicit( ; CHECK-SAME: ptr [[PTR:%.*]], i64 [[IDX:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT0:%.*]], ptr [[PTR]], i64 0, i32 3, i64 [[IDX]], i32 1 -; CHECK-NEXT: [[PTR21:%.*]] = getelementptr i8, ptr [[TMP0]], i64 3216 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT0:%.*]], ptr [[PTR]], i64 0, i32 3, i64 [[IDX]], i32 1 +; CHECK-NEXT: [[PTR21:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 3216 ; CHECK-NEXT: ret ptr [[PTR21]] ; entry: @@ -389,8 +389,8 @@ define ptr @trunk_long_idx(ptr %ptr, i64 %idx) { ; CHECK-LABEL: define ptr @trunk_long_idx( ; CHECK-SAME: ptr [[PTR:%.*]], i64 [[IDX:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT0:%.*]], ptr [[PTR]], i64 0, i32 3, i64 [[IDX]], i32 1 -; CHECK-NEXT: [[PTR21:%.*]] = getelementptr i8, ptr [[TMP0]], i64 3216 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT0:%.*]], ptr [[PTR]], i64 0, i32 3, i64 [[IDX]], i32 1 +; CHECK-NEXT: [[PTR21:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 3216 ; CHECK-NEXT: ret ptr [[PTR21]] ; entry: