diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index e048015298461..320b79203c0b3 100644 --- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -235,8 +235,10 @@ class ConstantOffsetExtractor { /// \p GEP The given GEP /// \p UserChainTail Outputs the tail of UserChain so that we can /// garbage-collect unused instructions in UserChain. + /// \p PreservesNUW Outputs whether the extraction allows preserving the + /// GEP's nuw flag, if it has one. static Value *Extract(Value *Idx, GetElementPtrInst *GEP, - User *&UserChainTail); + User *&UserChainTail, bool &PreservesNUW); /// Looks for a constant offset from the given GEP index without extracting /// it. It returns the numeric value of the extracted constant offset (0 if @@ -778,8 +780,32 @@ Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) { return NewBO; } +/// A helper function to check if reassociating through an entry in the user +/// chain would invalidate the GEP's nuw flag. +static bool allowsPreservingNUW(const User *U) { + if (const BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) { + // Binary operations need to be effectively add nuw. + auto Opcode = BO->getOpcode(); + if (Opcode == BinaryOperator::Or) { + // Ors are only considered here if they are disjoint. The addition that + // they represent in this case is NUW. + assert(cast<PossiblyDisjointInst>(BO)->isDisjoint()); + return true; + } + return Opcode == BinaryOperator::Add && BO->hasNoUnsignedWrap(); + } + // UserChain can only contain ConstantInt, CastInst, or BinaryOperator. + // Among the possible CastInsts, only trunc without nuw is a problem: If it + // is distributed through an add nuw, wrapping may occur: + // "add nuw trunc(a), trunc(b)" is more poisonous than "trunc(add nuw a, b)" + if (const TruncInst *TI = dyn_cast<TruncInst>(U)) + return TI->hasNoUnsignedWrap(); + return isa<ConstantInt>(U) || isa<CastInst>(U); +} + Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP, - User *&UserChainTail) { + User *&UserChainTail, + bool &PreservesNUW) { ConstantOffsetExtractor Extractor(GEP->getIterator()); // Find a non-zero constant offset first. APInt ConstantOffset = @@ -787,8 +813,12 @@ Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP, GEP->isInBounds()); if (ConstantOffset == 0) { UserChainTail = nullptr; + PreservesNUW = true; return nullptr; } + + PreservesNUW = all_of(Extractor.UserChain, allowsPreservingNUW); + // Separates the constant offset from the GEP index. Value *IdxWithoutConstOffset = Extractor.rebuildWithoutConstOffset(); UserChainTail = Extractor.UserChain.back(); @@ -1052,6 +1082,10 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { } } + // Track information for preserving GEP flags. + bool AllOffsetsNonNegative = AccumulativeByteOffset >= 0; + bool AllNUWPreserved = true; + // Remove the constant offset in each sequential index. The resultant GEP // computes the variadic base. // Notice that we don't remove struct field indices here. If LowerGEP is @@ -1070,8 +1104,9 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { // uses the variadic part as the new index.
Value *OldIdx = GEP->getOperand(I); User *UserChainTail; - Value *NewIdx = - ConstantOffsetExtractor::Extract(OldIdx, GEP, UserChainTail); + bool PreservesNUW; + Value *NewIdx = ConstantOffsetExtractor::Extract( + OldIdx, GEP, UserChainTail, PreservesNUW); if (NewIdx != nullptr) { // Switches to the index with the constant offset removed. GEP->setOperand(I, NewIdx); @@ -1079,6 +1114,9 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { // and the old index if they are not used. RecursivelyDeleteTriviallyDeadInstructions(UserChainTail); RecursivelyDeleteTriviallyDeadInstructions(OldIdx); + AllOffsetsNonNegative = + AllOffsetsNonNegative && isKnownNonNegative(NewIdx, *DL); + AllNUWPreserved &= PreservesNUW; } } } @@ -1099,12 +1137,35 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { // inbounds keyword is not present, the offsets are added to the base // address with silently-wrapping two's complement arithmetic". // Therefore, the final code will be a semantically equivalent. - // - // TODO(jingyue): do some range analysis to keep as many inbounds as - // possible. GEPs with inbounds are more friendly to alias analysis. - // TODO(gep_nowrap): Preserve nuw at least. GEPNoWrapFlags NewGEPFlags = GEPNoWrapFlags::none(); - GEP->setNoWrapFlags(GEPNoWrapFlags::none()); + + // If the initial GEP was inbounds/nusw and all variable indices and the + // accumulated offsets are non-negative, they can be added in any order and + // the intermediate results are in bounds and don't overflow in a nusw sense. + // So, we can preserve the inbounds/nusw flag for both GEPs. + bool CanPreserveInBoundsNUSW = AllOffsetsNonNegative; + + // If the initial GEP was NUW and all operations that we reassociate were NUW + // additions, the resulting GEPs are also NUW. + if (GEP->hasNoUnsignedWrap() && AllNUWPreserved) { + NewGEPFlags |= GEPNoWrapFlags::noUnsignedWrap(); + // If the initial GEP additionally had NUSW (or inbounds, which implies + // NUSW), we know that the indices in the initial GEP must all have their + // signbit not set. For indices that are the result of NUW adds, the + // add-operands therefore also don't have their signbit set. Therefore, all + // indices of the resulting GEPs are non-negative -> we can preserve + // the inbounds/nusw flag. + CanPreserveInBoundsNUSW |= GEP->hasNoUnsignedSignedWrap(); + } + + if (CanPreserveInBoundsNUSW) { + if (GEP->isInBounds()) + NewGEPFlags |= GEPNoWrapFlags::inBounds(); + else if (GEP->hasNoUnsignedSignedWrap()) + NewGEPFlags |= GEPNoWrapFlags::noUnsignedSignedWrap(); + } + + GEP->setNoWrapFlags(NewGEPFlags); // Lowers a GEP to either GEPs with a single index or arithmetic operations. 
if (LowerGEP) { diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll index 422e5d8215502..2a5b678e91fd8 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll @@ -1,5 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -passes=separate-const-offset-from-gep -S | FileCheck %s +; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=separate-const-offset-from-gep -S | FileCheck %s + +; gfx1200 is particularly interesting since it allows negative immediate offsets +; in flat instructions, so the transformation is applied in more cases. ; The inbounds flags cannot be preserved here: If the pointers point to the ; beginning of an object and %i is 1, the intermediate GEPs are out of bounds. @@ -16,3 +19,524 @@ entry: %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idx ret ptr %arrayidx } + +; All indices must be non-negative, so inbounds can be preserved. +define ptr @must_be_inbounds(ptr %p, i32 %i) { +; CHECK-LABEL: @must_be_inbounds( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[I_PROM:%.*]] = zext i32 [[I:%.*]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[I_PROM]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %i.prom = zext i32 %i to i64 + %idx = add nsw i64 %i.prom, 1 + %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idx + ret ptr %arrayidx +} + +; idx must be non-negative -> preserve inbounds +define ptr @sign_bit_clear(ptr %p, i64 %i) { +; CHECK-LABEL: @sign_bit_clear( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IDX:%.*]] = and i64 [[I:%.*]], 9223372036854775807 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[IDX]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX]] +; +entry: + %idx = and i64 %i, u0x7fffffffffffffff + %idx.add = add i64 %idx, 1 + %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idx.add + ret ptr %arrayidx +} + +; idx may be negative -> don't preserve inbounds +define ptr @sign_bit_not_clear(ptr %p, i64 %i) { +; CHECK-LABEL: @sign_bit_not_clear( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IDX:%.*]] = and i64 [[I:%.*]], -256 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[IDX]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = and i64 %i, u0xffffffffffffff00 + %idx.add = add i64 %idx, 1 + %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idx.add + ret ptr %arrayidx +} + +; idx may be 0 or very negative -> don't preserve inbounds +define ptr @only_sign_bit_not_clear(ptr %p, i64 %i) { +; CHECK-LABEL: @only_sign_bit_not_clear( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IDX:%.*]] = and i64 [[I:%.*]], -9223372036854775808 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[IDX]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = and i64 %i, u0x8000000000000000 + %idx.add = add i64 %idx, 1 + %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idx.add + ret ptr %arrayidx +} + +; all indices non-negative -> preserve inbounds +define ptr 
@multi_level_nonnegative(ptr %p, i64 %idx1, i64 %idx2) { +; CHECK-LABEL: @multi_level_nonnegative( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MASKED_IDX1:%.*]] = and i64 [[IDX1:%.*]], 255 +; CHECK-NEXT: [[MASKED_IDX2:%.*]] = and i64 [[IDX2:%.*]], 65535 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [10 x [20 x i32]], ptr [[P:%.*]], i64 0, i64 [[MASKED_IDX1]], i64 [[MASKED_IDX2]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 180 +; CHECK-NEXT: ret ptr [[ARRAYIDX3]] +; +entry: + %masked.idx1 = and i64 %idx1, u0xff + %masked.idx2 = and i64 %idx2, u0xffff + %idx1.add = add i64 %masked.idx1, 2 + %idx2.add = add i64 %masked.idx2, 5 + %arrayidx = getelementptr inbounds [10 x [20 x i32]], ptr %p, i64 0, i64 %idx1.add, i64 %idx2.add + ret ptr %arrayidx +} + +; It doesn't matter that %idx2.add might be negative, the indices in the resulting GEPs are all non-negative -> preserve inbounds +define ptr @multi_level_mixed_okay(ptr %p, i64 %idx1, i64 %idx2) { +; CHECK-LABEL: @multi_level_mixed_okay( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MASKED_IDX1:%.*]] = and i64 [[IDX1:%.*]], 255 +; CHECK-NEXT: [[MASKED_IDX2:%.*]] = and i64 [[IDX2:%.*]], 65535 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [10 x [20 x i32]], ptr [[P:%.*]], i64 0, i64 [[MASKED_IDX1]], i64 [[MASKED_IDX2]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 156 +; CHECK-NEXT: ret ptr [[ARRAYIDX3]] +; +entry: + %masked.idx1 = and i64 %idx1, u0xff + %masked.idx2 = and i64 %idx2, u0xffff + %idx1.add = add i64 %masked.idx1, 2 + %idx2.add = add i64 %masked.idx2, -1 + %arrayidx = getelementptr inbounds [10 x [20 x i32]], ptr %p, i64 0, i64 %idx1.add, i64 %idx2.add + ret ptr %arrayidx +} + +; One index may be negative -> don't preserve inbounds +define ptr @multi_level_mixed_not_okay(ptr %p, i64 %idx1, i64 %idx2) { +; CHECK-LABEL: @multi_level_mixed_not_okay( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MASKED_IDX1:%.*]] = and i64 [[IDX1:%.*]], -256 +; CHECK-NEXT: [[MASKED_IDX2:%.*]] = and i64 [[IDX2:%.*]], 65535 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [10 x [20 x i32]], ptr [[P:%.*]], i64 0, i64 [[MASKED_IDX1]], i64 [[MASKED_IDX2]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr i8, ptr [[TMP0]], i64 156 +; CHECK-NEXT: ret ptr [[ARRAYIDX3]] +; +entry: + %masked.idx1 = and i64 %idx1, u0xffffffffffffff00 + %masked.idx2 = and i64 %idx2, u0xffff + %idx1.add = add i64 %masked.idx1, 2 + %idx2.add = add i64 %masked.idx2, -1 + %arrayidx = getelementptr inbounds [10 x [20 x i32]], ptr %p, i64 0, i64 %idx1.add, i64 %idx2.add + ret ptr %arrayidx +} + + +define ptr @nuw_implies_nuw(ptr %p, i64 %i) { +; CHECK-LABEL: @nuw_implies_nuw( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nuw i32, ptr [[P:%.*]], i64 [[I:%.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr nuw i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = add nuw i64 %i, 1 + %arrayidx = getelementptr nuw i32, ptr %p, i64 %idx + ret ptr %arrayidx +} + +define ptr @nuw_implies_nuw_negative(ptr %p, i64 %i) { +; CHECK-LABEL: @nuw_implies_nuw_negative( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nuw i32, ptr [[P:%.*]], i64 [[I:%.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr nuw i8, ptr [[TMP0]], i64 -64 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = add nuw i64 %i, -16 + %arrayidx = getelementptr nuw i32, ptr %p, i64 %idx + ret ptr %arrayidx +} + +define ptr @nuw_inbounds_implies_nuw_inbounds(ptr %p, i64 %i) { +; CHECK-LABEL: 
@nuw_inbounds_implies_nuw_inbounds( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[I:%.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = add nuw i64 %i, 1 + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx + ret ptr %arrayidx +} + +; This is poison anyway, so we can preserve the flags. +define ptr @nuw_inbounds_implies_nuw_inbounds_negative(ptr %p, i64 %i) { +; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_negative( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[I:%.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 -64 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = add nuw i64 %i, -16 + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx + ret ptr %arrayidx +} + +define ptr @nuw_nusw_implies_nuw_nusw(ptr %p, i64 %i) { +; CHECK-LABEL: @nuw_nusw_implies_nuw_nusw( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nusw nuw i32, ptr [[P:%.*]], i64 [[I:%.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr nusw nuw i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = add nuw i64 %i, 1 + %arrayidx = getelementptr nusw nuw i32, ptr %p, i64 %idx + ret ptr %arrayidx +} + +; Also poison. +define ptr @nuw_implies_nuw_nusw_negative(ptr %p, i64 %i) { +; CHECK-LABEL: @nuw_implies_nuw_nusw_negative( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nusw nuw i32, ptr [[P:%.*]], i64 [[I:%.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr nusw nuw i8, ptr [[TMP0]], i64 -64 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = add nuw i64 %i, -16 + %arrayidx = getelementptr nusw nuw i32, ptr %p, i64 %idx + ret ptr %arrayidx +} + + +define ptr @nuw_inbounds_implies_nuw_inbounds_ordisjoint(ptr %p, i64 %i) { +; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_ordisjoint( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[I:%.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = or disjoint i64 %i, 1 + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx + ret ptr %arrayidx +} + +; This is poison anyway, so we can do the transformation. 
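+; (Illustration of why: disjointness forces the high bits of %i to be clear, so
+; %i < 16 and %idx is 2^64 - 16 + %i as an unsigned number; scaling that by the
+; i32 size under the GEP's nuw wraps unsigned, so the source GEP is already
+; poison and keeping the flags is a refinement.)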
+define ptr @nuw_inbounds_implies_nuw_inbounds_ordisjoint_negative(ptr %p, i64 %i) { +; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_ordisjoint_negative( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[I:%.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 -64 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = or disjoint i64 %i, -16 + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx + ret ptr %arrayidx +} + +; Check that nothing happens for non-disjoint ors +define ptr @or_no_disjoint(ptr %p, i64 %i) { +; CHECK-LABEL: @or_no_disjoint( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IDX:%.*]] = or i64 [[I:%.*]], 1 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[IDX]] +; CHECK-NEXT: ret ptr [[ARRAYIDX]] +; +entry: + %idx = or i64 %i, 1 + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx + ret ptr %arrayidx +} + +define ptr @no_nuw_inbounds_for_sub(ptr %p, i64 %i) { +; CHECK-LABEL: @no_nuw_inbounds_for_sub( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[I:%.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 -4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = sub nuw i64 %i, 1 + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx + ret ptr %arrayidx +} + +define ptr @no_nuw_inbounds_for_sub_negative(ptr %p, i64 %i) { +; CHECK-LABEL: @no_nuw_inbounds_for_sub_negative( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[I:%.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 64 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = sub nuw i64 %i, -16 + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx + ret ptr %arrayidx +} + +; Can't preserve nuw and other flags here as distributing the trunc towards the +; leaves can introduce new wraps. +define ptr @nuw_inbounds_trunc(ptr %p, i128 %i) { +; CHECK-LABEL: @nuw_inbounds_trunc( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = trunc i128 [[I:%.*]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = add nuw i128 %i, 1 + %idx.conv = trunc i128 %idx to i64 + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx.conv + ret ptr %arrayidx +} + +; trunc nuw is not a problem.
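+; (Illustration: nuw on the trunc means the dropped bits are known zero, so
+; distributing it through an add nuw cannot create a wrap in the narrow type.
+; Without it, e.g. for i16 values a = 255 and b = 1, "trunc (add nuw a, b)" to
+; i8 is a well-defined 0, while "add nuw (trunc a), (trunc b)" wraps in i8 and
+; is poison.)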
+define ptr @nuw_inbounds_implies_nuw_inbounds_trunc_nuw(ptr %p, i128 %i) { +; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_trunc_nuw( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw i128 [[I:%.*]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = add nuw i128 %i, 1 + %idx.conv = trunc nuw i128 %idx to i64 + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx.conv + ret ptr %arrayidx +} + +define ptr @nuw_inbounds_implies_nuw_inbounds_sext(ptr %p, i32 %i) { +; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_sext( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[I:%.*]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = add nuw i32 %i, 1 + %idx.conv = sext i32 %idx to i64 + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx.conv + ret ptr %arrayidx +} + +define ptr @nuw_inbounds_implies_nuw_inbounds_zext(ptr %p, i32 %i) { +; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_zext( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[I:%.*]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = add nuw i32 %i, 1 + %idx.conv = zext i32 %idx to i64 + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx.conv + ret ptr %arrayidx +} + +define ptr @nuw_inbounds_implies_nuw_inbounds_zext_negative(ptr %p, i8 %i) { +; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_zext_negative( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IDX_CONV:%.*]] = zext i8 [[I:%.*]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[IDX_CONV]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 960 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; +entry: + %idx = add nuw i8 %i, -16 + %idx.conv = zext i8 %idx to i64 + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx.conv + ret ptr %arrayidx +} + + +; This test and the following ones mask most bits of %v off to facilitate +; validation with alive2 while still allowing interesting values. 
+define ptr @nuw_inbounds_implies_nuw_inbounds_nested(ptr %p, i64 %i, i64 %v) { +; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_nested( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V_MASKED:%.*]] = and i64 [[V:%.*]], -1152921488500719601 +; CHECK-NEXT: [[IDX22:%.*]] = add i64 [[I:%.*]], [[V_MASKED]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[IDX22]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX3]] +; +entry: + %v.masked = and i64 %v, u0xf0000003c000000f + %idx1 = add nuw i64 %i, 1 + %idx2 = add nuw i64 %idx1, %v.masked + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx2 + ret ptr %arrayidx +} + +define ptr @nuw_inbounds_implies_nuw_inbounds_nested_negative(ptr %p, i64 %i, i64 %v) { +; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_nested_negative( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V:%.*]] = and i64 [[V1:%.*]], -1152921488500719601 +; CHECK-NEXT: [[IDX22:%.*]] = add i64 [[I:%.*]], [[V]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[IDX22]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 -64 +; CHECK-NEXT: ret ptr [[ARRAYIDX3]] +; +entry: + %v.masked = and i64 %v, u0xf0000003c000000f + %idx1 = add nuw i64 %i, -16 + %idx2 = add nuw i64 %idx1, %v.masked + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx2 + ret ptr %arrayidx +} + +define ptr @nuw_implies_nuw_nested(ptr %p, i64 %i, i64 %v) { +; CHECK-LABEL: @nuw_implies_nuw_nested( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V:%.*]] = and i64 [[V1:%.*]], -1152921488500719601 +; CHECK-NEXT: [[IDX22:%.*]] = add i64 [[I:%.*]], [[V]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nuw i32, ptr [[P:%.*]], i64 [[IDX22]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr nuw i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX3]] +; +entry: + %v.masked = and i64 %v, u0xf0000003c000000f + %idx1 = add nuw i64 %i, 1 + %idx2 = add nuw i64 %idx1, %v.masked + %arrayidx = getelementptr nuw i32, ptr %p, i64 %idx2 + ret ptr %arrayidx +} + +define ptr @nuw_implies_nuw_nested_negative(ptr %p, i64 %i, i64 %v) { +; CHECK-LABEL: @nuw_implies_nuw_nested_negative( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V:%.*]] = and i64 [[V1:%.*]], -1152921488500719601 +; CHECK-NEXT: [[IDX22:%.*]] = add i64 [[I:%.*]], [[V]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nuw i32, ptr [[P:%.*]], i64 [[IDX22]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr nuw i8, ptr [[TMP0]], i64 -64 +; CHECK-NEXT: ret ptr [[ARRAYIDX3]] +; +entry: + %v.masked = and i64 %v, u0xf0000003c000000f + %idx1 = add nuw i64 %i, -16 + %idx2 = add nuw i64 %idx1, %v.masked + %arrayidx = getelementptr nuw i32, ptr %p, i64 %idx2 + ret ptr %arrayidx +} + +define ptr @nuw_nusw_implies_nuw_nusw_nested(ptr %p, i64 %i, i64 %v) { +; CHECK-LABEL: @nuw_nusw_implies_nuw_nusw_nested( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V:%.*]] = and i64 [[V1:%.*]], -1152921488500719601 +; CHECK-NEXT: [[IDX22:%.*]] = add i64 [[I:%.*]], [[V]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nusw nuw i32, ptr [[P:%.*]], i64 [[IDX22]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr nusw nuw i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX3]] +; +entry: + %v.masked = and i64 %v, u0xf0000003c000000f + %idx1 = add nuw i64 %i, 1 + %idx2 = add nuw i64 %idx1, %v.masked + %arrayidx = getelementptr nusw nuw i32, ptr %p, i64 %idx2 + ret ptr %arrayidx +} + +define ptr @nuw_nusw_implies_nuw_nusw_nested_negative(ptr %p, i64 %i, i64 %v) { +; 
CHECK-LABEL: @nuw_nusw_implies_nuw_nusw_nested_negative( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V:%.*]] = and i64 [[V1:%.*]], -1152921488500719601 +; CHECK-NEXT: [[IDX22:%.*]] = add i64 [[I:%.*]], [[V]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nusw nuw i32, ptr [[P:%.*]], i64 [[IDX22]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr nusw nuw i8, ptr [[TMP0]], i64 -64 +; CHECK-NEXT: ret ptr [[ARRAYIDX3]] +; +entry: + %v.masked = and i64 %v, u0xf0000003c000000f + %idx1 = add nuw i64 %i, -16 + %idx2 = add nuw i64 %idx1, %v.masked + %arrayidx = getelementptr nusw nuw i32, ptr %p, i64 %idx2 + ret ptr %arrayidx +} + + +; Neither inbounds nor nuw can be preserved. +define ptr @nuw_inbounds_nested_not_all_nuw(ptr %p, i64 %i, i64 %v) { +; CHECK-LABEL: @nuw_inbounds_nested_not_all_nuw( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V:%.*]] = and i64 [[V1:%.*]], -1152921488500719601 +; CHECK-NEXT: [[IDX22:%.*]] = add i64 [[I:%.*]], [[V]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[IDX22]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX3]] +; +entry: + %v.masked = and i64 %v, u0xf0000003c000000f + %idx1 = add nuw i64 %i, 1 + %idx2 = add i64 %idx1, %v.masked + %arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx2 + ret ptr %arrayidx +} + + +define ptr @nuw_inbounds_implies_nuw_inbounds_multilevel(ptr %src, i64 %i1, i64 %i2) { +; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_multilevel( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[SRC:%.*]], i64 [[I1:%.*]], i64 [[I2:%.*]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 24 +; CHECK-NEXT: ret ptr [[ARRAYIDX3]] +; + %idx1 = add nuw i64 %i1, 1 + %idx2 = add nuw i64 2, %i2 + %arrayidx = getelementptr inbounds nuw [4 x i32], ptr %src, i64 %idx1, i64 %idx2 + ret ptr %arrayidx +} + +; Neither inbounds nor nuw can be preserved. +define ptr @nuw_inbounds_multilevel_not_all_nuw(ptr %src, i64 %i1, i64 %i2) { +; CHECK-LABEL: @nuw_inbounds_multilevel_not_all_nuw( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [4 x i32], ptr [[SRC:%.*]], i64 [[I1:%.*]], i64 [[I2:%.*]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 24 +; CHECK-NEXT: ret ptr [[ARRAYIDX3]] +; + %idx1 = add nuw i64 %i1, 1 + %idx2 = add i64 2, %i2 + %arrayidx = getelementptr inbounds nuw [4 x i32], ptr %src, i64 %idx1, i64 %idx2 + ret ptr %arrayidx +} + +; Missing information about non-extracted indices does not matter. 
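+; (An index from which no constant summand is extracted is left untouched, so
+; no reassociation happens through it that could invalidate the flags.)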
+define ptr @nuw_inbounds_implies_nuw_inbounds_multilevel_one_unfolded(ptr %src, i64 %i1, i64 %v) { +; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_multilevel_one_unfolded( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[SRC:%.*]], i64 [[I1:%.*]], i64 [[V:%.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; + %idx1 = add nuw i64 %i1, 1 + %arrayidx = getelementptr inbounds nuw [4 x i32], ptr %src, i64 %idx1, i64 %v + ret ptr %arrayidx +} + +define ptr @nuw_inbounds_implies_nuw_inbounds_multilevel_other_unfolded(ptr %src, i64 %i1, i64 %v) { +; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_multilevel_other_unfolded( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[SRC:%.*]], i64 [[V:%.*]], i64 [[I1:%.*]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 4 +; CHECK-NEXT: ret ptr [[ARRAYIDX2]] +; + %idx1 = add nuw i64 %i1, 1 + %arrayidx = getelementptr inbounds nuw [4 x i32], ptr %src, i64 %v, i64 %idx1 + ret ptr %arrayidx +} diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll index 9a73feb2c4b5c..4474585bf9b06 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll @@ -157,19 +157,19 @@ define void @sum_of_array3(i32 %x, i32 %y, ptr nocapture %output) { ; IR-NEXT: .preheader: ; IR-NEXT: [[TMP0:%.*]] = zext i32 [[Y]] to i64 ; IR-NEXT: [[TMP1:%.*]] = zext i32 [[X]] to i64 -; IR-NEXT: [[TMP2:%.*]] = getelementptr [32 x [32 x float]], ptr addrspace(3) @array, i64 0, i64 [[TMP1]], i64 [[TMP0]] +; IR-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x [32 x float]], ptr addrspace(3) @array, i64 0, i64 [[TMP1]], i64 [[TMP0]] ; IR-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(3) [[TMP2]] to ptr ; IR-NEXT: [[TMP4:%.*]] = load float, ptr [[TMP3]], align 4 ; IR-NEXT: [[TMP5:%.*]] = fadd float [[TMP4]], 0.000000e+00 -; IR-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 4 +; IR-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 4 ; IR-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(3) [[TMP6]] to ptr ; IR-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4 ; IR-NEXT: [[TMP9:%.*]] = fadd float [[TMP5]], [[TMP8]] -; IR-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 128 +; IR-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 128 ; IR-NEXT: [[TMP11:%.*]] = addrspacecast ptr addrspace(3) [[TMP10]] to ptr ; IR-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4 ; IR-NEXT: [[TMP13:%.*]] = fadd float [[TMP9]], [[TMP12]] -; IR-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 132 +; IR-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 132 ; IR-NEXT: [[TMP15:%.*]] = addrspacecast ptr addrspace(3) [[TMP14]] to ptr ; IR-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4 ; IR-NEXT: [[TMP17:%.*]] = fadd float [[TMP13]], [[TMP16]] @@ -224,19 +224,19 @@ define void @sum_of_array4(i32 %x, i32 %y, ptr nocapture %output) { ; IR-NEXT: .preheader: ; IR-NEXT: [[TMP0:%.*]] = zext i32 [[Y]] to i64 ; IR-NEXT: [[TMP1:%.*]] = zext i32 [[X]] to i64 -; IR-NEXT: [[TMP2:%.*]] = getelementptr [32 x [32 x float]], ptr addrspace(3) @array, i64 0, i64 [[TMP1]], i64 [[TMP0]] +; 
IR-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x [32 x float]], ptr addrspace(3) @array, i64 0, i64 [[TMP1]], i64 [[TMP0]] ; IR-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(3) [[TMP2]] to ptr ; IR-NEXT: [[TMP4:%.*]] = load float, ptr [[TMP3]], align 4 ; IR-NEXT: [[TMP5:%.*]] = fadd float [[TMP4]], 0.000000e+00 -; IR-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 4 +; IR-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 4 ; IR-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(3) [[TMP6]] to ptr ; IR-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4 ; IR-NEXT: [[TMP9:%.*]] = fadd float [[TMP5]], [[TMP8]] -; IR-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 128 +; IR-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 128 ; IR-NEXT: [[TMP11:%.*]] = addrspacecast ptr addrspace(3) [[TMP10]] to ptr ; IR-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4 ; IR-NEXT: [[TMP13:%.*]] = fadd float [[TMP9]], [[TMP12]] -; IR-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP2]], i64 132 +; IR-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP2]], i64 132 ; IR-NEXT: [[TMP15:%.*]] = addrspacecast ptr addrspace(3) [[TMP14]] to ptr ; IR-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4 ; IR-NEXT: [[TMP17:%.*]] = fadd float [[TMP13]], [[TMP16]] diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll index 77b3434f4f159..da04a6e979425 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll @@ -372,8 +372,8 @@ define ptr @trunk_explicit(ptr %ptr, i64 %idx) { ; CHECK-LABEL: define ptr @trunk_explicit( ; CHECK-SAME: ptr [[PTR:%.*]], i64 [[IDX:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT0:%.*]], ptr [[PTR]], i64 0, i32 3, i64 [[IDX]], i32 1 -; CHECK-NEXT: [[PTR21:%.*]] = getelementptr i8, ptr [[TMP0]], i64 3216 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT0:%.*]], ptr [[PTR]], i64 0, i32 3, i64 [[IDX]], i32 1 +; CHECK-NEXT: [[PTR21:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 3216 ; CHECK-NEXT: ret ptr [[PTR21]] ; entry: @@ -389,8 +389,8 @@ define ptr @trunk_long_idx(ptr %ptr, i64 %idx) { ; CHECK-LABEL: define ptr @trunk_long_idx( ; CHECK-SAME: ptr [[PTR:%.*]], i64 [[IDX:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT0:%.*]], ptr [[PTR]], i64 0, i32 3, i64 [[IDX]], i32 1 -; CHECK-NEXT: [[PTR21:%.*]] = getelementptr i8, ptr [[TMP0]], i64 3216 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT0:%.*]], ptr [[PTR]], i64 0, i32 3, i64 [[IDX]], i32 1 +; CHECK-NEXT: [[PTR21:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 3216 ; CHECK-NEXT: ret ptr [[PTR21]] ; entry: