diff --git a/llvm/include/llvm/IR/GEPNoWrapFlags.h b/llvm/include/llvm/IR/GEPNoWrapFlags.h
index 4e6ab0d88bfcf..0ecc3792353a9 100644
--- a/llvm/include/llvm/IR/GEPNoWrapFlags.h
+++ b/llvm/include/llvm/IR/GEPNoWrapFlags.h
@@ -84,6 +84,16 @@ class GEPNoWrapFlags {
     return Res;
   }
 
+  /// Given (gep (gep p, x), y), determine the nowrap flags for
+  /// (gep (gep p, y), x).
+  GEPNoWrapFlags intersectForReassociate(GEPNoWrapFlags Other) const {
+    GEPNoWrapFlags Res = *this & Other;
+    // We can only preserve inbounds and nusw if nuw is also set.
+    if (!Res.hasNoUnsignedWrap())
+      return none();
+    return Res;
+  }
+
   bool operator==(GEPNoWrapFlags Other) const { return Flags == Other.Flags; }
   bool operator!=(GEPNoWrapFlags Other) const { return !(*this == Other); }
 
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 1a9b54bc009bc..abb802bab265c 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2674,6 +2674,71 @@ static Instruction *canonicalizeGEPOfConstGEPI8(GetElementPtrInst &GEP,
   return nullptr;
 }
 
+/// Combine constant offsets separated by variable offsets.
+/// ptradd (ptradd (ptradd p, C1), x), C2 -> ptradd (ptradd p, x), C1+C2
+///
+/// Only applies when \p GEP has all-constant indices. The chain of pointer
+/// operands is walked through single-use GEPs until another all-constant GEP
+/// is found. Returns nullptr when the fold does not apply.
+static Instruction *combineConstantOffsets(GetElementPtrInst &GEP,
+                                           InstCombinerImpl &IC) {
+  if (!GEP.hasAllConstantIndices())
+    return nullptr;
+
+  // NW is the running intersectForReassociate() of the flags of every GEP
+  // visited below, including the inner constant-offset one.
+  GEPNoWrapFlags NW = GEPNoWrapFlags::all();
+  SmallVector<GetElementPtrInst *> Skipped;
+  auto *InnerGEP = dyn_cast<GetElementPtrInst>(GEP.getPointerOperand());
+  while (true) {
+    if (!InnerGEP)
+      return nullptr;
+
+    NW = NW.intersectForReassociate(InnerGEP->getNoWrapFlags());
+    if (InnerGEP->hasAllConstantIndices())
+      break;
+
+    if (!InnerGEP->hasOneUse())
+      return nullptr;
+
+    Skipped.push_back(InnerGEP);
+    InnerGEP = dyn_cast<GetElementPtrInst>(InnerGEP->getPointerOperand());
+  }
+
+  // The two constant offset GEPs are directly adjacent: Let normal offset
+  // merging handle it.
+  if (Skipped.empty())
+    return nullptr;
+
+  // FIXME: This one-use check is not strictly necessary. Consider relaxing it
+  // if profitable.
+  if (!InnerGEP->hasOneUse())
+    return nullptr;
+
+  // Don't bother with vector splats.
+  Type *Ty = GEP.getType();
+  if (InnerGEP->getType() != Ty)
+    return nullptr;
+
+  const DataLayout &DL = IC.getDataLayout();
+  APInt Offset(DL.getIndexTypeSizeInBits(Ty), 0);
+  if (!GEP.accumulateConstantOffset(DL, Offset) ||
+      !InnerGEP->accumulateConstantOffset(DL, Offset))
+    return nullptr;
+
+  // Detach the inner constant GEP from the chain and put the combined flags
+  // on every GEP that was skipped over.
+  IC.replaceOperand(*Skipped.back(), 0, InnerGEP->getPointerOperand());
+  for (GetElementPtrInst *SkippedGEP : Skipped)
+    SkippedGEP->setNoWrapFlags(NW);
+
+  // Apply the summed constant offset on top of the outermost skipped GEP.
+  return IC.replaceInstUsesWith(
+      GEP,
+      IC.Builder.CreatePtrAdd(Skipped.front(), IC.Builder.getInt(Offset), "",
+                              NW.intersectForOffsetAdd(GEP.getNoWrapFlags())));
+}
+
 Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
                                              GEPOperator *Src) {
   // Combine Indices - If the source pointer to this getelementptr instruction
@@ -2685,6 +2750,9 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
   if (auto *I = canonicalizeGEPOfConstGEPI8(GEP, Src, *this))
     return I;
 
+  if (auto *I = combineConstantOffsets(GEP, *this))
+    return I;
+
   // For constant GEPs, use a more general offset-based folding approach.
   Type *PtrTy = Src->getType()->getScalarType();
   if (GEP.hasAllConstantIndices() &&
diff --git a/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll b/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll
index fa006ff992ba3..8a4898ae923a7 100644
--- a/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll
+++ b/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll
@@ -50,10 +50,9 @@ define ptr @partialConstant2(ptr %p, i64 %a, i64 %b) {
 ; result = ((ptr) p + a) + 3
 define ptr @merge(ptr %p, i64 %a) {
 ; CHECK-LABEL: @merge(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i64 4
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 8
-; CHECK-NEXT:    ret ptr [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 12
+; CHECK-NEXT:    ret ptr [[TMP2]]
 ;
   %1 = getelementptr inbounds i32, ptr %p, i64 1
   %2 = getelementptr inbounds i32, ptr %1, i64 %a
@@ -67,13 +66,11 @@ define ptr @merge(ptr %p, i64 %a) {
 ; result = (ptr) ((ptr) ((ptr) ptr + a) + (a * b)) + 9
 define ptr @nested(ptr %p, i64 %a, i64 %b) {
 ; CHECK-LABEL: @nested(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i64 16
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[A]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 128
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i64 [[TMP3]]
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 16
-; CHECK-NEXT:    ret ptr [[TMP6]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = mul i64 [[A]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i16, ptr [[TMP1]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP3]], i64 160
+; CHECK-NEXT:    ret ptr [[TMP4]]
 ;
   %1 = getelementptr inbounds <3 x i32>, ptr %p, i64 1
   %2 = getelementptr inbounds i8, ptr %1, i64 %a
@@ -125,3 +122,138 @@ define ptr @multipleUses3(ptr %p) {
   %3 = getelementptr inbounds i32, ptr %1, i64 %2
   ret ptr %3
 }
+
+define ptr @merge_nuw(ptr %p, i64 %a) {
+; CHECK-LABEL: @merge_nuw(
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr nuw i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr nuw i8, ptr [[GEP2]], i64 5
+; CHECK-NEXT:    ret ptr [[GEP3]]
+;
+  %gep1 = getelementptr nuw i8, ptr %p, i64 1
+  %gep2 = getelementptr nuw i32, ptr %gep1, i64 %a
+  %gep3 = getelementptr nuw i32, ptr %gep2, i64 1
+  ret ptr %gep3
+}
+
+define ptr @merge_nuw_inbounds(ptr %p, i64 %a) {
+; CHECK-LABEL: @merge_nuw_inbounds(
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP2]], i64 5
+; CHECK-NEXT:    ret ptr [[GEP3]]
+;
+  %gep1 = getelementptr inbounds nuw i8, ptr %p, i64 1
+  %gep2 = getelementptr inbounds nuw i32, ptr %gep1, i64 %a
+  %gep3 = getelementptr inbounds nuw i32, ptr %gep2, i64 1
+  ret ptr %gep3
+}
+
+; It would be okay to preserve nusw here, as the constant addition does not
+; overflow.
+define ptr @merge_nuw_nusw(ptr %p, i64 %a) {
+; CHECK-LABEL: @merge_nuw_nusw(
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr nusw nuw i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr nuw i8, ptr [[GEP2]], i64 5
+; CHECK-NEXT:    ret ptr [[GEP3]]
+;
+  %gep1 = getelementptr nusw nuw i8, ptr %p, i64 1
+  %gep2 = getelementptr nusw nuw i32, ptr %gep1, i64 %a
+  %gep3 = getelementptr nusw nuw i32, ptr %gep2, i64 1
+  ret ptr %gep3
+}
+
+; Can't preserve nusw on the final GEP
+define ptr @merge_nuw_nusw_overflow(ptr %p, i64 %a) {
+; CHECK-LABEL: @merge_nuw_nusw_overflow(
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr nusw nuw i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr nuw i8, ptr [[GEP2]], i64 -2305843009213693952
+; CHECK-NEXT:    ret ptr [[GEP3]]
+;
+  %gep1 = getelementptr nusw nuw i8, ptr %p, i64 u0x7000000000000000
+  %gep2 = getelementptr nusw nuw i32, ptr %gep1, i64 %a
+  %gep3 = getelementptr nusw nuw i8, ptr %gep2, i64 u0x7000000000000000
+  ret ptr %gep3
+}
+
+define ptr @merge_missing_nuw1(ptr %p, i64 %a) {
+; CHECK-LABEL: @merge_missing_nuw1(
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 5
+; CHECK-NEXT:    ret ptr [[GEP3]]
+;
+  %gep1 = getelementptr i8, ptr %p, i64 1
+  %gep2 = getelementptr nuw i32, ptr %gep1, i64 %a
+  %gep3 = getelementptr nuw i32, ptr %gep2, i64 1
+  ret ptr %gep3
+}
+
+define ptr @merge_missing_nuw2(ptr %p, i64 %a) {
+; CHECK-LABEL: @merge_missing_nuw2(
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 5
+; CHECK-NEXT:    ret ptr [[GEP3]]
+;
+  %gep1 = getelementptr nuw i8, ptr %p, i64 1
+  %gep2 = getelementptr i32, ptr %gep1, i64 %a
+  %gep3 = getelementptr nuw i32, ptr %gep2, i64 1
+  ret ptr %gep3
+}
+
+define ptr @merge_missing_nuw3(ptr %p, i64 %a) {
+; CHECK-LABEL: @merge_missing_nuw3(
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr nuw i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 5
+; CHECK-NEXT:    ret ptr [[GEP3]]
+;
+  %gep1 = getelementptr nuw i8, ptr %p, i64 1
+  %gep2 = getelementptr nuw i32, ptr %gep1, i64 %a
+  %gep3 = getelementptr i32, ptr %gep2, i64 1
+  ret ptr %gep3
+}
+
+define ptr @merge_nuw_missing_inbounds(ptr %p, i64 %a) {
+; CHECK-LABEL: @merge_nuw_missing_inbounds(
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr nuw i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr nuw i8, ptr [[GEP2]], i64 5
+; CHECK-NEXT:    ret ptr [[GEP3]]
+;
+  %gep1 = getelementptr nuw i8, ptr %p, i64 1
+  %gep2 = getelementptr inbounds nuw i32, ptr %gep1, i64 %a
+  %gep3 = getelementptr inbounds nuw i32, ptr %gep2, i64 1
+  ret ptr %gep3
+}
+
+define ptr @merge_nuw_missing_nusw(ptr %p, i64 %a) {
+; CHECK-LABEL: @merge_nuw_missing_nusw(
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr nuw i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr nuw i8, ptr [[GEP2]], i64 5
+; CHECK-NEXT:    ret ptr [[GEP3]]
+;
+  %gep1 = getelementptr nusw nuw i8, ptr %p, i64 1
+  %gep2 = getelementptr nuw i32, ptr %gep1, i64 %a
+  %gep3 = getelementptr nusw nuw i32, ptr %gep2, i64 1
+  ret ptr %gep3
+}
+
+define ptr @merge_inbounds_missing_nuw(ptr %p, i64 %a) {
+; CHECK-LABEL: @merge_inbounds_missing_nuw(
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 5
+; CHECK-NEXT:    ret ptr [[GEP3]]
+;
+  %gep1 = getelementptr inbounds nuw i8, ptr %p, i64 1
+  %gep2 = getelementptr inbounds i32, ptr %gep1, i64 %a
+  %gep3 = getelementptr inbounds nuw i32, ptr %gep2, i64 1
+  ret ptr %gep3
+}
+
+define ptr @merge_nusw_missing_nuw(ptr %p, i64 %a) {
+; CHECK-LABEL: @merge_nusw_missing_nuw(
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 5
+; CHECK-NEXT:    ret ptr [[GEP3]]
+;
+  %gep1 = getelementptr nusw nuw i8, ptr %p, i64 1
+  %gep2 = getelementptr nusw i32, ptr %gep1, i64 %a
+  %gep3 = getelementptr nusw nuw i32, ptr %gep2, i64 1
+  ret ptr %gep3
+}
diff --git a/llvm/test/Transforms/InstCombine/gepofconstgepi8.ll b/llvm/test/Transforms/InstCombine/gepofconstgepi8.ll
index e2f22b8322d2b..5fb82d2328dde 100644
--- a/llvm/test/Transforms/InstCombine/gepofconstgepi8.ll
+++ b/llvm/test/Transforms/InstCombine/gepofconstgepi8.ll
@@ -85,10 +85,8 @@ define ptr @test_zero_sext_add_nsw(ptr %base, i32 %a) {
 ; CHECK-LABEL: define ptr @test_zero_sext_add_nsw(
 ; CHECK-SAME: ptr [[BASE:%.*]], i32 [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4
 ; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[A]] to i64
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, ptr [[P1]], i64 [[TMP0]]
-; CHECK-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 4
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr i32, ptr [[BASE]], i64 [[TMP0]]
 ; CHECK-NEXT:    ret ptr [[P2]]
 ;
 entry: