-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[InstCombine] Merge constant offset geps across variable geps #156326
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-llvm-ir Author: Nikita Popov (nikic) ChangesFold: To: An alternative to this would be to generally canonicalize constant offset GEPs to the right. I found the results of doing that somewhat mixed, so I'm going for this more obviously beneficial change for now. Proof for flag preservation: https://alive2.llvm.org/ce/z/gmpAMg Full diff: https://github.com/llvm/llvm-project/pull/156326.diff 4 Files Affected:
diff --git a/llvm/include/llvm/IR/GEPNoWrapFlags.h b/llvm/include/llvm/IR/GEPNoWrapFlags.h
index 4e6ab0d88bfcf..0ecc3792353a9 100644
--- a/llvm/include/llvm/IR/GEPNoWrapFlags.h
+++ b/llvm/include/llvm/IR/GEPNoWrapFlags.h
@@ -84,6 +84,16 @@ class GEPNoWrapFlags {
return Res;
}
+ /// Given (gep (gep p, x), y), determine the nowrap flags for
+ /// (gep (gep p, y), x).
+ GEPNoWrapFlags intersectForReassociate(GEPNoWrapFlags Other) const {
+ GEPNoWrapFlags Res = *this & Other;
+ // We can only preserve inbounds and nusw if nuw is also set.
+ if (!Res.hasNoUnsignedWrap())
+ return none();
+ return Res;
+ }
+
bool operator==(GEPNoWrapFlags Other) const { return Flags == Other.Flags; }
bool operator!=(GEPNoWrapFlags Other) const { return !(*this == Other); }
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 5ed091a631896..c2e0c25620b2b 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2676,6 +2676,61 @@ static Instruction *canonicalizeGEPOfConstGEPI8(GetElementPtrInst &GEP,
return nullptr;
}
+// Combine constant offsets separated by variable offsets.
+// ptradd (ptradd (ptradd p, C1), x), C2 -> ptradd (ptradd p, x), C1+C2
+static Instruction *combineConstantOffsets(GetElementPtrInst &GEP,
+ InstCombinerImpl &IC) {
+ if (!GEP.hasAllConstantIndices())
+ return nullptr;
+
+ GEPNoWrapFlags NW = GEP.getNoWrapFlags();
+ SmallVector<GetElementPtrInst *> Skipped;
+ auto *InnerGEP = dyn_cast<GetElementPtrInst>(GEP.getPointerOperand());
+ while (true) {
+ if (!InnerGEP)
+ return nullptr;
+
+ NW = NW.intersectForReassociate(InnerGEP->getNoWrapFlags());
+ if (InnerGEP->hasAllConstantIndices())
+ break;
+
+ if (!InnerGEP->hasOneUse())
+ return nullptr;
+
+ Skipped.push_back(InnerGEP);
+ InnerGEP = dyn_cast<GetElementPtrInst>(InnerGEP->getPointerOperand());
+ }
+
+ // The two constant offset GEPs are directly adjacent: Let normal offset
+ // merging handle it.
+ if (Skipped.empty())
+ return nullptr;
+
+ // FIXME: This one-use check is not strictly necessary. Consider relaxing it
+ // if profitable.
+ if (!InnerGEP->hasOneUse())
+ return nullptr;
+
+ // Don't bother with vector splats.
+ Type *Ty = GEP.getType();
+ if (InnerGEP->getType() != Ty)
+ return nullptr;
+
+ const DataLayout &DL = IC.getDataLayout();
+ APInt Offset(DL.getIndexTypeSizeInBits(Ty), 0);
+ if (!GEP.accumulateConstantOffset(DL, Offset) ||
+ !InnerGEP->accumulateConstantOffset(DL, Offset))
+ return nullptr;
+
+ IC.replaceOperand(*Skipped.back(), 0, InnerGEP->getPointerOperand());
+ for (GetElementPtrInst *SkippedGEP : Skipped)
+ SkippedGEP->setNoWrapFlags(NW);
+
+ return IC.replaceInstUsesWith(
+ GEP, IC.Builder.CreatePtrAdd(Skipped.front(), IC.Builder.getInt(Offset),
+ "", NW));
+}
+
Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
GEPOperator *Src) {
// Combine Indices - If the source pointer to this getelementptr instruction
@@ -2687,6 +2742,9 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
if (auto *I = canonicalizeGEPOfConstGEPI8(GEP, Src, *this))
return I;
+ if (auto *I = combineConstantOffsets(GEP, *this))
+ return I;
+
// For constant GEPs, use a more general offset-based folding approach.
Type *PtrTy = Src->getType()->getScalarType();
if (GEP.hasAllConstantIndices() &&
diff --git a/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll b/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll
index fa006ff992ba3..63d0cbf76a671 100644
--- a/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll
+++ b/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll
@@ -50,10 +50,9 @@ define ptr @partialConstant2(ptr %p, i64 %a, i64 %b) {
; result = ((ptr) p + a) + 3
define ptr @merge(ptr %p, i64 %a) {
; CHECK-LABEL: @merge(
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i64 4
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 8
-; CHECK-NEXT: ret ptr [[TMP3]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 12
+; CHECK-NEXT: ret ptr [[TMP2]]
;
%1 = getelementptr inbounds i32, ptr %p, i64 1
%2 = getelementptr inbounds i32, ptr %1, i64 %a
@@ -67,13 +66,11 @@ define ptr @merge(ptr %p, i64 %a) {
; result = (ptr) ((ptr) ((ptr) ptr + a) + (a * b)) + 9
define ptr @nested(ptr %p, i64 %a, i64 %b) {
; CHECK-LABEL: @nested(
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i64 16
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[A]], [[B:%.*]]
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 128
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i64 [[TMP3]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 16
-; CHECK-NEXT: ret ptr [[TMP6]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[A]], [[B:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[TMP1]], i64 [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[TMP3]], i64 160
+; CHECK-NEXT: ret ptr [[TMP4]]
;
%1 = getelementptr inbounds <3 x i32>, ptr %p, i64 1
%2 = getelementptr inbounds i8, ptr %1, i64 %a
@@ -125,3 +122,123 @@ define ptr @multipleUses3(ptr %p) {
%3 = getelementptr inbounds i32, ptr %1, i64 %2
ret ptr %3
}
+
+define ptr @merge_nuw(ptr %p, i64 %a) {
+; CHECK-LABEL: @merge_nuw(
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr nuw i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr nuw i8, ptr [[GEP2]], i64 5
+; CHECK-NEXT: ret ptr [[GEP3]]
+;
+ %gep1 = getelementptr nuw i8, ptr %p, i64 1
+ %gep2 = getelementptr nuw i32, ptr %gep1, i64 %a
+ %gep3 = getelementptr nuw i32, ptr %gep2, i64 1
+ ret ptr %gep3
+}
+
+define ptr @merge_nuw_inbounds(ptr %p, i64 %a) {
+; CHECK-LABEL: @merge_nuw_inbounds(
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP2]], i64 5
+; CHECK-NEXT: ret ptr [[GEP3]]
+;
+ %gep1 = getelementptr inbounds nuw i8, ptr %p, i64 1
+ %gep2 = getelementptr inbounds nuw i32, ptr %gep1, i64 %a
+ %gep3 = getelementptr inbounds nuw i32, ptr %gep2, i64 1
+ ret ptr %gep3
+}
+
+define ptr @merge_nuw_nusw(ptr %p, i64 %a) {
+; CHECK-LABEL: @merge_nuw_nusw(
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr nusw nuw i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr nusw nuw i8, ptr [[GEP2]], i64 5
+; CHECK-NEXT: ret ptr [[GEP3]]
+;
+ %gep1 = getelementptr nusw nuw i8, ptr %p, i64 1
+ %gep2 = getelementptr nusw nuw i32, ptr %gep1, i64 %a
+ %gep3 = getelementptr nusw nuw i32, ptr %gep2, i64 1
+ ret ptr %gep3
+}
+
+define ptr @merge_missing_nuw1(ptr %p, i64 %a) {
+; CHECK-LABEL: @merge_missing_nuw1(
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 5
+; CHECK-NEXT: ret ptr [[GEP3]]
+;
+ %gep1 = getelementptr i8, ptr %p, i64 1
+ %gep2 = getelementptr nuw i32, ptr %gep1, i64 %a
+ %gep3 = getelementptr nuw i32, ptr %gep2, i64 1
+ ret ptr %gep3
+}
+
+define ptr @merge_missing_nuw2(ptr %p, i64 %a) {
+; CHECK-LABEL: @merge_missing_nuw2(
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 5
+; CHECK-NEXT: ret ptr [[GEP3]]
+;
+ %gep1 = getelementptr nuw i8, ptr %p, i64 1
+ %gep2 = getelementptr i32, ptr %gep1, i64 %a
+ %gep3 = getelementptr nuw i32, ptr %gep2, i64 1
+ ret ptr %gep3
+}
+
+define ptr @merge_missing_nuw3(ptr %p, i64 %a) {
+; CHECK-LABEL: @merge_missing_nuw3(
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 5
+; CHECK-NEXT: ret ptr [[GEP3]]
+;
+ %gep1 = getelementptr nuw i8, ptr %p, i64 1
+ %gep2 = getelementptr nuw i32, ptr %gep1, i64 %a
+ %gep3 = getelementptr i32, ptr %gep2, i64 1
+ ret ptr %gep3
+}
+
+define ptr @merge_nuw_missing_inbounds(ptr %p, i64 %a) {
+; CHECK-LABEL: @merge_nuw_missing_inbounds(
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr nuw i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr nuw i8, ptr [[GEP2]], i64 5
+; CHECK-NEXT: ret ptr [[GEP3]]
+;
+ %gep1 = getelementptr nuw i8, ptr %p, i64 1
+ %gep2 = getelementptr inbounds nuw i32, ptr %gep1, i64 %a
+ %gep3 = getelementptr inbounds nuw i32, ptr %gep2, i64 1
+ ret ptr %gep3
+}
+
+define ptr @merge_nuw_missing_nusw(ptr %p, i64 %a) {
+; CHECK-LABEL: @merge_nuw_missing_nusw(
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr nuw i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr nuw i8, ptr [[GEP2]], i64 5
+; CHECK-NEXT: ret ptr [[GEP3]]
+;
+ %gep1 = getelementptr nusw nuw i8, ptr %p, i64 1
+ %gep2 = getelementptr nuw i32, ptr %gep1, i64 %a
+ %gep3 = getelementptr nusw nuw i32, ptr %gep2, i64 1
+ ret ptr %gep3
+}
+
+define ptr @merge_inbounds_missing_nuw(ptr %p, i64 %a) {
+; CHECK-LABEL: @merge_inbounds_missing_nuw(
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 5
+; CHECK-NEXT: ret ptr [[GEP3]]
+;
+ %gep1 = getelementptr inbounds nuw i8, ptr %p, i64 1
+ %gep2 = getelementptr inbounds i32, ptr %gep1, i64 %a
+ %gep3 = getelementptr inbounds nuw i32, ptr %gep2, i64 1
+ ret ptr %gep3
+}
+
+define ptr @merge_nusw_missing_nuw(ptr %p, i64 %a) {
+; CHECK-LABEL: @merge_nusw_missing_nuw(
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 5
+; CHECK-NEXT: ret ptr [[GEP3]]
+;
+ %gep1 = getelementptr nusw nuw i8, ptr %p, i64 1
+ %gep2 = getelementptr nusw i32, ptr %gep1, i64 %a
+ %gep3 = getelementptr nusw nuw i32, ptr %gep2, i64 1
+ ret ptr %gep3
+}
diff --git a/llvm/test/Transforms/InstCombine/gepofconstgepi8.ll b/llvm/test/Transforms/InstCombine/gepofconstgepi8.ll
index e2f22b8322d2b..5fb82d2328dde 100644
--- a/llvm/test/Transforms/InstCombine/gepofconstgepi8.ll
+++ b/llvm/test/Transforms/InstCombine/gepofconstgepi8.ll
@@ -85,10 +85,8 @@ define ptr @test_zero_sext_add_nsw(ptr %base, i32 %a) {
; CHECK-LABEL: define ptr @test_zero_sext_add_nsw(
; CHECK-SAME: ptr [[BASE:%.*]], i32 [[A:%.*]]) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[A]] to i64
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P1]], i64 [[TMP0]]
-; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 4
+; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[BASE]], i64 [[TMP0]]
; CHECK-NEXT: ret ptr [[P2]]
;
entry:
|
|
@llvm/pr-subscribers-llvm-transforms Author: Nikita Popov (nikic) ChangesFold: To: An alternative to this would be to generally canonicalize constant offset GEPs to the right. I found the results of doing that somewhat mixed, so I'm going for this more obviously beneficial change for now. Proof for flag preservation: https://alive2.llvm.org/ce/z/gmpAMg Full diff: https://github.com/llvm/llvm-project/pull/156326.diff 4 Files Affected:
diff --git a/llvm/include/llvm/IR/GEPNoWrapFlags.h b/llvm/include/llvm/IR/GEPNoWrapFlags.h
index 4e6ab0d88bfcf..0ecc3792353a9 100644
--- a/llvm/include/llvm/IR/GEPNoWrapFlags.h
+++ b/llvm/include/llvm/IR/GEPNoWrapFlags.h
@@ -84,6 +84,16 @@ class GEPNoWrapFlags {
return Res;
}
+ /// Given (gep (gep p, x), y), determine the nowrap flags for
+ /// (gep (gep p, y), x).
+ GEPNoWrapFlags intersectForReassociate(GEPNoWrapFlags Other) const {
+ GEPNoWrapFlags Res = *this & Other;
+ // We can only preserve inbounds and nusw if nuw is also set.
+ if (!Res.hasNoUnsignedWrap())
+ return none();
+ return Res;
+ }
+
bool operator==(GEPNoWrapFlags Other) const { return Flags == Other.Flags; }
bool operator!=(GEPNoWrapFlags Other) const { return !(*this == Other); }
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 5ed091a631896..c2e0c25620b2b 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2676,6 +2676,61 @@ static Instruction *canonicalizeGEPOfConstGEPI8(GetElementPtrInst &GEP,
return nullptr;
}
+// Combine constant offsets separated by variable offsets.
+// ptradd (ptradd (ptradd p, C1), x), C2 -> ptradd (ptradd p, x), C1+C2
+static Instruction *combineConstantOffsets(GetElementPtrInst &GEP,
+ InstCombinerImpl &IC) {
+ if (!GEP.hasAllConstantIndices())
+ return nullptr;
+
+ GEPNoWrapFlags NW = GEP.getNoWrapFlags();
+ SmallVector<GetElementPtrInst *> Skipped;
+ auto *InnerGEP = dyn_cast<GetElementPtrInst>(GEP.getPointerOperand());
+ while (true) {
+ if (!InnerGEP)
+ return nullptr;
+
+ NW = NW.intersectForReassociate(InnerGEP->getNoWrapFlags());
+ if (InnerGEP->hasAllConstantIndices())
+ break;
+
+ if (!InnerGEP->hasOneUse())
+ return nullptr;
+
+ Skipped.push_back(InnerGEP);
+ InnerGEP = dyn_cast<GetElementPtrInst>(InnerGEP->getPointerOperand());
+ }
+
+ // The two constant offset GEPs are directly adjacent: Let normal offset
+ // merging handle it.
+ if (Skipped.empty())
+ return nullptr;
+
+ // FIXME: This one-use check is not strictly necessary. Consider relaxing it
+ // if profitable.
+ if (!InnerGEP->hasOneUse())
+ return nullptr;
+
+ // Don't bother with vector splats.
+ Type *Ty = GEP.getType();
+ if (InnerGEP->getType() != Ty)
+ return nullptr;
+
+ const DataLayout &DL = IC.getDataLayout();
+ APInt Offset(DL.getIndexTypeSizeInBits(Ty), 0);
+ if (!GEP.accumulateConstantOffset(DL, Offset) ||
+ !InnerGEP->accumulateConstantOffset(DL, Offset))
+ return nullptr;
+
+ IC.replaceOperand(*Skipped.back(), 0, InnerGEP->getPointerOperand());
+ for (GetElementPtrInst *SkippedGEP : Skipped)
+ SkippedGEP->setNoWrapFlags(NW);
+
+ return IC.replaceInstUsesWith(
+ GEP, IC.Builder.CreatePtrAdd(Skipped.front(), IC.Builder.getInt(Offset),
+ "", NW));
+}
+
Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
GEPOperator *Src) {
// Combine Indices - If the source pointer to this getelementptr instruction
@@ -2687,6 +2742,9 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
if (auto *I = canonicalizeGEPOfConstGEPI8(GEP, Src, *this))
return I;
+ if (auto *I = combineConstantOffsets(GEP, *this))
+ return I;
+
// For constant GEPs, use a more general offset-based folding approach.
Type *PtrTy = Src->getType()->getScalarType();
if (GEP.hasAllConstantIndices() &&
diff --git a/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll b/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll
index fa006ff992ba3..63d0cbf76a671 100644
--- a/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll
+++ b/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll
@@ -50,10 +50,9 @@ define ptr @partialConstant2(ptr %p, i64 %a, i64 %b) {
; result = ((ptr) p + a) + 3
define ptr @merge(ptr %p, i64 %a) {
; CHECK-LABEL: @merge(
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i64 4
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 8
-; CHECK-NEXT: ret ptr [[TMP3]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 12
+; CHECK-NEXT: ret ptr [[TMP2]]
;
%1 = getelementptr inbounds i32, ptr %p, i64 1
%2 = getelementptr inbounds i32, ptr %1, i64 %a
@@ -67,13 +66,11 @@ define ptr @merge(ptr %p, i64 %a) {
; result = (ptr) ((ptr) ((ptr) ptr + a) + (a * b)) + 9
define ptr @nested(ptr %p, i64 %a, i64 %b) {
; CHECK-LABEL: @nested(
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i64 16
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[A]], [[B:%.*]]
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 128
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i64 [[TMP3]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 16
-; CHECK-NEXT: ret ptr [[TMP6]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[A]], [[B:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[TMP1]], i64 [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[TMP3]], i64 160
+; CHECK-NEXT: ret ptr [[TMP4]]
;
%1 = getelementptr inbounds <3 x i32>, ptr %p, i64 1
%2 = getelementptr inbounds i8, ptr %1, i64 %a
@@ -125,3 +122,123 @@ define ptr @multipleUses3(ptr %p) {
%3 = getelementptr inbounds i32, ptr %1, i64 %2
ret ptr %3
}
+
+define ptr @merge_nuw(ptr %p, i64 %a) {
+; CHECK-LABEL: @merge_nuw(
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr nuw i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr nuw i8, ptr [[GEP2]], i64 5
+; CHECK-NEXT: ret ptr [[GEP3]]
+;
+ %gep1 = getelementptr nuw i8, ptr %p, i64 1
+ %gep2 = getelementptr nuw i32, ptr %gep1, i64 %a
+ %gep3 = getelementptr nuw i32, ptr %gep2, i64 1
+ ret ptr %gep3
+}
+
+define ptr @merge_nuw_inbounds(ptr %p, i64 %a) {
+; CHECK-LABEL: @merge_nuw_inbounds(
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP2]], i64 5
+; CHECK-NEXT: ret ptr [[GEP3]]
+;
+ %gep1 = getelementptr inbounds nuw i8, ptr %p, i64 1
+ %gep2 = getelementptr inbounds nuw i32, ptr %gep1, i64 %a
+ %gep3 = getelementptr inbounds nuw i32, ptr %gep2, i64 1
+ ret ptr %gep3
+}
+
+define ptr @merge_nuw_nusw(ptr %p, i64 %a) {
+; CHECK-LABEL: @merge_nuw_nusw(
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr nusw nuw i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr nusw nuw i8, ptr [[GEP2]], i64 5
+; CHECK-NEXT: ret ptr [[GEP3]]
+;
+ %gep1 = getelementptr nusw nuw i8, ptr %p, i64 1
+ %gep2 = getelementptr nusw nuw i32, ptr %gep1, i64 %a
+ %gep3 = getelementptr nusw nuw i32, ptr %gep2, i64 1
+ ret ptr %gep3
+}
+
+define ptr @merge_missing_nuw1(ptr %p, i64 %a) {
+; CHECK-LABEL: @merge_missing_nuw1(
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 5
+; CHECK-NEXT: ret ptr [[GEP3]]
+;
+ %gep1 = getelementptr i8, ptr %p, i64 1
+ %gep2 = getelementptr nuw i32, ptr %gep1, i64 %a
+ %gep3 = getelementptr nuw i32, ptr %gep2, i64 1
+ ret ptr %gep3
+}
+
+define ptr @merge_missing_nuw2(ptr %p, i64 %a) {
+; CHECK-LABEL: @merge_missing_nuw2(
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 5
+; CHECK-NEXT: ret ptr [[GEP3]]
+;
+ %gep1 = getelementptr nuw i8, ptr %p, i64 1
+ %gep2 = getelementptr i32, ptr %gep1, i64 %a
+ %gep3 = getelementptr nuw i32, ptr %gep2, i64 1
+ ret ptr %gep3
+}
+
+define ptr @merge_missing_nuw3(ptr %p, i64 %a) {
+; CHECK-LABEL: @merge_missing_nuw3(
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 5
+; CHECK-NEXT: ret ptr [[GEP3]]
+;
+ %gep1 = getelementptr nuw i8, ptr %p, i64 1
+ %gep2 = getelementptr nuw i32, ptr %gep1, i64 %a
+ %gep3 = getelementptr i32, ptr %gep2, i64 1
+ ret ptr %gep3
+}
+
+define ptr @merge_nuw_missing_inbounds(ptr %p, i64 %a) {
+; CHECK-LABEL: @merge_nuw_missing_inbounds(
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr nuw i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr nuw i8, ptr [[GEP2]], i64 5
+; CHECK-NEXT: ret ptr [[GEP3]]
+;
+ %gep1 = getelementptr nuw i8, ptr %p, i64 1
+ %gep2 = getelementptr inbounds nuw i32, ptr %gep1, i64 %a
+ %gep3 = getelementptr inbounds nuw i32, ptr %gep2, i64 1
+ ret ptr %gep3
+}
+
+define ptr @merge_nuw_missing_nusw(ptr %p, i64 %a) {
+; CHECK-LABEL: @merge_nuw_missing_nusw(
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr nuw i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr nuw i8, ptr [[GEP2]], i64 5
+; CHECK-NEXT: ret ptr [[GEP3]]
+;
+ %gep1 = getelementptr nusw nuw i8, ptr %p, i64 1
+ %gep2 = getelementptr nuw i32, ptr %gep1, i64 %a
+ %gep3 = getelementptr nusw nuw i32, ptr %gep2, i64 1
+ ret ptr %gep3
+}
+
+define ptr @merge_inbounds_missing_nuw(ptr %p, i64 %a) {
+; CHECK-LABEL: @merge_inbounds_missing_nuw(
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 5
+; CHECK-NEXT: ret ptr [[GEP3]]
+;
+ %gep1 = getelementptr inbounds nuw i8, ptr %p, i64 1
+ %gep2 = getelementptr inbounds i32, ptr %gep1, i64 %a
+ %gep3 = getelementptr inbounds nuw i32, ptr %gep2, i64 1
+ ret ptr %gep3
+}
+
+define ptr @merge_nusw_missing_nuw(ptr %p, i64 %a) {
+; CHECK-LABEL: @merge_nusw_missing_nuw(
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 5
+; CHECK-NEXT: ret ptr [[GEP3]]
+;
+ %gep1 = getelementptr nusw nuw i8, ptr %p, i64 1
+ %gep2 = getelementptr nusw i32, ptr %gep1, i64 %a
+ %gep3 = getelementptr nusw nuw i32, ptr %gep2, i64 1
+ ret ptr %gep3
+}
diff --git a/llvm/test/Transforms/InstCombine/gepofconstgepi8.ll b/llvm/test/Transforms/InstCombine/gepofconstgepi8.ll
index e2f22b8322d2b..5fb82d2328dde 100644
--- a/llvm/test/Transforms/InstCombine/gepofconstgepi8.ll
+++ b/llvm/test/Transforms/InstCombine/gepofconstgepi8.ll
@@ -85,10 +85,8 @@ define ptr @test_zero_sext_add_nsw(ptr %base, i32 %a) {
; CHECK-LABEL: define ptr @test_zero_sext_add_nsw(
; CHECK-SAME: ptr [[BASE:%.*]], i32 [[A:%.*]]) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[A]] to i64
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P1]], i64 [[TMP0]]
-; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 4
+; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[BASE]], i64 [[TMP0]]
; CHECK-NEXT: ret ptr [[P2]]
;
entry:
|
Fold:
%gep1 = ptradd %p, C1
%gep2 = ptradd %gep1, %x
%res = ptradd %gep2, C2
To:
%gep = ptradd %p, %x
%res = ptradd %gep, C1+C2
An alternative to this would be to generally canonicalize constant
offset GEPs to the right. I found the results of doing that somewhat
mixed, so I'm going for this more obviously beneficial change for
now.
Proof for flag preservation: https://alive2.llvm.org/ce/z/gmpAMg
6046a58 to
16f3d7b
Compare
|
@zyw-bot mfuzz |
|
@zyw-bot csmith-quick-fuzz |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
| return nullptr; | ||
| } | ||
|
|
||
| // Combine constant offsets separated by variable offsets. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use ///.
Fold:
To:
An alternative to this would be to generally canonicalize constant offset GEPs to the right. I found the results of doing that somewhat mixed, so I'm going for this more obviously beneficial change for now.
Proof for flag preservation on reassociation: https://alive2.llvm.org/ce/z/gmpAMg