From 44220ba6a3a6184eb8a34983b898221b849abc68 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 28 Nov 2023 11:21:01 +0100 Subject: [PATCH] [InstCombine] Don't require GEP in indexed compare fold The indexed compare fold folds comparisons like p+a == p+b to a == b, even in cases where the a/b are complex (e.g. via multiple geps, or phis). Currently, it requires that the LHS is actually a GEP, but this requirement isn't really necessary: We can handle the pattern p == p+b as well. This patch removes the GEP requirement, allowing additional comparisons to be optimized away. --- .../InstCombine/InstCombineCompares.cpp | 39 ++++++++++--------- .../Transforms/InstCombine/getelementptr.ll | 8 ++-- llvm/test/Transforms/InstCombine/pr39908.ll | 12 ++---- .../PhaseOrdering/loop-access-checks.ll | 11 +++--- 4 files changed, 33 insertions(+), 37 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index b7b0bb7361359..0b144d779ce81 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -626,32 +626,34 @@ static Value *rewriteGEPAsOffset(Value *Start, Value *Base, GEPNoWrapFlags NW, return NewInsts[Start]; } -/// Converts (CMP GEPLHS, RHS) if this change would make RHS a constant. +/// Converts (CMP LHS, RHS) if this change would make RHS a constant. /// We can look through PHIs, GEPs and casts in order to determine a common base -/// between GEPLHS and RHS. -static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS, - CmpPredicate Cond, +/// between LHS and RHS. +static Instruction *transformToIndexedCompare(Value *LHS, Value *RHS, + ICmpInst::Predicate Cond, const DataLayout &DL, InstCombiner &IC) { - // FIXME: Support vector of pointers. - if (GEPLHS->getType()->isVectorTy()) + if (ICmpInst::isSigned(Cond)) return nullptr; - if (!GEPLHS->hasAllConstantIndices()) + // FIXME: Support vector of pointers. + if (!LHS->getType()->isPointerTy()) return nullptr; - APInt Offset(DL.getIndexTypeSizeInBits(GEPLHS->getType()), 0); + APInt Offset(DL.getIndexTypeSizeInBits(LHS->getType()), 0); Value *PtrBase = - GEPLHS->stripAndAccumulateConstantOffsets(DL, Offset, - /*AllowNonInbounds*/ false); + LHS->stripAndAccumulateConstantOffsets(DL, Offset, + /*AllowNonInbounds*/ false); // Bail if we looked through addrspacecast. - if (PtrBase->getType() != GEPLHS->getType()) + if (PtrBase->getType() != LHS->getType()) return nullptr; // The set of nodes that will take part in this transformation. SetVector Nodes; - GEPNoWrapFlags NW = GEPLHS->getNoWrapFlags(); + GEPNoWrapFlags NW = GEPNoWrapFlags::all(); + if (auto *GEPLHS = dyn_cast(LHS)) + NW = GEPLHS->getNoWrapFlags(); if (!canRewriteGEPAsOffset(RHS, PtrBase, NW, DL, Nodes)) return nullptr; @@ -800,10 +802,7 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, return replaceInstUsesWith(I, Cmp); } - // Otherwise, the base pointers are different and the indices are - // different. Try convert this to an indexed compare by looking through - // PHIs/casts. - return transformToIndexedCompare(GEPLHS, RHS, Cond, DL, *this); + return nullptr; } if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands() && @@ -851,9 +850,7 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, } } - // Try convert this to an indexed compare by looking through PHIs/casts as a - // last resort. - return transformToIndexedCompare(GEPLHS, RHS, Cond, DL, *this); + return nullptr; } bool InstCombinerImpl::foldAllocaCmp(AllocaInst *Alloca) { @@ -7312,6 +7309,10 @@ Instruction *InstCombinerImpl::foldICmpCommutative(CmpPredicate Pred, if (Instruction *NI = foldGEPICmp(GEP, Op1, Pred, CxtI)) return NI; + if (Instruction *Res = + transformToIndexedCompare(Op0, Op1, Pred, getDataLayout(), *this)) + return Res; + if (auto *SI = dyn_cast(Op0)) if (Instruction *NI = foldSelectICmp(Pred, SI, Op1, CxtI)) return NI; diff --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll index feba952919b9a..7e7b23929e740 100644 --- a/llvm/test/Transforms/InstCombine/getelementptr.ll +++ b/llvm/test/Transforms/InstCombine/getelementptr.ll @@ -686,15 +686,15 @@ define i32 @test28() nounwind { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ORIENTATIONS:%.*]] = alloca [1 x [1 x %struct.x]], align 8 ; CHECK-NEXT: [[T3:%.*]] = call i32 @puts(ptr noundef nonnull dereferenceable(1) @.str) #[[ATTR0]] -; CHECK-NEXT: [[T45:%.*]] = getelementptr inbounds nuw i8, ptr [[ORIENTATIONS]], i64 1 ; CHECK-NEXT: br label [[BB10:%.*]] ; CHECK: bb10: ; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[BB10]] ] ; CHECK-NEXT: [[T12_REC:%.*]] = xor i32 [[INDVAR]], -1 ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[T12_REC]] to i64 -; CHECK-NEXT: [[T12:%.*]] = getelementptr inbounds [[STRUCT_X:%.*]], ptr [[T45]], i64 [[TMP0]] -; CHECK-NEXT: [[T16:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str1, ptr nonnull [[T12]]) #[[ATTR0]] -; CHECK-NEXT: [[T84:%.*]] = icmp eq ptr [[T12]], [[ORIENTATIONS]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[ORIENTATIONS]], i64 [[TMP0]] +; CHECK-NEXT: [[T12_PTR:%.*]] = getelementptr i8, ptr [[TMP1]], i64 1 +; CHECK-NEXT: [[T16:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str1, ptr nonnull [[T12_PTR]]) #[[ATTR0]] +; CHECK-NEXT: [[T84:%.*]] = icmp eq i32 [[INDVAR]], 0 ; CHECK-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1 ; CHECK-NEXT: br i1 [[T84]], label [[BB17:%.*]], label [[BB10]] ; CHECK: bb17: diff --git a/llvm/test/Transforms/InstCombine/pr39908.ll b/llvm/test/Transforms/InstCombine/pr39908.ll index ca143f417fb27..5d13a331c6d2e 100644 --- a/llvm/test/Transforms/InstCombine/pr39908.ll +++ b/llvm/test/Transforms/InstCombine/pr39908.ll @@ -7,9 +7,7 @@ target datalayout = "p:32:32" define i1 @test(ptr %p, i32 %n) { ; CHECK-LABEL: @test( -; CHECK-NEXT: [[END:%.*]] = getelementptr inbounds [0 x %S], ptr [[P:%.*]], i32 0, i32 [[N:%.*]], i32 0, i32 0 -; CHECK-NEXT: [[LAST:%.*]] = getelementptr inbounds i8, ptr [[END]], i32 -8 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[LAST]], [[P]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[N:%.*]], 1 ; CHECK-NEXT: ret i1 [[CMP]] ; %end = getelementptr inbounds [0 x %S], ptr %p, i32 0, i32 %n, i32 0, i32 0 @@ -22,9 +20,7 @@ define i1 @test(ptr %p, i32 %n) { define i1 @test64(ptr %p, i64 %n) { ; CHECK-LABEL: @test64( ; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[N:%.*]] to i32 -; CHECK-NEXT: [[END:%.*]] = getelementptr inbounds [0 x %S], ptr [[P:%.*]], i32 0, i32 [[TMP1]], i32 0, i32 0 -; CHECK-NEXT: [[LAST:%.*]] = getelementptr inbounds i8, ptr [[END]], i32 -8 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[LAST]], [[P]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 1 ; CHECK-NEXT: ret i1 [[CMP]] ; %end = getelementptr inbounds [0 x %S], ptr %p, i64 0, i64 %n, i32 0, i64 0 @@ -37,9 +33,7 @@ define i1 @test64(ptr %p, i64 %n) { define i1 @test64_overflow(ptr %p, i64 %n) { ; CHECK-LABEL: @test64_overflow( ; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[N:%.*]] to i32 -; CHECK-NEXT: [[END:%.*]] = getelementptr inbounds [0 x %S], ptr [[P:%.*]], i32 0, i32 [[TMP1]], i32 0, i32 0 -; CHECK-NEXT: [[LAST:%.*]] = getelementptr inbounds i8, ptr [[END]], i32 -8 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[LAST]], [[P]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 1 ; CHECK-NEXT: ret i1 [[CMP]] ; %end = getelementptr inbounds [0 x %S], ptr %p, i64 0, i64 %n, i32 0, i64 8589934592 diff --git a/llvm/test/Transforms/PhaseOrdering/loop-access-checks.ll b/llvm/test/Transforms/PhaseOrdering/loop-access-checks.ll index e53b8687af915..9a88f64730683 100644 --- a/llvm/test/Transforms/PhaseOrdering/loop-access-checks.ll +++ b/llvm/test/Transforms/PhaseOrdering/loop-access-checks.ll @@ -24,7 +24,7 @@ define void @test_fill_with_foreach([2 x i64] %elems.coerce) { ; CHECK-NEXT: [[ELEMS_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[ELEMS_COERCE]], 0 ; CHECK-NEXT: [[TMP0:%.*]] = inttoptr i64 [[ELEMS_COERCE_FCA_0_EXTRACT]] to ptr ; CHECK-NEXT: [[ELEMS_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[ELEMS_COERCE]], 1 -; CHECK-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[ELEMS_COERCE_FCA_1_EXTRACT]] +; CHECK-NEXT: [[ADD_PTR_I_IDX:%.*]] = shl nsw i64 [[ELEMS_COERCE_FCA_1_EXTRACT]], 2 ; CHECK-NEXT: [[CMP_NOT_I_I_I_I:%.*]] = icmp slt i64 [[ELEMS_COERCE_FCA_1_EXTRACT]], 0 ; CHECK-NEXT: br i1 [[CMP_NOT_I_I_I_I]], label [[ERROR:%.*]], label [[FOR_COND_PREHEADER_SPLIT:%.*]] ; CHECK: for.cond.preheader.split: @@ -36,10 +36,11 @@ define void @test_fill_with_foreach([2 x i64] %elems.coerce) { ; CHECK-NEXT: tail call void @error() ; CHECK-NEXT: br label [[COMMON_RET]] ; CHECK: for.body: -; CHECK-NEXT: [[__BEGIN1_SROA_0_03:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[FOR_BODY]] ], [ [[TMP0]], [[FOR_COND_PREHEADER_SPLIT]] ] -; CHECK-NEXT: tail call void @use(ptr noundef nonnull align 4 dereferenceable(4) [[__BEGIN1_SROA_0_03]]) -; CHECK-NEXT: [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__BEGIN1_SROA_0_03]], i64 4 -; CHECK-NEXT: [[CMP_I_NOT:%.*]] = icmp eq ptr [[INCDEC_PTR_I]], [[ADD_PTR_I]] +; CHECK-NEXT: [[__BEGIN1_SROA_0_0_IDX3:%.*]] = phi i64 [ [[__BEGIN1_SROA_0_0_ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_COND_PREHEADER_SPLIT]] ] +; CHECK-NEXT: [[__BEGIN1_SROA_0_0_PTR4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 [[__BEGIN1_SROA_0_0_IDX3]] +; CHECK-NEXT: tail call void @use(ptr noundef nonnull align 4 dereferenceable(4) [[__BEGIN1_SROA_0_0_PTR4]]) +; CHECK-NEXT: [[__BEGIN1_SROA_0_0_ADD]] = add nuw nsw i64 [[__BEGIN1_SROA_0_0_IDX3]], 4 +; CHECK-NEXT: [[CMP_I_NOT:%.*]] = icmp eq i64 [[__BEGIN1_SROA_0_0_ADD]], [[ADD_PTR_I_IDX]] ; CHECK-NEXT: br i1 [[CMP_I_NOT]], label [[COMMON_RET]], label [[FOR_BODY]] ; entry: