diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index c167dd7f65fac..98fd7c9273c5d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1064,6 +1064,7 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags, ResumeForEpilogue, /// Returns the value for vscale. VScale, + OpsEnd = VScale, }; /// Returns true if this VPInstruction generates scalar values for all lanes. diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index ebf833e2b7e88..6ad7b4dab3d5d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1982,6 +1982,13 @@ struct VPCSEDenseMapInfo : public DenseMapInfo { .Case([](auto *I) { return std::make_pair(true, I->getVectorIntrinsicID()); }) + .Case([](auto *I) { + // For recipes that do not directly map to LLVM IR instructions, + // assign opcodes after the last VPInstruction opcode (which is also + // after the last IR Instruction opcode), based on the VPDefID. + return std::make_pair(false, + VPInstruction::OpsEnd + 1 + I->getVPDefID()); + }) .Default([](auto *) { return std::nullopt; }); } @@ -2005,12 +2012,9 @@ struct VPCSEDenseMapInfo : public DenseMapInfo { static bool canHandle(const VPSingleDefRecipe *Def) { // We can extend the list of handled recipes in the future, // provided we account for the data embedded in them while checking for - // equality or hashing. We assign VPVectorEndPointerRecipe the GEP opcode, - // as it is essentially a GEP with different semantics. - auto C = isa(Def) - ? std::make_pair(false, Instruction::GetElementPtr) - : getOpcodeOrIntrinsicID(Def); + // equality or hashing. + auto C = getOpcodeOrIntrinsicID(Def); // The issue with (Insert|Extract)Value is that the index of the // insert/extract is not a proper operand in LLVM IR, and hence also not in // VPlan. @@ -2048,11 +2052,22 @@ struct VPCSEDenseMapInfo : public DenseMapInfo { vputils::isSingleScalar(L) != vputils::isSingleScalar(R) || !equal(L->operands(), R->operands())) return false; + assert(getOpcodeOrIntrinsicID(L) && getOpcodeOrIntrinsicID(R) && + "must have valid opcode info for both recipes"); if (auto *LFlags = dyn_cast(L)) if (LFlags->hasPredicate() && LFlags->getPredicate() != cast(R)->getPredicate()) return false; + // Recipes in replicate regions implicitly depend on predicate. If either + // recipe is in a replicate region, only consider them equal if both have + // the same parent. + const VPRegionBlock *RegionL = L->getParent()->getParent(); + const VPRegionBlock *RegionR = R->getParent()->getParent(); + if (((RegionL && RegionL->isReplicator()) || + (RegionR && RegionR->isReplicator())) && + L->getParent() != R->getParent()) + return false; const VPlan *Plan = L->getParent()->getPlan(); VPTypeAnalysis TypeInfo(*Plan); return TypeInfo.inferScalarType(L) == TypeInfo.inferScalarType(R); diff --git a/llvm/test/Transforms/LoopVectorize/cse-replicate-regions.ll b/llvm/test/Transforms/LoopVectorize/cse-replicate-regions.ll new file mode 100644 index 0000000000000..c0692f3231e89 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/cse-replicate-regions.ll @@ -0,0 +1,163 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6 +; RUN: opt -p loop-vectorize -force-vector-width=2 -force-widen-divrem-via-safe-divisor=false -S %s | FileCheck %s + +define void @multiple_vppredinstphi_with_same_predicate(ptr %A, i32 %d) { +; CHECK-LABEL: define void @multiple_vppredinstphi_with_same_predicate( +; CHECK-SAME: ptr [[A:%.*]], i32 [[D:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_SDIV_CONTINUE2:.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 +; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_SDIV_IF:.*]], label %[[PRED_SDIV_CONTINUE:.*]] +; CHECK: [[PRED_SDIV_IF]]: +; CHECK-NEXT: [[TMP3:%.*]] = sdiv i32 -10, [[D]] +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[TMP3]], i32 0 +; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE]] +; CHECK: [[PRED_SDIV_CONTINUE]]: +; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP4]], %[[PRED_SDIV_IF]] ] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1 +; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_SDIV_IF1:.*]], label %[[PRED_SDIV_CONTINUE2]] +; CHECK: [[PRED_SDIV_IF1]]: +; CHECK-NEXT: [[TMP7:%.*]] = sdiv i32 -10, [[D]] +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP7]], i32 1 +; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE2]] +; CHECK: [[PRED_SDIV_CONTINUE2]]: +; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ [[TMP5]], %[[PRED_SDIV_CONTINUE]] ], [ [[TMP8]], %[[PRED_SDIV_IF1]] ] +; CHECK-NEXT: [[TMP10:%.*]] = add <2 x i32> [[TMP9]], [[TMP9]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[TMP10]], <2 x i32> zeroinitializer +; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP0]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 +; CHECK-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + %gep.A = getelementptr inbounds i32, ptr %A, i32 %iv + %l = load i32, ptr %gep.A + %c = icmp sgt i32 %l, 0 + br i1 %c, label %then, label %loop.latch + +then: + %div.0 = sdiv i32 -10, %d + %div.1 = sdiv i32 -10, %d + %add = add i32 %div.1, %div.0 + br label %loop.latch + +loop.latch: + %merge = phi i32 [ %add, %then ], [ 0, %loop.header ] + store i32 %merge, ptr %gep.A + %iv.next = add i32 %iv, 1 + %ec = icmp eq i32 %iv.next, 100 + br i1 %ec, label %exit, label %loop.header + +exit: + ret void +} + +define void @multiple_vppredinstphi_with_different_predicate(ptr %A, i32 %d) { +; CHECK-LABEL: define void @multiple_vppredinstphi_with_different_predicate( +; CHECK-SAME: ptr [[A:%.*]], i32 [[D:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_SDIV_CONTINUE6:.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 +; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_SDIV_IF:.*]], label %[[PRED_SDIV_CONTINUE:.*]] +; CHECK: [[PRED_SDIV_IF]]: +; CHECK-NEXT: [[TMP3:%.*]] = sdiv i32 -10, [[D]] +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[TMP3]], i32 0 +; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE]] +; CHECK: [[PRED_SDIV_CONTINUE]]: +; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP4]], %[[PRED_SDIV_IF]] ] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1 +; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_SDIV_IF1:.*]], label %[[PRED_SDIV_CONTINUE2:.*]] +; CHECK: [[PRED_SDIV_IF1]]: +; CHECK-NEXT: [[TMP7:%.*]] = sdiv i32 -10, [[D]] +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP7]], i32 1 +; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE2]] +; CHECK: [[PRED_SDIV_CONTINUE2]]: +; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ [[TMP5]], %[[PRED_SDIV_CONTINUE]] ], [ [[TMP8]], %[[PRED_SDIV_IF1]] ] +; CHECK-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP1]], splat (i1 true) +; CHECK-NEXT: [[TMP11:%.*]] = or <2 x i1> [[TMP1]], [[TMP10]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[TMP9]], <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], splat (i32 20) +; CHECK-NEXT: [[TMP13:%.*]] = select <2 x i1> [[TMP11]], <2 x i1> [[TMP12]], <2 x i1> zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP13]], i32 0 +; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_SDIV_IF3:.*]], label %[[PRED_SDIV_CONTINUE4:.*]] +; CHECK: [[PRED_SDIV_IF3]]: +; CHECK-NEXT: [[TMP15:%.*]] = sdiv i32 -10, [[D]] +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> poison, i32 [[TMP15]], i32 0 +; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE4]] +; CHECK: [[PRED_SDIV_CONTINUE4]]: +; CHECK-NEXT: [[TMP17:%.*]] = phi <2 x i32> [ poison, %[[PRED_SDIV_CONTINUE2]] ], [ [[TMP16]], %[[PRED_SDIV_IF3]] ] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x i1> [[TMP13]], i32 1 +; CHECK-NEXT: br i1 [[TMP18]], label %[[PRED_SDIV_IF5:.*]], label %[[PRED_SDIV_CONTINUE6]] +; CHECK: [[PRED_SDIV_IF5]]: +; CHECK-NEXT: [[TMP19:%.*]] = sdiv i32 -10, [[D]] +; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x i32> [[TMP17]], i32 [[TMP19]], i32 1 +; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE6]] +; CHECK: [[PRED_SDIV_CONTINUE6]]: +; CHECK-NEXT: [[TMP21:%.*]] = phi <2 x i32> [ [[TMP17]], %[[PRED_SDIV_CONTINUE4]] ], [ [[TMP20]], %[[PRED_SDIV_IF5]] ] +; CHECK-NEXT: [[PREDPHI7:%.*]] = select <2 x i1> [[TMP12]], <2 x i32> [[TMP21]], <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = add <2 x i32> [[PREDPHI]], [[PREDPHI7]] +; CHECK-NEXT: store <2 x i32> [[TMP22]], ptr [[TMP0]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 +; CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + %gep.A = getelementptr inbounds i32, ptr %A, i32 %iv + %l = load i32, ptr %gep.A + %c.0 = icmp sgt i32 %l, 0 + br i1 %c.0, label %then.0, label %continue + +then.0: + %div.0 = sdiv i32 -10, %d + br label %continue + +continue: + %merge.0 = phi i32 [ %div.0, %then.0 ], [ 0, %loop.header ] + %c.1 = icmp sgt i32 %l, 20 + br i1 %c.1, label %then.1, label %loop.latch + +then.1: + %div.1 = sdiv i32 -10, %d + br label %loop.latch + +loop.latch: + %merge.1 = phi i32 [ %div.1, %then.1 ], [ 0, %continue ] + %add = add i32 %merge.0, %merge.1 + store i32 %add, ptr %gep.A + %iv.next = add i32 %iv, 1 + %ec = icmp eq i32 %iv.next, 100 + br i1 %ec, label %exit, label %loop.header + +exit: + ret void +}