From f9f7969f5a8cb5c59536da1a4d90a1b5fec02970 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 26 Aug 2025 16:31:06 +0800 Subject: [PATCH 1/6] Reassociate header mask --- .../Transforms/Vectorize/VPlanTransforms.cpp | 105 ++++++++++-------- .../RISCV/blocks-with-dead-instructions.ll | 12 +- ...ruction-or-drop-poison-generating-flags.ll | 11 +- .../LoopVectorize/X86/constant-fold.ll | 40 +++---- 4 files changed, 87 insertions(+), 81 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index a942d52cbca94..4ad054a438b70 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -996,7 +996,8 @@ static Value *tryToFoldLiveIns(const VPRecipeBase &R, unsigned Opcode, } /// Try to simplify recipe \p R. -static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { +static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo, + VPValue *HeaderMask) { VPlan *Plan = R.getParent()->getPlan(); auto *Def = dyn_cast(&R); @@ -1119,6 +1120,14 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { return; } + // Reassociate the header mask so it has more opportunities to be simplified. + // (headermask && x) && y -> headermask && (x && y) + if (HeaderMask && match(Def, m_LogicalAnd(m_LogicalAnd(m_Specific(HeaderMask), + m_VPValue(X)), + m_VPValue(Y)))) + return Def->replaceAllUsesWith( + Builder.createLogicalAnd(HeaderMask, Builder.createLogicalAnd(X, Y))); + if (match(Def, m_c_Mul(m_VPValue(A), m_SpecificInt(1)))) return Def->replaceAllUsesWith(A); @@ -1263,13 +1272,61 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { } } +/// Collect the header mask with the pattern: +/// (ICMP_ULE, WideCanonicalIV, backedge-taken-count) +/// TODO: Introduce explicit recipe for header-mask instead of searching +/// for the header-mask pattern manually. +static VPSingleDefRecipe *findHeaderMask(VPlan &Plan) { + SmallVector WideCanonicalIVs; + auto *FoundWidenCanonicalIVUser = find_if(Plan.getCanonicalIV()->users(), + IsaPred); + assert(count_if(Plan.getCanonicalIV()->users(), + IsaPred) <= 1 && + "Must have at most one VPWideCanonicalIVRecipe"); + if (FoundWidenCanonicalIVUser != Plan.getCanonicalIV()->users().end()) { + auto *WideCanonicalIV = + cast(*FoundWidenCanonicalIVUser); + WideCanonicalIVs.push_back(WideCanonicalIV); + } + + // Also include VPWidenIntOrFpInductionRecipes that represent a widened + // version of the canonical induction. + VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock(); + for (VPRecipeBase &Phi : HeaderVPBB->phis()) { + auto *WidenOriginalIV = dyn_cast(&Phi); + if (WidenOriginalIV && WidenOriginalIV->isCanonical()) + WideCanonicalIVs.push_back(WidenOriginalIV); + } + + // Walk users of wide canonical IVs and find the single compare of the form + // (ICMP_ULE, WideCanonicalIV, backedge-taken-count). + VPSingleDefRecipe *HeaderMask = nullptr; + for (auto *Wide : WideCanonicalIVs) { + for (VPUser *U : SmallVector(Wide->users())) { + auto *VPI = dyn_cast(U); + if (!VPI || !vputils::isHeaderMask(VPI, Plan)) + continue; + + assert(VPI->getOperand(0) == Wide && + "WidenCanonicalIV must be the first operand of the compare"); + assert(!HeaderMask && "Multiple header masks found?"); + HeaderMask = VPI; + } + } + return HeaderMask; +} + void VPlanTransforms::simplifyRecipes(VPlan &Plan) { + VPValue *HeaderMask = nullptr; + // Ignore post-unrolling as there can be multiple header masks. + if (!Plan.isUnrolled()) + HeaderMask = findHeaderMask(Plan); ReversePostOrderTraversal> RPOT( Plan.getEntry()); VPTypeAnalysis TypeInfo(Plan); for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly(RPOT)) { for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { - simplifyRecipe(R, TypeInfo); + simplifyRecipe(R, TypeInfo, HeaderMask); } } } @@ -2192,50 +2249,6 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch( return LaneMaskPhi; } -/// Collect the header mask with the pattern: -/// (ICMP_ULE, WideCanonicalIV, backedge-taken-count) -/// TODO: Introduce explicit recipe for header-mask instead of searching -/// for the header-mask pattern manually. -static VPSingleDefRecipe *findHeaderMask(VPlan &Plan) { - SmallVector WideCanonicalIVs; - auto *FoundWidenCanonicalIVUser = find_if(Plan.getCanonicalIV()->users(), - IsaPred); - assert(count_if(Plan.getCanonicalIV()->users(), - IsaPred) <= 1 && - "Must have at most one VPWideCanonicalIVRecipe"); - if (FoundWidenCanonicalIVUser != Plan.getCanonicalIV()->users().end()) { - auto *WideCanonicalIV = - cast(*FoundWidenCanonicalIVUser); - WideCanonicalIVs.push_back(WideCanonicalIV); - } - - // Also include VPWidenIntOrFpInductionRecipes that represent a widened - // version of the canonical induction. - VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock(); - for (VPRecipeBase &Phi : HeaderVPBB->phis()) { - auto *WidenOriginalIV = dyn_cast(&Phi); - if (WidenOriginalIV && WidenOriginalIV->isCanonical()) - WideCanonicalIVs.push_back(WidenOriginalIV); - } - - // Walk users of wide canonical IVs and find the single compare of the form - // (ICMP_ULE, WideCanonicalIV, backedge-taken-count). - VPSingleDefRecipe *HeaderMask = nullptr; - for (auto *Wide : WideCanonicalIVs) { - for (VPUser *U : SmallVector(Wide->users())) { - auto *VPI = dyn_cast(U); - if (!VPI || !vputils::isHeaderMask(VPI, Plan)) - continue; - - assert(VPI->getOperand(0) == Wide && - "WidenCanonicalIV must be the first operand of the compare"); - assert(!HeaderMask && "Multiple header masks found?"); - HeaderMask = VPI; - } - } - return HeaderMask; -} - void VPlanTransforms::addActiveLaneMask( VPlan &Plan, bool UseActiveLaneMaskForControlFlow, bool DataAndControlFlowWithoutRuntimeCheck) { diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll index 631328a9a0964..c06b06ed4aee5 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll @@ -436,23 +436,17 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 % ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP2]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i32 [[TMP27]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP27]] to i64 ; CHECK-NEXT: [[TMP16:%.*]] = mul i64 3, [[TMP12]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP16]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP14:%.*]] = call @llvm.stepvector.nxv8i32() -; CHECK-NEXT: [[TMP15:%.*]] = icmp ult [[TMP14]], [[BROADCAST_SPLAT4]] ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[SRC]], [[VEC_IND]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.vp.gather.nxv8i16.nxv8p0( align 2 [[TMP20]], splat (i1 true), i32 [[TMP27]]) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq [[WIDE_MASKED_GATHER]], zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = select [[TMP15]], [[TMP17]], zeroinitializer -; CHECK-NEXT: [[TMP19:%.*]] = select [[TMP18]], [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP29:%.*]] = select [[TMP17]], [[TMP8]], zeroinitializer ; CHECK-NEXT: [[TMP28:%.*]] = xor [[TMP17]], splat (i1 true) -; CHECK-NEXT: [[TMP21:%.*]] = select [[TMP15]], [[TMP28]], zeroinitializer -; CHECK-NEXT: [[TMP22:%.*]] = or [[TMP19]], [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = select [[TMP18]], [[BROADCAST_SPLAT]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = or [[TMP29]], [[TMP28]] +; CHECK-NEXT: [[TMP23:%.*]] = select [[TMP17]], [[BROADCAST_SPLAT]], zeroinitializer ; CHECK-NEXT: [[TMP24:%.*]] = or [[TMP22]], [[TMP23]] ; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0( zeroinitializer, align 2 [[TMP20]], [[TMP24]], i32 [[TMP27]]) ; CHECK-NEXT: [[TMP25:%.*]] = zext i32 [[TMP27]] to i64 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll index db6185087bac5..a4b90c658cd6e 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll @@ -34,17 +34,16 @@ define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64 ; CHECK-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv8i32() ; CHECK-NEXT: [[TMP11:%.*]] = icmp ult [[TMP10]], [[BROADCAST_SPLAT8]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp ule [[VEC_IND]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP28:%.*]] = select [[TMP11]], [[TMP13]], zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = icmp ule [[VEC_IND]], [[BROADCAST_SPLAT2]] -; CHECK-NEXT: [[TMP15:%.*]] = select [[TMP28]], [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = select [[TMP13]], [[TMP14]], zeroinitializer ; CHECK-NEXT: [[TMP16:%.*]] = xor [[TMP13]], splat (i1 true) -; CHECK-NEXT: [[TMP29:%.*]] = select [[TMP11]], [[TMP16]], zeroinitializer -; CHECK-NEXT: [[TMP17:%.*]] = or [[TMP15]], [[TMP29]] +; CHECK-NEXT: [[TMP17:%.*]] = or [[TMP9]], [[TMP16]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp ule [[VEC_IND]], [[BROADCAST_SPLAT4]] ; CHECK-NEXT: [[TMP19:%.*]] = select [[TMP17]], [[TMP18]], zeroinitializer ; CHECK-NEXT: [[TMP20:%.*]] = xor [[TMP14]], splat (i1 true) -; CHECK-NEXT: [[TMP21:%.*]] = select [[TMP28]], [[TMP20]], zeroinitializer -; CHECK-NEXT: [[TMP22:%.*]] = or [[TMP19]], [[TMP21]] +; CHECK-NEXT: [[TMP28:%.*]] = select [[TMP13]], [[TMP20]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = select [[TMP11]], [[TMP28]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = or [[TMP19]], [[TMP28]] ; CHECK-NEXT: [[TMP23:%.*]] = extractelement [[TMP21]], i32 0 ; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP23]], i64 poison, i64 [[INDEX]] ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i16, ptr [[ARG]], i64 [[PREDPHI]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll index db54ca61f715b..54d738388ea73 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll @@ -65,35 +65,35 @@ define void @redundant_or_1(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C_0:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i1> poison, i1 [[C_1:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT1]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT2]], splat (i1 true) +; CHECK-NEXT: [[TMP0:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[BROADCAST_SPLAT]], <4 x i1> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> , <4 x i1> [[TMP0]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[BROADCAST_SPLAT2]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0 ; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 0 ; CHECK-NEXT: store i32 0, ptr [[TMP8]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1 ; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] ; CHECK: pred.store.if3: ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 1 ; CHECK-NEXT: store i32 0, ptr [[TMP11]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] ; CHECK: pred.store.continue4: -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2 ; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] ; CHECK: pred.store.if5: ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 2 ; CHECK-NEXT: store i32 0, ptr [[TMP14]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] ; CHECK: pred.store.continue6: -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3 ; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] ; CHECK: pred.store.if7: ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 3 @@ -107,11 +107,11 @@ define void @redundant_or_1(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: br i1 [[C_0]], label [[LOOP_LATCH]], label [[THEN_1:%.*]] +; CHECK-NEXT: br i1 [[C_1]], label [[LOOP_LATCH]], label [[THEN_1:%.*]] ; CHECK: then.1: ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV]], 2 ; CHECK-NEXT: [[OR:%.*]] = or i1 [[CMP]], true -; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR]], i1 [[C_1]], i1 false +; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR]], i1 [[C_0]], i1 false ; CHECK-NEXT: br i1 [[COND]], label [[THEN_2:%.*]], label [[LOOP_LATCH]] ; CHECK: then.2: ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]] @@ -158,35 +158,35 @@ define void @redundant_or_2(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C_1:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i1> poison, i1 [[C_0:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT1]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT2]], splat (i1 true) +; CHECK-NEXT: [[TMP0:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[BROADCAST_SPLAT]], <4 x i1> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> , <4 x i1> [[TMP0]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i1> [[BROADCAST_SPLAT2]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 0 ; CHECK-NEXT: store i32 0, ptr [[TMP7]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1 ; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] ; CHECK: pred.store.if3: ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 1 ; CHECK-NEXT: store i32 0, ptr [[TMP10]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] ; CHECK: pred.store.continue4: -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2 ; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] ; CHECK: pred.store.if5: ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 2 ; CHECK-NEXT: store i32 0, ptr [[TMP13]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] ; CHECK: pred.store.continue6: -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3 ; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] ; CHECK: pred.store.if7: ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 3 @@ -200,11 +200,11 @@ define void @redundant_or_2(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: br i1 [[C_1]], label [[LOOP_LATCH]], label [[THEN_1:%.*]] +; CHECK-NEXT: br i1 [[C_0]], label [[LOOP_LATCH]], label [[THEN_1:%.*]] ; CHECK: then.1: ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV]], 2 ; CHECK-NEXT: [[OR:%.*]] = or i1 true, [[CMP]] -; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR]], i1 [[C_0]], i1 false +; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR]], i1 [[C_1]], i1 false ; CHECK-NEXT: br i1 [[COND]], label [[THEN_2:%.*]], label [[LOOP_LATCH]] ; CHECK: then.2: ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]] @@ -256,9 +256,9 @@ define void @redundant_and_1(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT1]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> , <4 x i1> [[TMP0]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> , <4 x i1> [[BROADCAST_SPLAT2]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP0]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> , <4 x i1> [[TMP2]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0 ; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: @@ -346,10 +346,10 @@ define void @redundant_and_2(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i1> poison, i1 [[C_0:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT1]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT2]], splat (i1 true) +; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i1> zeroinitializer, <4 x i1> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> , <4 x i1> [[TMP0]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i1> zeroinitializer, <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> , <4 x i1> [[TMP1]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: From 2cd0bb5b420cb361555c694882a01f2ab4b801f9 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 2 Sep 2025 00:14:03 +0800 Subject: [PATCH 2/6] Reassociate all ands --- .../Transforms/Vectorize/VPlanTransforms.cpp | 107 ++++++++---------- .../AArch64/force-target-instruction-cost.ll | 66 ++++++----- .../RISCV/blocks-with-dead-instructions.ll | 13 ++- ...ruction-or-drop-poison-generating-flags.ll | 23 ++-- .../LoopVectorize/X86/constant-fold.ll | 83 +++++++------- .../LoopVectorize/X86/predicate-switch.ll | 9 +- .../LoopVectorize/reduction-inloop-pred.ll | 4 +- .../LoopVectorize/reduction-inloop.ll | 12 +- .../Transforms/LoopVectorize/reduction.ll | 4 +- 9 files changed, 143 insertions(+), 178 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 4ad054a438b70..e18dad09b6360 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -996,8 +996,7 @@ static Value *tryToFoldLiveIns(const VPRecipeBase &R, unsigned Opcode, } /// Try to simplify recipe \p R. -static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo, - VPValue *HeaderMask) { +static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { VPlan *Plan = R.getParent()->getPlan(); auto *Def = dyn_cast(&R); @@ -1120,13 +1119,11 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo, return; } - // Reassociate the header mask so it has more opportunities to be simplified. - // (headermask && x) && y -> headermask && (x && y) - if (HeaderMask && match(Def, m_LogicalAnd(m_LogicalAnd(m_Specific(HeaderMask), - m_VPValue(X)), - m_VPValue(Y)))) + // (x & y) & z -> x & (y & z) + if (match(Def, m_LogicalAnd(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)), + m_VPValue(Z)))) return Def->replaceAllUsesWith( - Builder.createLogicalAnd(HeaderMask, Builder.createLogicalAnd(X, Y))); + Builder.createLogicalAnd(X, Builder.createLogicalAnd(X, Y))); if (match(Def, m_c_Mul(m_VPValue(A), m_SpecificInt(1)))) return Def->replaceAllUsesWith(A); @@ -1272,61 +1269,13 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo, } } -/// Collect the header mask with the pattern: -/// (ICMP_ULE, WideCanonicalIV, backedge-taken-count) -/// TODO: Introduce explicit recipe for header-mask instead of searching -/// for the header-mask pattern manually. -static VPSingleDefRecipe *findHeaderMask(VPlan &Plan) { - SmallVector WideCanonicalIVs; - auto *FoundWidenCanonicalIVUser = find_if(Plan.getCanonicalIV()->users(), - IsaPred); - assert(count_if(Plan.getCanonicalIV()->users(), - IsaPred) <= 1 && - "Must have at most one VPWideCanonicalIVRecipe"); - if (FoundWidenCanonicalIVUser != Plan.getCanonicalIV()->users().end()) { - auto *WideCanonicalIV = - cast(*FoundWidenCanonicalIVUser); - WideCanonicalIVs.push_back(WideCanonicalIV); - } - - // Also include VPWidenIntOrFpInductionRecipes that represent a widened - // version of the canonical induction. - VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock(); - for (VPRecipeBase &Phi : HeaderVPBB->phis()) { - auto *WidenOriginalIV = dyn_cast(&Phi); - if (WidenOriginalIV && WidenOriginalIV->isCanonical()) - WideCanonicalIVs.push_back(WidenOriginalIV); - } - - // Walk users of wide canonical IVs and find the single compare of the form - // (ICMP_ULE, WideCanonicalIV, backedge-taken-count). - VPSingleDefRecipe *HeaderMask = nullptr; - for (auto *Wide : WideCanonicalIVs) { - for (VPUser *U : SmallVector(Wide->users())) { - auto *VPI = dyn_cast(U); - if (!VPI || !vputils::isHeaderMask(VPI, Plan)) - continue; - - assert(VPI->getOperand(0) == Wide && - "WidenCanonicalIV must be the first operand of the compare"); - assert(!HeaderMask && "Multiple header masks found?"); - HeaderMask = VPI; - } - } - return HeaderMask; -} - void VPlanTransforms::simplifyRecipes(VPlan &Plan) { - VPValue *HeaderMask = nullptr; - // Ignore post-unrolling as there can be multiple header masks. - if (!Plan.isUnrolled()) - HeaderMask = findHeaderMask(Plan); ReversePostOrderTraversal> RPOT( Plan.getEntry()); VPTypeAnalysis TypeInfo(Plan); for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly(RPOT)) { for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { - simplifyRecipe(R, TypeInfo, HeaderMask); + simplifyRecipe(R, TypeInfo); } } } @@ -2249,6 +2198,50 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch( return LaneMaskPhi; } +/// Collect the header mask with the pattern: +/// (ICMP_ULE, WideCanonicalIV, backedge-taken-count) +/// TODO: Introduce explicit recipe for header-mask instead of searching +/// for the header-mask pattern manually. +static VPSingleDefRecipe *findHeaderMask(VPlan &Plan) { + SmallVector WideCanonicalIVs; + auto *FoundWidenCanonicalIVUser = find_if(Plan.getCanonicalIV()->users(), + IsaPred); + assert(count_if(Plan.getCanonicalIV()->users(), + IsaPred) <= 1 && + "Must have at most one VPWideCanonicalIVRecipe"); + if (FoundWidenCanonicalIVUser != Plan.getCanonicalIV()->users().end()) { + auto *WideCanonicalIV = + cast(*FoundWidenCanonicalIVUser); + WideCanonicalIVs.push_back(WideCanonicalIV); + } + + // Also include VPWidenIntOrFpInductionRecipes that represent a widened + // version of the canonical induction. + VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock(); + for (VPRecipeBase &Phi : HeaderVPBB->phis()) { + auto *WidenOriginalIV = dyn_cast(&Phi); + if (WidenOriginalIV && WidenOriginalIV->isCanonical()) + WideCanonicalIVs.push_back(WidenOriginalIV); + } + + // Walk users of wide canonical IVs and find the single compare of the form + // (ICMP_ULE, WideCanonicalIV, backedge-taken-count). + VPSingleDefRecipe *HeaderMask = nullptr; + for (auto *Wide : WideCanonicalIVs) { + for (VPUser *U : SmallVector(Wide->users())) { + auto *VPI = dyn_cast(U); + if (!VPI || !vputils::isHeaderMask(VPI, Plan)) + continue; + + assert(VPI->getOperand(0) == Wide && + "WidenCanonicalIV must be the first operand of the compare"); + assert(!HeaderMask && "Multiple header masks found?"); + HeaderMask = VPI; + } + } + return HeaderMask; +} + void VPlanTransforms::addActiveLaneMask( VPlan &Plan, bool UseActiveLaneMaskForControlFlow, bool DataAndControlFlowWithoutRuntimeCheck) { diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll index 708967e3d13af..0822aaada233e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll @@ -194,14 +194,11 @@ define void @test_exit_branch_cost(ptr %dst, ptr noalias %x.ptr, ptr noalias %y. ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT40:%.*]] = insertelement <2 x i1> poison, i1 [[C_3]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT41:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT40]], <2 x i1> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT56:%.*]] = insertelement <2 x i1> poison, i1 [[C_4]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT57:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT56]], <2 x i1> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[C_4]], <2 x i1> [[BROADCAST_SPLAT41]], <2 x i1> zeroinitializer ; CHECK-NEXT: [[TMP11:%.*]] = xor <2 x i1> [[TMP1]], splat (i1 true) -; CHECK-NEXT: [[TMP33:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT57]], splat (i1 true) ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE55:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE53:.*]] ] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[X_PTR]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[TMP47:%.*]] = icmp eq <2 x i64> [[WIDE_LOAD]], zeroinitializer @@ -213,58 +210,59 @@ define void @test_exit_branch_cost(ptr %dst, ptr noalias %x.ptr, ptr noalias %y. ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1 -; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF42:.*]], label %[[PRED_STORE_CONTINUE43:.*]] -; CHECK: [[PRED_STORE_IF42]]: +; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF40:.*]], label %[[PRED_STORE_CONTINUE41:.*]] +; CHECK: [[PRED_STORE_IF40]]: ; CHECK-NEXT: store i64 0, ptr [[DST_1]], align 8, !alias.scope [[META7]], !noalias [[META10]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE43]] -; CHECK: [[PRED_STORE_CONTINUE43]]: +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE41]] +; CHECK: [[PRED_STORE_CONTINUE41]]: ; CHECK-NEXT: [[TMP13:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[TMP11]], <2 x i1> zeroinitializer ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP13]], i32 0 -; CHECK-NEXT: br i1 [[TMP16]], label %[[PRED_STORE_IF44:.*]], label %[[PRED_STORE_CONTINUE45:.*]] -; CHECK: [[PRED_STORE_IF44]]: +; CHECK-NEXT: br i1 [[TMP16]], label %[[PRED_STORE_IF42:.*]], label %[[PRED_STORE_CONTINUE43:.*]] +; CHECK: [[PRED_STORE_IF42]]: ; CHECK-NEXT: store i64 0, ptr [[DST_3]], align 8, !alias.scope [[META15:![0-9]+]], !noalias [[META16:![0-9]+]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE45]] -; CHECK: [[PRED_STORE_CONTINUE45]]: +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE43]] +; CHECK: [[PRED_STORE_CONTINUE43]]: ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP13]], i32 1 -; CHECK-NEXT: br i1 [[TMP17]], label %[[PRED_STORE_IF46:.*]], label %[[PRED_STORE_CONTINUE47:.*]] -; CHECK: [[PRED_STORE_IF46]]: +; CHECK-NEXT: br i1 [[TMP17]], label %[[PRED_STORE_IF44:.*]], label %[[PRED_STORE_CONTINUE45:.*]] +; CHECK: [[PRED_STORE_IF44]]: ; CHECK-NEXT: store i64 0, ptr [[DST_3]], align 8, !alias.scope [[META15]], !noalias [[META16]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE47]] -; CHECK: [[PRED_STORE_CONTINUE47]]: +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE45]] +; CHECK: [[PRED_STORE_CONTINUE45]]: ; CHECK-NEXT: [[TMP19:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[BROADCAST_SPLAT41]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[TMP21:%.*]] = select <2 x i1> [[TMP19]], <2 x i1> [[BROADCAST_SPLAT57]], <2 x i1> zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[TMP19]], <2 x i1> zeroinitializer ; CHECK-NEXT: [[TMP23:%.*]] = or <2 x i1> [[TMP47]], [[TMP21]] ; CHECK-NEXT: [[PREDPHI58:%.*]] = select <2 x i1> [[TMP21]], <2 x i64> zeroinitializer, <2 x i64> splat (i64 1) ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP23]], i32 0 -; CHECK-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF48:.*]], label %[[PRED_STORE_CONTINUE49:.*]] -; CHECK: [[PRED_STORE_IF48]]: +; CHECK-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF46:.*]], label %[[PRED_STORE_CONTINUE47:.*]] +; CHECK: [[PRED_STORE_IF46]]: ; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x i64> [[PREDPHI58]], i32 0 ; CHECK-NEXT: store i64 [[TMP29]], ptr [[DST_2]], align 8, !alias.scope [[META17:![0-9]+]], !noalias [[META18:![0-9]+]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE49]] -; CHECK: [[PRED_STORE_CONTINUE49]]: +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE47]] +; CHECK: [[PRED_STORE_CONTINUE47]]: ; CHECK-NEXT: [[TMP30:%.*]] = extractelement <2 x i1> [[TMP23]], i32 1 -; CHECK-NEXT: br i1 [[TMP30]], label %[[PRED_STORE_IF50:.*]], label %[[PRED_STORE_CONTINUE51:.*]] -; CHECK: [[PRED_STORE_IF50]]: +; CHECK-NEXT: br i1 [[TMP30]], label %[[PRED_STORE_IF48:.*]], label %[[PRED_STORE_CONTINUE49:.*]] +; CHECK: [[PRED_STORE_IF48]]: ; CHECK-NEXT: [[TMP31:%.*]] = extractelement <2 x i64> [[PREDPHI58]], i32 1 ; CHECK-NEXT: store i64 [[TMP31]], ptr [[DST_2]], align 8, !alias.scope [[META17]], !noalias [[META18]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE51]] -; CHECK: [[PRED_STORE_CONTINUE51]]: -; CHECK-NEXT: [[TMP35:%.*]] = select <2 x i1> [[TMP19]], <2 x i1> [[TMP33]], <2 x i1> zeroinitializer +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE49]] +; CHECK: [[PRED_STORE_CONTINUE49]]: +; CHECK-NEXT: [[TMP18:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[BROADCAST_SPLAT41]], <2 x i1> zeroinitializer +; CHECK-NEXT: [[TMP35:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[TMP18]], <2 x i1> zeroinitializer ; CHECK-NEXT: [[TMP37:%.*]] = or <2 x i1> [[TMP23]], [[TMP35]] ; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i1> [[TMP37]], i32 0 -; CHECK-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF52:.*]], label %[[PRED_STORE_CONTINUE53:.*]] -; CHECK: [[PRED_STORE_IF52]]: +; CHECK-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF50:.*]], label %[[PRED_STORE_CONTINUE51:.*]] +; CHECK: [[PRED_STORE_IF50]]: ; CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19:![0-9]+]] ; CHECK-NEXT: store i64 [[TMP22]], ptr [[DST]], align 8, !alias.scope [[META20:![0-9]+]], !noalias [[META19]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE53]] -; CHECK: [[PRED_STORE_CONTINUE53]]: +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE51]] +; CHECK: [[PRED_STORE_CONTINUE51]]: ; CHECK-NEXT: [[TMP44:%.*]] = extractelement <2 x i1> [[TMP37]], i32 1 -; CHECK-NEXT: br i1 [[TMP44]], label %[[PRED_STORE_IF54:.*]], label %[[PRED_STORE_CONTINUE55]] -; CHECK: [[PRED_STORE_IF54]]: +; CHECK-NEXT: br i1 [[TMP44]], label %[[PRED_STORE_IF52:.*]], label %[[PRED_STORE_CONTINUE53]] +; CHECK: [[PRED_STORE_IF52]]: ; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19]] ; CHECK-NEXT: store i64 [[TMP24]], ptr [[DST]], align 8, !alias.scope [[META20]], !noalias [[META19]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE55]] -; CHECK: [[PRED_STORE_CONTINUE55]]: +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE53]] +; CHECK: [[PRED_STORE_CONTINUE53]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 ; CHECK-NEXT: br i1 [[TMP46]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll index c06b06ed4aee5..6d373a42d7c3d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll @@ -425,9 +425,6 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 % ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i1 [[IC]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = xor [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: [[TMP11:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP13:%.*]] = mul [[TMP11]], splat (i64 3) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP13]] @@ -436,17 +433,21 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 % ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP2]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i32 [[TMP27]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP27]] to i64 ; CHECK-NEXT: [[TMP16:%.*]] = mul i64 3, [[TMP12]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP16]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = call @llvm.stepvector.nxv8i32() +; CHECK-NEXT: [[TMP15:%.*]] = icmp ult [[TMP14]], [[BROADCAST_SPLAT4]] ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[SRC]], [[VEC_IND]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.vp.gather.nxv8i16.nxv8p0( align 2 [[TMP20]], splat (i1 true), i32 [[TMP27]]) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq [[WIDE_MASKED_GATHER]], zeroinitializer -; CHECK-NEXT: [[TMP29:%.*]] = select [[TMP17]], [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = select [[TMP15]], [[TMP17]], zeroinitializer ; CHECK-NEXT: [[TMP28:%.*]] = xor [[TMP17]], splat (i1 true) -; CHECK-NEXT: [[TMP22:%.*]] = or [[TMP29]], [[TMP28]] -; CHECK-NEXT: [[TMP23:%.*]] = select [[TMP17]], [[BROADCAST_SPLAT]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = or [[TMP18]], [[TMP28]] +; CHECK-NEXT: [[TMP23:%.*]] = select [[TMP15]], [[TMP17]], zeroinitializer ; CHECK-NEXT: [[TMP24:%.*]] = or [[TMP22]], [[TMP23]] ; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0( zeroinitializer, align 2 [[TMP20]], [[TMP24]], i32 [[TMP27]]) ; CHECK-NEXT: [[TMP25:%.*]] = zext i32 [[TMP27]] to i64 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll index a4b90c658cd6e..01ea0f0ed1dc3 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll @@ -12,11 +12,7 @@ define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[A]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[B]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i64 [[C]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP6]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] @@ -33,17 +29,14 @@ define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT5]], poison, zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv8i32() ; CHECK-NEXT: [[TMP11:%.*]] = icmp ult [[TMP10]], [[BROADCAST_SPLAT8]] -; CHECK-NEXT: [[TMP13:%.*]] = icmp ule [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP14:%.*]] = icmp ule [[VEC_IND]], [[BROADCAST_SPLAT2]] -; CHECK-NEXT: [[TMP9:%.*]] = select [[TMP13]], [[TMP14]], zeroinitializer -; CHECK-NEXT: [[TMP16:%.*]] = xor [[TMP13]], splat (i1 true) -; CHECK-NEXT: [[TMP17:%.*]] = or [[TMP9]], [[TMP16]] -; CHECK-NEXT: [[TMP18:%.*]] = icmp ule [[VEC_IND]], [[BROADCAST_SPLAT4]] -; CHECK-NEXT: [[TMP19:%.*]] = select [[TMP17]], [[TMP18]], zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = xor [[TMP14]], splat (i1 true) -; CHECK-NEXT: [[TMP28:%.*]] = select [[TMP13]], [[TMP20]], zeroinitializer -; CHECK-NEXT: [[TMP21:%.*]] = select [[TMP11]], [[TMP28]], zeroinitializer -; CHECK-NEXT: [[TMP22:%.*]] = or [[TMP19]], [[TMP28]] +; CHECK-NEXT: [[TMP15:%.*]] = select [[TMP11]], [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP29:%.*]] = xor [[TMP14]], splat (i1 true) +; CHECK-NEXT: [[TMP17:%.*]] = or [[TMP15]], [[TMP29]] +; CHECK-NEXT: [[TMP13:%.*]] = select [[TMP11]], [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = select [[TMP11]], [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = or [[TMP17]], [[TMP14]] +; CHECK-NEXT: [[TMP22:%.*]] = select [[TMP11]], [[TMP12]], zeroinitializer ; CHECK-NEXT: [[TMP23:%.*]] = extractelement [[TMP21]], i32 0 ; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP23]], i64 poison, i64 [[INDEX]] ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i16, ptr [[ARG]], i64 [[PREDPHI]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll index 54d738388ea73..030879f8ec0e8 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll @@ -65,41 +65,39 @@ define void @redundant_or_1(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C_0:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i1> poison, i1 [[C_1:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT1]], <4 x i1> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT2]], splat (i1 true) -; CHECK-NEXT: [[TMP0:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[BROADCAST_SPLAT]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> , <4 x i1> [[TMP0]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> , <4 x i1> [[TMP2]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0 ; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 0 ; CHECK-NEXT: store i32 0, ptr [[TMP8]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1 ; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] -; CHECK: pred.store.if3: +; CHECK: pred.store.if1: ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 1 ; CHECK-NEXT: store i32 0, ptr [[TMP11]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] -; CHECK: pred.store.continue4: -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2 +; CHECK: pred.store.continue2: +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2 ; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] -; CHECK: pred.store.if5: +; CHECK: pred.store.if3: ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 2 ; CHECK-NEXT: store i32 0, ptr [[TMP14]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] -; CHECK: pred.store.continue6: -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3 +; CHECK: pred.store.continue4: +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3 ; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] -; CHECK: pred.store.if7: +; CHECK: pred.store.if5: ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 3 ; CHECK-NEXT: store i32 0, ptr [[TMP17]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] -; CHECK: pred.store.continue8: +; CHECK: pred.store.continue6: ; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] @@ -107,11 +105,11 @@ define void @redundant_or_1(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: br i1 [[C_1]], label [[LOOP_LATCH]], label [[THEN_1:%.*]] +; CHECK-NEXT: br i1 [[C_0]], label [[LOOP_LATCH]], label [[THEN_1:%.*]] ; CHECK: then.1: ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV]], 2 ; CHECK-NEXT: [[OR:%.*]] = or i1 [[CMP]], true -; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR]], i1 [[C_0]], i1 false +; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR]], i1 [[C_1:%.*]], i1 false ; CHECK-NEXT: br i1 [[COND]], label [[THEN_2:%.*]], label [[LOOP_LATCH]] ; CHECK: then.2: ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]] @@ -158,41 +156,39 @@ define void @redundant_or_2(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C_1:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i1> poison, i1 [[C_0:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT1]], <4 x i1> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT2]], splat (i1 true) -; CHECK-NEXT: [[TMP0:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[BROADCAST_SPLAT]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> , <4 x i1> [[TMP0]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> , <4 x i1> [[TMP3]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 0 ; CHECK-NEXT: store i32 0, ptr [[TMP7]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1 ; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] -; CHECK: pred.store.if3: +; CHECK: pred.store.if1: ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 1 ; CHECK-NEXT: store i32 0, ptr [[TMP10]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] -; CHECK: pred.store.continue4: -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2 +; CHECK: pred.store.continue2: +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2 ; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] -; CHECK: pred.store.if5: +; CHECK: pred.store.if3: ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 2 ; CHECK-NEXT: store i32 0, ptr [[TMP13]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] -; CHECK: pred.store.continue6: -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3 +; CHECK: pred.store.continue4: +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3 ; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] -; CHECK: pred.store.if7: +; CHECK: pred.store.if5: ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 3 ; CHECK-NEXT: store i32 0, ptr [[TMP16]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] -; CHECK: pred.store.continue8: +; CHECK: pred.store.continue6: ; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] @@ -200,11 +196,11 @@ define void @redundant_or_2(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: br i1 [[C_0]], label [[LOOP_LATCH]], label [[THEN_1:%.*]] +; CHECK-NEXT: br i1 [[C_1]], label [[LOOP_LATCH]], label [[THEN_1:%.*]] ; CHECK: then.1: ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV]], 2 ; CHECK-NEXT: [[OR:%.*]] = or i1 true, [[CMP]] -; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR]], i1 [[C_1]], i1 false +; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR]], i1 [[C_0:%.*]], i1 false ; CHECK-NEXT: br i1 [[COND]], label [[THEN_2:%.*]], label [[LOOP_LATCH]] ; CHECK: then.2: ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]] @@ -252,12 +248,9 @@ define void @redundant_and_1(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C_0:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i1> poison, i1 [[C_1:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT1]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> , <4 x i1> [[BROADCAST_SPLAT2]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP0]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> , <4 x i1> [[TMP0]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> , <4 x i1> [[TMP2]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0 ; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] @@ -268,25 +261,25 @@ define void @redundant_and_1(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK: pred.store.continue: ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP6]], i32 1 ; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] -; CHECK: pred.store.if3: +; CHECK: pred.store.if1: ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 1 ; CHECK-NEXT: store i32 0, ptr [[TMP12]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] -; CHECK: pred.store.continue4: +; CHECK: pred.store.continue2: ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP6]], i32 2 ; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] -; CHECK: pred.store.if5: +; CHECK: pred.store.if3: ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 2 ; CHECK-NEXT: store i32 0, ptr [[TMP15]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] -; CHECK: pred.store.continue6: +; CHECK: pred.store.continue4: ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP6]], i32 3 ; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] -; CHECK: pred.store.if7: +; CHECK: pred.store.if5: ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 3 ; CHECK-NEXT: store i32 0, ptr [[TMP18]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] -; CHECK: pred.store.continue8: +; CHECK: pred.store.continue6: ; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] @@ -298,7 +291,7 @@ define void @redundant_and_1(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK: then.1: ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV]], 2 ; CHECK-NEXT: [[OR:%.*]] = or i1 [[CMP]], false -; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR]], i1 [[C_1]], i1 false +; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR]], i1 [[C_1:%.*]], i1 false ; CHECK-NEXT: br i1 [[COND]], label [[THEN_2:%.*]], label [[LOOP_LATCH]] ; CHECK: then.2: ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]] @@ -346,10 +339,10 @@ define void @redundant_and_2(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i1> poison, i1 [[C_0:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT1]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT2]], splat (i1 true) -; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i1> zeroinitializer, <4 x i1> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> , <4 x i1> [[TMP1]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> , <4 x i1> [[TMP0]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> , <4 x i1> [[TMP3]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: diff --git a/llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll index d695de6491baa..15c052cc4c822 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll @@ -458,11 +458,10 @@ define void @switch_all_dests_distinct_variant_using_branches(ptr %start, ptr %e ; COST-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1 ; COST-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) ; COST-NEXT: [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13) -; COST-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer ; COST-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true) ; COST-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true) ; COST-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer -; COST-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP12]], <4 x i1> [[TMP9]], <4 x i1> zeroinitializer +; COST-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP12]], <4 x i1> zeroinitializer ; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP13]]) ; COST-NEXT: [[TMP14:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer ; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP14]]) @@ -534,16 +533,14 @@ define void @switch_all_dests_distinct_variant_using_branches(ptr %start, ptr %e ; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12) ; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13) ; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13) -; FORCED-NEXT: [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer -; FORCED-NEXT: [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], zeroinitializer ; FORCED-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP9]], splat (i1 true) ; FORCED-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP10]], splat (i1 true) ; FORCED-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP11]], splat (i1 true) ; FORCED-NEXT: [[TMP18:%.*]] = xor <4 x i1> [[TMP12]], splat (i1 true) ; FORCED-NEXT: [[TMP19:%.*]] = select <4 x i1> [[TMP15]], <4 x i1> [[TMP17]], <4 x i1> zeroinitializer ; FORCED-NEXT: [[TMP20:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP18]], <4 x i1> zeroinitializer -; FORCED-NEXT: [[TMP21:%.*]] = select <4 x i1> [[TMP19]], <4 x i1> [[TMP13]], <4 x i1> zeroinitializer -; FORCED-NEXT: [[TMP22:%.*]] = select <4 x i1> [[TMP20]], <4 x i1> [[TMP14]], <4 x i1> zeroinitializer +; FORCED-NEXT: [[TMP21:%.*]] = select <4 x i1> [[TMP15]], <4 x i1> [[TMP19]], <4 x i1> zeroinitializer +; FORCED-NEXT: [[TMP22:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP20]], <4 x i1> zeroinitializer ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP21]]) ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[TMP8]], i32 1, <4 x i1> [[TMP22]]) ; FORCED-NEXT: [[TMP23:%.*]] = select <4 x i1> [[TMP15]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll index 755d7e2f6bbd8..a809b9aa53370 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll @@ -1356,12 +1356,10 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) { ; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP4:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) -; CHECK-NEXT: [[TMP6:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00) ; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i1> [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP10:%.*]] = and <4 x i1> [[TMP6]], [[TMP8]] -; CHECK-NEXT: [[TMP11:%.*]] = and <4 x i1> [[TMP10]], [[TMP3]] +; CHECK-NEXT: [[TMP11:%.*]] = and <4 x i1> [[TMP3]], [[TMP8]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP11]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP7]] ; CHECK-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP5]], <4 x float> [[TMP9]], <4 x float> [[PREDPHI]] ; CHECK-NEXT: [[PREDPHI3]] = select <4 x i1> [[TMP3]], <4 x float> [[PREDPHI2]], <4 x float> [[VEC_PHI]] diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll index dd1e9ac7317eb..2b83b38245c0b 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll @@ -1186,12 +1186,10 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) { ; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP4:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) -; CHECK-NEXT: [[TMP6:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00) ; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i1> [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP10:%.*]] = and <4 x i1> [[TMP6]], [[TMP8]] -; CHECK-NEXT: [[TMP11:%.*]] = and <4 x i1> [[TMP10]], [[TMP3]] +; CHECK-NEXT: [[TMP11:%.*]] = and <4 x i1> [[TMP3]], [[TMP8]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP11]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP7]] ; CHECK-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP5]], <4 x float> [[TMP9]], <4 x float> [[PREDPHI]] ; CHECK-NEXT: [[PREDPHI3]] = select <4 x i1> [[TMP3]], <4 x float> [[PREDPHI2]], <4 x float> [[VEC_PHI]] @@ -1243,18 +1241,14 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) { ; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD4]], splat (float 1.000000e+00) ; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true) ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true) -; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00) -; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD2]], splat (float 2.000000e+00) ; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD3]] ; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = fadd fast <4 x float> [[VEC_PHI1]], [[WIDE_LOAD4]] ; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = and <4 x i1> [[TMP5]], [[TMP7]] ; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = and <4 x i1> [[TMP6]], [[TMP8]] ; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]] ; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = fadd fast <4 x float> [[VEC_PHI1]], [[WIDE_LOAD2]] -; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = and <4 x i1> [[TMP11]], [[TMP9]] -; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = and <4 x i1> [[TMP19]], [[TMP5]] -; CHECK-INTERLEAVED-NEXT: [[TMP21:%.*]] = and <4 x i1> [[TMP12]], [[TMP10]] -; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = and <4 x i1> [[TMP21]], [[TMP6]] +; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = and <4 x i1> [[TMP5]], [[TMP9]] +; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = and <4 x i1> [[TMP6]], [[TMP10]] ; CHECK-INTERLEAVED-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP20]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP13]] ; CHECK-INTERLEAVED-NEXT: [[PREDPHI5:%.*]] = select <4 x i1> [[TMP15]], <4 x float> [[TMP17]], <4 x float> [[PREDPHI]] ; CHECK-INTERLEAVED-NEXT: [[PREDPHI6]] = select <4 x i1> [[TMP5]], <4 x float> [[PREDPHI5]], <4 x float> [[VEC_PHI]] diff --git a/llvm/test/Transforms/LoopVectorize/reduction.ll b/llvm/test/Transforms/LoopVectorize/reduction.ll index aa1ac25182bb5..a7b8791029300 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction.ll @@ -762,12 +762,10 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) { ; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP4:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) -; CHECK-NEXT: [[TMP6:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00) ; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i1> [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP10:%.*]] = and <4 x i1> [[TMP6]], [[TMP8]] -; CHECK-NEXT: [[TMP11:%.*]] = and <4 x i1> [[TMP10]], [[TMP3]] +; CHECK-NEXT: [[TMP11:%.*]] = and <4 x i1> [[TMP3]], [[TMP8]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP11]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP7]] ; CHECK-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP5]], <4 x float> [[TMP9]], <4 x float> [[PREDPHI]] ; CHECK-NEXT: [[PREDPHI3]] = select <4 x i1> [[TMP3]], <4 x float> [[PREDPHI2]], <4 x float> [[VEC_PHI]] From 94f7b9f1e107f8e0a889eac31b212bdd356653ed Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 2 Sep 2025 00:26:43 +0800 Subject: [PATCH 3/6] Restrict to multiple users --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index e18dad09b6360..1abd007707390 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1119,9 +1119,11 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { return; } - // (x & y) & z -> x & (y & z) + // Reassociate (x & y) & z -> x & (y & z) if x has multiple users. With tail + // folding it is likely that x is a header mask and can be simplified further. if (match(Def, m_LogicalAnd(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)), - m_VPValue(Z)))) + m_VPValue(Z))) && + X->hasMoreThanOneUniqueUser()) return Def->replaceAllUsesWith( Builder.createLogicalAnd(X, Builder.createLogicalAnd(X, Y))); @@ -2036,7 +2038,6 @@ void VPlanTransforms::truncateToMinimalBitwidths( PH->appendRecipe(NewOp); } } - } } } From 28162b8ac60ef7dbe1abeb6767e6792bb8113a7f Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 2 Sep 2025 00:28:52 +0800 Subject: [PATCH 4/6] Undo stray change --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 1abd007707390..52f91622df0cf 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2038,6 +2038,7 @@ void VPlanTransforms::truncateToMinimalBitwidths( PH->appendRecipe(NewOp); } } + } } } From f66a9407c8d3fc9d161e1faa16198aa2db4365ab Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 2 Sep 2025 00:49:56 +0800 Subject: [PATCH 5/6] Update comment --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 52f91622df0cf..cb42f6903bb4c 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1119,8 +1119,9 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { return; } - // Reassociate (x & y) & z -> x & (y & z) if x has multiple users. With tail - // folding it is likely that x is a header mask and can be simplified further. + // Reassociate (x && y) && z -> x && (y && z) if x has multiple users. With + // tail folding it is likely that x is a header mask and can be simplified + // further. if (match(Def, m_LogicalAnd(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)), m_VPValue(Z))) && X->hasMoreThanOneUniqueUser()) From 76619661664e21218542400eaca56809e905b55d Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 2 Sep 2025 19:03:49 +0800 Subject: [PATCH 6/6] Fix devastating typo --- .../Transforms/Vectorize/VPlanTransforms.cpp | 2 +- .../AArch64/force-target-instruction-cost.ll | 69 +++++++-------- .../RISCV/blocks-with-dead-instructions.ll | 13 ++- ...ruction-or-drop-poison-generating-flags.ll | 25 ++++-- .../LoopVectorize/X86/constant-fold.ll | 83 ++++++++++--------- .../LoopVectorize/X86/predicate-switch.ll | 9 +- .../LoopVectorize/reduction-inloop-pred.ll | 4 +- .../LoopVectorize/reduction-inloop.ll | 12 ++- .../Transforms/LoopVectorize/reduction.ll | 4 +- 9 files changed, 125 insertions(+), 96 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index cb42f6903bb4c..bfdf9ff4ac5df 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1126,7 +1126,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { m_VPValue(Z))) && X->hasMoreThanOneUniqueUser()) return Def->replaceAllUsesWith( - Builder.createLogicalAnd(X, Builder.createLogicalAnd(X, Y))); + Builder.createLogicalAnd(X, Builder.createLogicalAnd(Y, Z))); if (match(Def, m_c_Mul(m_VPValue(A), m_SpecificInt(1)))) return Def->replaceAllUsesWith(A); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll index 0822aaada233e..1d65ff809725d 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll @@ -194,11 +194,16 @@ define void @test_exit_branch_cost(ptr %dst, ptr noalias %x.ptr, ptr noalias %y. ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT40:%.*]] = insertelement <2 x i1> poison, i1 [[C_3]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT41:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT40]], <2 x i1> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT56:%.*]] = insertelement <2 x i1> poison, i1 [[C_4]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT57:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT56]], <2 x i1> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[C_4]], <2 x i1> [[BROADCAST_SPLAT41]], <2 x i1> zeroinitializer ; CHECK-NEXT: [[TMP11:%.*]] = xor <2 x i1> [[TMP1]], splat (i1 true) +; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[BROADCAST_SPLAT41]], <2 x i1> [[BROADCAST_SPLAT57]], <2 x i1> zeroinitializer +; CHECK-NEXT: [[TMP33:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT57]], splat (i1 true) +; CHECK-NEXT: [[TMP6:%.*]] = select <2 x i1> [[BROADCAST_SPLAT41]], <2 x i1> [[TMP33]], <2 x i1> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE53:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE55:.*]] ] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[X_PTR]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[TMP47:%.*]] = icmp eq <2 x i64> [[WIDE_LOAD]], zeroinitializer @@ -210,59 +215,57 @@ define void @test_exit_branch_cost(ptr %dst, ptr noalias %x.ptr, ptr noalias %y. ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1 -; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF40:.*]], label %[[PRED_STORE_CONTINUE41:.*]] -; CHECK: [[PRED_STORE_IF40]]: -; CHECK-NEXT: store i64 0, ptr [[DST_1]], align 8, !alias.scope [[META7]], !noalias [[META10]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE41]] -; CHECK: [[PRED_STORE_CONTINUE41]]: -; CHECK-NEXT: [[TMP13:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[TMP11]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP13]], i32 0 -; CHECK-NEXT: br i1 [[TMP16]], label %[[PRED_STORE_IF42:.*]], label %[[PRED_STORE_CONTINUE43:.*]] +; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF42:.*]], label %[[PRED_STORE_CONTINUE43:.*]] ; CHECK: [[PRED_STORE_IF42]]: -; CHECK-NEXT: store i64 0, ptr [[DST_3]], align 8, !alias.scope [[META15:![0-9]+]], !noalias [[META16:![0-9]+]] +; CHECK-NEXT: store i64 0, ptr [[DST_1]], align 8, !alias.scope [[META7]], !noalias [[META10]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE43]] ; CHECK: [[PRED_STORE_CONTINUE43]]: -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP13]], i32 1 -; CHECK-NEXT: br i1 [[TMP17]], label %[[PRED_STORE_IF44:.*]], label %[[PRED_STORE_CONTINUE45:.*]] +; CHECK-NEXT: [[TMP13:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[TMP11]], <2 x i1> zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP13]], i32 0 +; CHECK-NEXT: br i1 [[TMP16]], label %[[PRED_STORE_IF44:.*]], label %[[PRED_STORE_CONTINUE45:.*]] ; CHECK: [[PRED_STORE_IF44]]: -; CHECK-NEXT: store i64 0, ptr [[DST_3]], align 8, !alias.scope [[META15]], !noalias [[META16]] +; CHECK-NEXT: store i64 0, ptr [[DST_3]], align 8, !alias.scope [[META15:![0-9]+]], !noalias [[META16:![0-9]+]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE45]] ; CHECK: [[PRED_STORE_CONTINUE45]]: -; CHECK-NEXT: [[TMP19:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[BROADCAST_SPLAT41]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[TMP21:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[TMP19]], <2 x i1> zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP13]], i32 1 +; CHECK-NEXT: br i1 [[TMP17]], label %[[PRED_STORE_IF46:.*]], label %[[PRED_STORE_CONTINUE47:.*]] +; CHECK: [[PRED_STORE_IF46]]: +; CHECK-NEXT: store i64 0, ptr [[DST_3]], align 8, !alias.scope [[META15]], !noalias [[META16]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE47]] +; CHECK: [[PRED_STORE_CONTINUE47]]: +; CHECK-NEXT: [[TMP21:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[TMP2]], <2 x i1> zeroinitializer ; CHECK-NEXT: [[TMP23:%.*]] = or <2 x i1> [[TMP47]], [[TMP21]] ; CHECK-NEXT: [[PREDPHI58:%.*]] = select <2 x i1> [[TMP21]], <2 x i64> zeroinitializer, <2 x i64> splat (i64 1) ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP23]], i32 0 -; CHECK-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF46:.*]], label %[[PRED_STORE_CONTINUE47:.*]] -; CHECK: [[PRED_STORE_IF46]]: +; CHECK-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF48:.*]], label %[[PRED_STORE_CONTINUE49:.*]] +; CHECK: [[PRED_STORE_IF48]]: ; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x i64> [[PREDPHI58]], i32 0 ; CHECK-NEXT: store i64 [[TMP29]], ptr [[DST_2]], align 8, !alias.scope [[META17:![0-9]+]], !noalias [[META18:![0-9]+]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE47]] -; CHECK: [[PRED_STORE_CONTINUE47]]: +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE49]] +; CHECK: [[PRED_STORE_CONTINUE49]]: ; CHECK-NEXT: [[TMP30:%.*]] = extractelement <2 x i1> [[TMP23]], i32 1 -; CHECK-NEXT: br i1 [[TMP30]], label %[[PRED_STORE_IF48:.*]], label %[[PRED_STORE_CONTINUE49:.*]] -; CHECK: [[PRED_STORE_IF48]]: +; CHECK-NEXT: br i1 [[TMP30]], label %[[PRED_STORE_IF50:.*]], label %[[PRED_STORE_CONTINUE51:.*]] +; CHECK: [[PRED_STORE_IF50]]: ; CHECK-NEXT: [[TMP31:%.*]] = extractelement <2 x i64> [[PREDPHI58]], i32 1 ; CHECK-NEXT: store i64 [[TMP31]], ptr [[DST_2]], align 8, !alias.scope [[META17]], !noalias [[META18]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE49]] -; CHECK: [[PRED_STORE_CONTINUE49]]: -; CHECK-NEXT: [[TMP18:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[BROADCAST_SPLAT41]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[TMP35:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[TMP18]], <2 x i1> zeroinitializer +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE51]] +; CHECK: [[PRED_STORE_CONTINUE51]]: +; CHECK-NEXT: [[TMP35:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[TMP6]], <2 x i1> zeroinitializer ; CHECK-NEXT: [[TMP37:%.*]] = or <2 x i1> [[TMP23]], [[TMP35]] ; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i1> [[TMP37]], i32 0 -; CHECK-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF50:.*]], label %[[PRED_STORE_CONTINUE51:.*]] -; CHECK: [[PRED_STORE_IF50]]: +; CHECK-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF52:.*]], label %[[PRED_STORE_CONTINUE53:.*]] +; CHECK: [[PRED_STORE_IF52]]: ; CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19:![0-9]+]] ; CHECK-NEXT: store i64 [[TMP22]], ptr [[DST]], align 8, !alias.scope [[META20:![0-9]+]], !noalias [[META19]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE51]] -; CHECK: [[PRED_STORE_CONTINUE51]]: +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE53]] +; CHECK: [[PRED_STORE_CONTINUE53]]: ; CHECK-NEXT: [[TMP44:%.*]] = extractelement <2 x i1> [[TMP37]], i32 1 -; CHECK-NEXT: br i1 [[TMP44]], label %[[PRED_STORE_IF52:.*]], label %[[PRED_STORE_CONTINUE53]] -; CHECK: [[PRED_STORE_IF52]]: +; CHECK-NEXT: br i1 [[TMP44]], label %[[PRED_STORE_IF54:.*]], label %[[PRED_STORE_CONTINUE55]] +; CHECK: [[PRED_STORE_IF54]]: ; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19]] ; CHECK-NEXT: store i64 [[TMP24]], ptr [[DST]], align 8, !alias.scope [[META20]], !noalias [[META19]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE53]] -; CHECK: [[PRED_STORE_CONTINUE53]]: +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE55]] +; CHECK: [[PRED_STORE_CONTINUE55]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 ; CHECK-NEXT: br i1 [[TMP46]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll index 6d373a42d7c3d..5a99f15b9f585 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll @@ -425,6 +425,9 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 % ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i1 [[IC]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = xor [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: [[TMP11:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP13:%.*]] = mul [[TMP11]], splat (i64 3) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP13]] @@ -433,21 +436,17 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 % ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP2]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i32 [[TMP27]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP27]] to i64 ; CHECK-NEXT: [[TMP16:%.*]] = mul i64 3, [[TMP12]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP16]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP14:%.*]] = call @llvm.stepvector.nxv8i32() -; CHECK-NEXT: [[TMP15:%.*]] = icmp ult [[TMP14]], [[BROADCAST_SPLAT4]] ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[SRC]], [[VEC_IND]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.vp.gather.nxv8i16.nxv8p0( align 2 [[TMP20]], splat (i1 true), i32 [[TMP27]]) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq [[WIDE_MASKED_GATHER]], zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = select [[TMP15]], [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = select [[TMP17]], [[TMP8]], zeroinitializer ; CHECK-NEXT: [[TMP28:%.*]] = xor [[TMP17]], splat (i1 true) -; CHECK-NEXT: [[TMP22:%.*]] = or [[TMP18]], [[TMP28]] -; CHECK-NEXT: [[TMP23:%.*]] = select [[TMP15]], [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = or [[TMP14]], [[TMP28]] +; CHECK-NEXT: [[TMP23:%.*]] = select [[TMP17]], [[BROADCAST_SPLAT]], zeroinitializer ; CHECK-NEXT: [[TMP24:%.*]] = or [[TMP22]], [[TMP23]] ; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0( zeroinitializer, align 2 [[TMP20]], [[TMP24]], i32 [[TMP27]]) ; CHECK-NEXT: [[TMP25:%.*]] = zext i32 [[TMP27]] to i64 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll index 01ea0f0ed1dc3..8b212f4ef9706 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll @@ -12,7 +12,11 @@ define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[A]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[B]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i64 [[C]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP6]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] @@ -29,15 +33,18 @@ define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT5]], poison, zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv8i32() ; CHECK-NEXT: [[TMP11:%.*]] = icmp ult [[TMP10]], [[BROADCAST_SPLAT8]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp ule [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP14:%.*]] = icmp ule [[VEC_IND]], [[BROADCAST_SPLAT2]] -; CHECK-NEXT: [[TMP15:%.*]] = select [[TMP11]], [[TMP14]], zeroinitializer -; CHECK-NEXT: [[TMP29:%.*]] = xor [[TMP14]], splat (i1 true) -; CHECK-NEXT: [[TMP17:%.*]] = or [[TMP15]], [[TMP29]] -; CHECK-NEXT: [[TMP13:%.*]] = select [[TMP11]], [[TMP14]], zeroinitializer -; CHECK-NEXT: [[TMP21:%.*]] = select [[TMP11]], [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = or [[TMP17]], [[TMP14]] -; CHECK-NEXT: [[TMP22:%.*]] = select [[TMP11]], [[TMP12]], zeroinitializer -; CHECK-NEXT: [[TMP23:%.*]] = extractelement [[TMP21]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = select [[TMP13]], [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = xor [[TMP13]], splat (i1 true) +; CHECK-NEXT: [[TMP17:%.*]] = or [[TMP9]], [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = icmp ule [[VEC_IND]], [[BROADCAST_SPLAT4]] +; CHECK-NEXT: [[TMP19:%.*]] = select [[TMP17]], [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = xor [[TMP14]], splat (i1 true) +; CHECK-NEXT: [[TMP21:%.*]] = select [[TMP13]], [[TMP20]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = select [[TMP11]], [[TMP21]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = or [[TMP19]], [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement [[TMP15]], i32 0 ; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP23]], i64 poison, i64 [[INDEX]] ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i16, ptr [[ARG]], i64 [[PREDPHI]] ; CHECK-NEXT: call void @llvm.vp.store.nxv8i16.p0( zeroinitializer, ptr align 2 [[TMP24]], [[TMP22]], i32 [[TMP25]]) diff --git a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll index 030879f8ec0e8..54d738388ea73 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll @@ -65,39 +65,41 @@ define void @redundant_or_1(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C_0:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i1> poison, i1 [[C_1:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT1]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT2]], splat (i1 true) +; CHECK-NEXT: [[TMP0:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[BROADCAST_SPLAT]], <4 x i1> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> , <4 x i1> [[TMP0]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> , <4 x i1> [[TMP2]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0 ; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 0 ; CHECK-NEXT: store i32 0, ptr [[TMP8]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1 ; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] -; CHECK: pred.store.if1: +; CHECK: pred.store.if3: ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 1 ; CHECK-NEXT: store i32 0, ptr [[TMP11]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] -; CHECK: pred.store.continue2: -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2 +; CHECK: pred.store.continue4: +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2 ; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] -; CHECK: pred.store.if3: +; CHECK: pred.store.if5: ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 2 ; CHECK-NEXT: store i32 0, ptr [[TMP14]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] -; CHECK: pred.store.continue4: -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3 +; CHECK: pred.store.continue6: +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3 ; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] -; CHECK: pred.store.if5: +; CHECK: pred.store.if7: ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 3 ; CHECK-NEXT: store i32 0, ptr [[TMP17]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] -; CHECK: pred.store.continue6: +; CHECK: pred.store.continue8: ; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] @@ -105,11 +107,11 @@ define void @redundant_or_1(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: br i1 [[C_0]], label [[LOOP_LATCH]], label [[THEN_1:%.*]] +; CHECK-NEXT: br i1 [[C_1]], label [[LOOP_LATCH]], label [[THEN_1:%.*]] ; CHECK: then.1: ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV]], 2 ; CHECK-NEXT: [[OR:%.*]] = or i1 [[CMP]], true -; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR]], i1 [[C_1:%.*]], i1 false +; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR]], i1 [[C_0]], i1 false ; CHECK-NEXT: br i1 [[COND]], label [[THEN_2:%.*]], label [[LOOP_LATCH]] ; CHECK: then.2: ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]] @@ -156,39 +158,41 @@ define void @redundant_or_2(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C_1:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i1> poison, i1 [[C_0:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT1]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT2]], splat (i1 true) +; CHECK-NEXT: [[TMP0:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[BROADCAST_SPLAT]], <4 x i1> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> , <4 x i1> [[TMP0]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> , <4 x i1> [[TMP3]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 0 ; CHECK-NEXT: store i32 0, ptr [[TMP7]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1 ; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] -; CHECK: pred.store.if1: +; CHECK: pred.store.if3: ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 1 ; CHECK-NEXT: store i32 0, ptr [[TMP10]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] -; CHECK: pred.store.continue2: -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2 +; CHECK: pred.store.continue4: +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2 ; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] -; CHECK: pred.store.if3: +; CHECK: pred.store.if5: ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 2 ; CHECK-NEXT: store i32 0, ptr [[TMP13]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] -; CHECK: pred.store.continue4: -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3 +; CHECK: pred.store.continue6: +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3 ; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] -; CHECK: pred.store.if5: +; CHECK: pred.store.if7: ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 3 ; CHECK-NEXT: store i32 0, ptr [[TMP16]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] -; CHECK: pred.store.continue6: +; CHECK: pred.store.continue8: ; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] @@ -196,11 +200,11 @@ define void @redundant_or_2(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: br i1 [[C_1]], label [[LOOP_LATCH]], label [[THEN_1:%.*]] +; CHECK-NEXT: br i1 [[C_0]], label [[LOOP_LATCH]], label [[THEN_1:%.*]] ; CHECK: then.1: ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV]], 2 ; CHECK-NEXT: [[OR:%.*]] = or i1 true, [[CMP]] -; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR]], i1 [[C_0:%.*]], i1 false +; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR]], i1 [[C_1]], i1 false ; CHECK-NEXT: br i1 [[COND]], label [[THEN_2:%.*]], label [[LOOP_LATCH]] ; CHECK: then.2: ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]] @@ -248,9 +252,12 @@ define void @redundant_and_1(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C_0:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i1> poison, i1 [[C_1:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT1]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> , <4 x i1> [[TMP0]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> , <4 x i1> [[BROADCAST_SPLAT2]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP0]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> , <4 x i1> [[TMP2]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0 ; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] @@ -261,25 +268,25 @@ define void @redundant_and_1(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK: pred.store.continue: ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP6]], i32 1 ; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] -; CHECK: pred.store.if1: +; CHECK: pred.store.if3: ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 1 ; CHECK-NEXT: store i32 0, ptr [[TMP12]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] -; CHECK: pred.store.continue2: +; CHECK: pred.store.continue4: ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP6]], i32 2 ; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] -; CHECK: pred.store.if3: +; CHECK: pred.store.if5: ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 2 ; CHECK-NEXT: store i32 0, ptr [[TMP15]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] -; CHECK: pred.store.continue4: +; CHECK: pred.store.continue6: ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP6]], i32 3 ; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] -; CHECK: pred.store.if5: +; CHECK: pred.store.if7: ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 3 ; CHECK-NEXT: store i32 0, ptr [[TMP18]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] -; CHECK: pred.store.continue6: +; CHECK: pred.store.continue8: ; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] @@ -291,7 +298,7 @@ define void @redundant_and_1(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK: then.1: ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV]], 2 ; CHECK-NEXT: [[OR:%.*]] = or i1 [[CMP]], false -; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR]], i1 [[C_1:%.*]], i1 false +; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR]], i1 [[C_1]], i1 false ; CHECK-NEXT: br i1 [[COND]], label [[THEN_2:%.*]], label [[LOOP_LATCH]] ; CHECK: then.2: ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]] @@ -339,10 +346,10 @@ define void @redundant_and_2(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i1> poison, i1 [[C_0:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT1]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT2]], splat (i1 true) +; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i1> zeroinitializer, <4 x i1> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> , <4 x i1> [[TMP0]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> , <4 x i1> [[TMP3]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> , <4 x i1> [[TMP1]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: diff --git a/llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll index 15c052cc4c822..e25be6f867862 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll @@ -458,9 +458,10 @@ define void @switch_all_dests_distinct_variant_using_branches(ptr %start, ptr %e ; COST-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1 ; COST-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) ; COST-NEXT: [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13) +; COST-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer ; COST-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true) ; COST-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true) -; COST-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer +; COST-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP11]], <4 x i1> [[TMP9]], <4 x i1> zeroinitializer ; COST-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP12]], <4 x i1> zeroinitializer ; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP13]]) ; COST-NEXT: [[TMP14:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer @@ -533,12 +534,14 @@ define void @switch_all_dests_distinct_variant_using_branches(ptr %start, ptr %e ; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12) ; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13) ; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13) +; FORCED-NEXT: [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer +; FORCED-NEXT: [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], zeroinitializer ; FORCED-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP9]], splat (i1 true) ; FORCED-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP10]], splat (i1 true) ; FORCED-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP11]], splat (i1 true) ; FORCED-NEXT: [[TMP18:%.*]] = xor <4 x i1> [[TMP12]], splat (i1 true) -; FORCED-NEXT: [[TMP19:%.*]] = select <4 x i1> [[TMP15]], <4 x i1> [[TMP17]], <4 x i1> zeroinitializer -; FORCED-NEXT: [[TMP20:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP18]], <4 x i1> zeroinitializer +; FORCED-NEXT: [[TMP19:%.*]] = select <4 x i1> [[TMP17]], <4 x i1> [[TMP13]], <4 x i1> zeroinitializer +; FORCED-NEXT: [[TMP20:%.*]] = select <4 x i1> [[TMP18]], <4 x i1> [[TMP14]], <4 x i1> zeroinitializer ; FORCED-NEXT: [[TMP21:%.*]] = select <4 x i1> [[TMP15]], <4 x i1> [[TMP19]], <4 x i1> zeroinitializer ; FORCED-NEXT: [[TMP22:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP20]], <4 x i1> zeroinitializer ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP21]]) diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll index a809b9aa53370..755d7e2f6bbd8 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll @@ -1356,10 +1356,12 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) { ; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP4:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) +; CHECK-NEXT: [[TMP6:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00) ; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i1> [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP11:%.*]] = and <4 x i1> [[TMP3]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = and <4 x i1> [[TMP6]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = and <4 x i1> [[TMP10]], [[TMP3]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP11]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP7]] ; CHECK-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP5]], <4 x float> [[TMP9]], <4 x float> [[PREDPHI]] ; CHECK-NEXT: [[PREDPHI3]] = select <4 x i1> [[TMP3]], <4 x float> [[PREDPHI2]], <4 x float> [[VEC_PHI]] diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll index 2b83b38245c0b..dd1e9ac7317eb 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll @@ -1186,10 +1186,12 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) { ; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP4:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) +; CHECK-NEXT: [[TMP6:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00) ; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i1> [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP11:%.*]] = and <4 x i1> [[TMP3]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = and <4 x i1> [[TMP6]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = and <4 x i1> [[TMP10]], [[TMP3]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP11]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP7]] ; CHECK-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP5]], <4 x float> [[TMP9]], <4 x float> [[PREDPHI]] ; CHECK-NEXT: [[PREDPHI3]] = select <4 x i1> [[TMP3]], <4 x float> [[PREDPHI2]], <4 x float> [[VEC_PHI]] @@ -1241,14 +1243,18 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) { ; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD4]], splat (float 1.000000e+00) ; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true) ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true) +; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00) +; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD2]], splat (float 2.000000e+00) ; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD3]] ; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = fadd fast <4 x float> [[VEC_PHI1]], [[WIDE_LOAD4]] ; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = and <4 x i1> [[TMP5]], [[TMP7]] ; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = and <4 x i1> [[TMP6]], [[TMP8]] ; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]] ; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = fadd fast <4 x float> [[VEC_PHI1]], [[WIDE_LOAD2]] -; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = and <4 x i1> [[TMP5]], [[TMP9]] -; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = and <4 x i1> [[TMP6]], [[TMP10]] +; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = and <4 x i1> [[TMP11]], [[TMP9]] +; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = and <4 x i1> [[TMP19]], [[TMP5]] +; CHECK-INTERLEAVED-NEXT: [[TMP21:%.*]] = and <4 x i1> [[TMP12]], [[TMP10]] +; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = and <4 x i1> [[TMP21]], [[TMP6]] ; CHECK-INTERLEAVED-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP20]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP13]] ; CHECK-INTERLEAVED-NEXT: [[PREDPHI5:%.*]] = select <4 x i1> [[TMP15]], <4 x float> [[TMP17]], <4 x float> [[PREDPHI]] ; CHECK-INTERLEAVED-NEXT: [[PREDPHI6]] = select <4 x i1> [[TMP5]], <4 x float> [[PREDPHI5]], <4 x float> [[VEC_PHI]] diff --git a/llvm/test/Transforms/LoopVectorize/reduction.ll b/llvm/test/Transforms/LoopVectorize/reduction.ll index a7b8791029300..aa1ac25182bb5 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction.ll @@ -762,10 +762,12 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) { ; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP4:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) +; CHECK-NEXT: [[TMP6:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00) ; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i1> [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP11:%.*]] = and <4 x i1> [[TMP3]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = and <4 x i1> [[TMP6]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = and <4 x i1> [[TMP10]], [[TMP3]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP11]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP7]] ; CHECK-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP5]], <4 x float> [[TMP9]], <4 x float> [[PREDPHI]] ; CHECK-NEXT: [[PREDPHI3]] = select <4 x i1> [[TMP3]], <4 x float> [[PREDPHI2]], <4 x float> [[VEC_PHI]]