diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 9d7bf97d305ed..cfbb4ad32d681 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1351,6 +1351,9 @@ class VPInstruction : public VPRecipeWithIRFlags, } } + /// Returns true if the underlying opcode may read from or write to memory. + bool opcodeMayReadOrWriteFromMemory() const; + /// Returns true if the recipe only uses the first lane of operand \p Op. bool onlyFirstLaneUsed(const VPValue *Op) const override; diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 76336ae447edb..e54df8bdeac55 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -51,24 +51,7 @@ extern cl::opt ForceTargetInstructionCost; bool VPRecipeBase::mayWriteToMemory() const { switch (getVPDefID()) { case VPInstructionSC: - if (Instruction::isBinaryOp(cast(this)->getOpcode())) - return false; - switch (cast(this)->getOpcode()) { - case Instruction::Or: - case Instruction::ICmp: - case Instruction::Select: - case VPInstruction::AnyOf: - case VPInstruction::Not: - case VPInstruction::CalculateTripCountMinusVF: - case VPInstruction::CanonicalIVIncrementForPart: - case VPInstruction::ExtractFromEnd: - case VPInstruction::FirstOrderRecurrenceSplice: - case VPInstruction::LogicalAnd: - case VPInstruction::PtrAdd: - return false; - default: - return true; - } + return cast(this)->opcodeMayReadOrWriteFromMemory(); case VPInterleaveSC: return cast(this)->getNumStoreOperands() > 0; case VPWidenStoreEVLSC: @@ -115,6 +98,8 @@ bool VPRecipeBase::mayWriteToMemory() const { bool VPRecipeBase::mayReadFromMemory() const { switch (getVPDefID()) { + case VPInstructionSC: + return cast(this)->opcodeMayReadOrWriteFromMemory(); case VPWidenLoadEVLSC: case VPWidenLoadSC: return true; @@ -707,6 +692,26 @@ void VPInstruction::execute(VPTransformState &State) { /*IsScalar*/ GeneratesPerFirstLaneOnly); } +bool VPInstruction::opcodeMayReadOrWriteFromMemory() const { + if (Instruction::isBinaryOp(getOpcode())) + return false; + switch (getOpcode()) { + case Instruction::ICmp: + case Instruction::Select: + case VPInstruction::AnyOf: + case VPInstruction::CalculateTripCountMinusVF: + case VPInstruction::CanonicalIVIncrementForPart: + case VPInstruction::ExtractFromEnd: + case VPInstruction::FirstOrderRecurrenceSplice: + case VPInstruction::LogicalAnd: + case VPInstruction::Not: + case VPInstruction::PtrAdd: + return false; + default: + return true; + } +} + bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); if (Instruction::isBinaryOp(getOpcode())) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll index ddf6c1005e058..254cdf2d14d9f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll @@ -209,6 +209,7 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i1> poison, i1 [[C_0]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i1> [[BROADCAST_SPLATINSERT]], <16 x i1> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = xor <16 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE30:.*]] ] @@ -218,7 +219,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = xor <16 x i1> [[TMP3]], splat (i1 true) -; CHECK-NEXT: [[TMP5:%.*]] = xor <16 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[TMP4]], <16 x i1> [[TMP5]], <16 x i1> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i1> [[TMP6]], [[TMP3]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> zeroinitializer, <16 x i8> splat (i8 1) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll index 08a6001431903..8c5d84e6981bc 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll @@ -151,9 +151,9 @@ exit: ret void } -define void @test_exit_branch_cost(ptr %dst, i64 %x, i32 %y, ptr %dst.1, i1 %c.4, ptr %src, ptr %dst.3, i1 %c.3, ptr %dst.2) { +define void @test_exit_branch_cost(ptr %dst, ptr noalias %x.ptr, ptr noalias %y.ptr, ptr %dst.1, i1 %c.4, ptr %src, ptr %dst.3, i1 %c.3, ptr %dst.2) { ; CHECK-LABEL: define void @test_exit_branch_cost( -; CHECK-SAME: ptr [[DST:%.*]], i64 [[X:%.*]], i32 [[Y:%.*]], ptr [[DST_1:%.*]], i1 [[C_4:%.*]], ptr [[SRC:%.*]], ptr [[DST_3:%.*]], i1 [[C_3:%.*]], ptr [[DST_2:%.*]]) { +; CHECK-SAME: ptr [[DST:%.*]], ptr noalias [[X_PTR:%.*]], ptr noalias [[Y_PTR:%.*]], ptr [[DST_1:%.*]], i1 [[C_4:%.*]], ptr [[SRC:%.*]], ptr [[DST_3:%.*]], i1 [[C_3:%.*]], ptr [[DST_2:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: @@ -172,11 +172,11 @@ define void @test_exit_branch_cost(ptr %dst, i64 %x, i32 %y, ptr %dst.1, i1 %c.4 ; CHECK-NEXT: [[BOUND08:%.*]] = icmp ult ptr [[DST_1]], [[SCEVGEP3]] ; CHECK-NEXT: [[BOUND19:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP]] ; CHECK-NEXT: [[FOUND_CONFLICT10:%.*]] = and i1 [[BOUND08]], [[BOUND19]] -; CHECK-NEXT: [[CONFLICT_RDX11:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT10]] +; CHECK-NEXT: [[CONFLICT_RDX21:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT10]] ; CHECK-NEXT: [[BOUND012:%.*]] = icmp ult ptr [[DST_1]], [[SCEVGEP4]] ; CHECK-NEXT: [[BOUND113:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP]] ; CHECK-NEXT: [[FOUND_CONFLICT14:%.*]] = and i1 [[BOUND012]], [[BOUND113]] -; CHECK-NEXT: [[CONFLICT_RDX15:%.*]] = or i1 [[CONFLICT_RDX11]], [[FOUND_CONFLICT14]] +; CHECK-NEXT: [[CONFLICT_RDX15:%.*]] = or i1 [[CONFLICT_RDX21]], [[FOUND_CONFLICT14]] ; CHECK-NEXT: [[BOUND016:%.*]] = icmp ult ptr [[DST_3]], [[SCEVGEP2]] ; CHECK-NEXT: [[BOUND117:%.*]] = icmp ult ptr [[DST_2]], [[SCEVGEP1]] ; CHECK-NEXT: [[FOUND_CONFLICT18:%.*]] = and i1 [[BOUND016]], [[BOUND117]] @@ -184,161 +184,101 @@ define void @test_exit_branch_cost(ptr %dst, i64 %x, i32 %y, ptr %dst.1, i1 %c.4 ; CHECK-NEXT: [[BOUND020:%.*]] = icmp ult ptr [[DST_3]], [[SCEVGEP3]] ; CHECK-NEXT: [[BOUND121:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]] ; CHECK-NEXT: [[FOUND_CONFLICT22:%.*]] = and i1 [[BOUND020]], [[BOUND121]] -; CHECK-NEXT: [[CONFLICT_RDX23:%.*]] = or i1 [[CONFLICT_RDX19]], [[FOUND_CONFLICT22]] +; CHECK-NEXT: [[CONFLICT_RDX41:%.*]] = or i1 [[CONFLICT_RDX19]], [[FOUND_CONFLICT22]] ; CHECK-NEXT: [[BOUND024:%.*]] = icmp ult ptr [[DST_3]], [[SCEVGEP4]] ; CHECK-NEXT: [[BOUND125:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP1]] ; CHECK-NEXT: [[FOUND_CONFLICT26:%.*]] = and i1 [[BOUND024]], [[BOUND125]] -; CHECK-NEXT: [[CONFLICT_RDX27:%.*]] = or i1 [[CONFLICT_RDX23]], [[FOUND_CONFLICT26]] +; CHECK-NEXT: [[CONFLICT_RDX27:%.*]] = or i1 [[CONFLICT_RDX41]], [[FOUND_CONFLICT26]] ; CHECK-NEXT: [[BOUND028:%.*]] = icmp ult ptr [[DST_2]], [[SCEVGEP3]] ; CHECK-NEXT: [[BOUND129:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP2]] ; CHECK-NEXT: [[FOUND_CONFLICT30:%.*]] = and i1 [[BOUND028]], [[BOUND129]] -; CHECK-NEXT: [[CONFLICT_RDX31:%.*]] = or i1 [[CONFLICT_RDX27]], [[FOUND_CONFLICT30]] +; CHECK-NEXT: [[CONFLICT_RDX65:%.*]] = or i1 [[CONFLICT_RDX27]], [[FOUND_CONFLICT30]] ; CHECK-NEXT: [[BOUND032:%.*]] = icmp ult ptr [[DST_2]], [[SCEVGEP4]] ; CHECK-NEXT: [[BOUND133:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP2]] -; CHECK-NEXT: [[FOUND_CONFLICT34:%.*]] = and i1 [[BOUND032]], [[BOUND133]] -; CHECK-NEXT: [[CONFLICT_RDX35:%.*]] = or i1 [[CONFLICT_RDX31]], [[FOUND_CONFLICT34]] +; CHECK-NEXT: [[FOUND_CONFLICT68:%.*]] = and i1 [[BOUND032]], [[BOUND133]] +; CHECK-NEXT: [[CONFLICT_RDX35:%.*]] = or i1 [[CONFLICT_RDX65]], [[FOUND_CONFLICT68]] ; CHECK-NEXT: [[BOUND036:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP4]] ; CHECK-NEXT: [[BOUND137:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP3]] ; CHECK-NEXT: [[FOUND_CONFLICT38:%.*]] = and i1 [[BOUND036]], [[BOUND137]] ; CHECK-NEXT: [[CONFLICT_RDX39:%.*]] = or i1 [[CONFLICT_RDX35]], [[FOUND_CONFLICT38]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX39]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[X]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP47:%.*]] = icmp eq <2 x i64> [[BROADCAST_SPLAT]], zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT40:%.*]] = insertelement <2 x i1> poison, i1 [[C_3]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT41:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT40]], <2 x i1> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[C_4]], <2 x i1> [[BROADCAST_SPLAT41]], <2 x i1> zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = xor <2 x i1> [[TMP2]], splat (i1 true) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT56:%.*]] = insertelement <2 x i1> poison, i1 [[C_4]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT57:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT56]], <2 x i1> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP33:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT57]], splat (i1 true) ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE74:.*]] ] -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP47]], splat (i1 true) +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE55:.*]] ] +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[X_PTR]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[TMP4]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP47:%.*]] = icmp eq <2 x i64> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP47]], splat (i1 true) -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 -; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0 +; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: ; CHECK-NEXT: store i64 0, ptr [[DST_1]], align 8, !alias.scope [[META7:![0-9]+]], !noalias [[META10:![0-9]+]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1 -; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF42:.*]], label %[[PRED_STORE_CONTINUE43:.*]] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1 +; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF42:.*]], label %[[PRED_STORE_CONTINUE43:.*]] ; CHECK: [[PRED_STORE_IF42]]: ; CHECK-NEXT: store i64 0, ptr [[DST_1]], align 8, !alias.scope [[META7]], !noalias [[META10]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE43]] ; CHECK: [[PRED_STORE_CONTINUE43]]: -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0 -; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF44:.*]], label %[[PRED_STORE_CONTINUE45:.*]] +; CHECK-NEXT: [[TMP13:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[TMP11]], <2 x i1> zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP13]], i32 0 +; CHECK-NEXT: br i1 [[TMP16]], label %[[PRED_STORE_IF44:.*]], label %[[PRED_STORE_CONTINUE45:.*]] ; CHECK: [[PRED_STORE_IF44]]: -; CHECK-NEXT: store i64 0, ptr [[DST_1]], align 8, !alias.scope [[META7]], !noalias [[META10]] +; CHECK-NEXT: store i64 0, ptr [[DST_3]], align 8, !alias.scope [[META15:![0-9]+]], !noalias [[META16:![0-9]+]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE45]] ; CHECK: [[PRED_STORE_CONTINUE45]]: -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1 -; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF46:.*]], label %[[PRED_STORE_CONTINUE47:.*]] +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP13]], i32 1 +; CHECK-NEXT: br i1 [[TMP17]], label %[[PRED_STORE_IF46:.*]], label %[[PRED_STORE_CONTINUE47:.*]] ; CHECK: [[PRED_STORE_IF46]]: -; CHECK-NEXT: store i64 0, ptr [[DST_1]], align 8, !alias.scope [[META7]], !noalias [[META10]] +; CHECK-NEXT: store i64 0, ptr [[DST_3]], align 8, !alias.scope [[META15]], !noalias [[META16]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE47]] ; CHECK: [[PRED_STORE_CONTINUE47]]: -; CHECK-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP2]], splat (i1 true) -; CHECK-NEXT: [[TMP11:%.*]] = xor <2 x i1> [[TMP2]], splat (i1 true) -; CHECK-NEXT: [[TMP12:%.*]] = select <2 x i1> [[TMP4]], <2 x i1> [[TMP10]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[TMP11]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP12]], i32 0 -; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF48:.*]], label %[[PRED_STORE_CONTINUE49:.*]] -; CHECK: [[PRED_STORE_IF48]]: -; CHECK-NEXT: store i64 0, ptr [[DST_3]], align 8, !alias.scope [[META15:![0-9]+]], !noalias [[META16:![0-9]+]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE49]] -; CHECK: [[PRED_STORE_CONTINUE49]]: -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP12]], i32 1 -; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF50:.*]], label %[[PRED_STORE_CONTINUE51:.*]] -; CHECK: [[PRED_STORE_IF50]]: -; CHECK-NEXT: store i64 0, ptr [[DST_3]], align 8, !alias.scope [[META15]], !noalias [[META16]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE51]] -; CHECK: [[PRED_STORE_CONTINUE51]]: -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP13]], i32 0 -; CHECK-NEXT: br i1 [[TMP16]], label %[[PRED_STORE_IF52:.*]], label %[[PRED_STORE_CONTINUE53:.*]] -; CHECK: [[PRED_STORE_IF52]]: -; CHECK-NEXT: store i64 0, ptr [[DST_3]], align 8, !alias.scope [[META15]], !noalias [[META16]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE53]] -; CHECK: [[PRED_STORE_CONTINUE53]]: -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP13]], i32 1 -; CHECK-NEXT: br i1 [[TMP17]], label %[[PRED_STORE_IF54:.*]], label %[[PRED_STORE_CONTINUE55:.*]] -; CHECK: [[PRED_STORE_IF54]]: -; CHECK-NEXT: store i64 0, ptr [[DST_3]], align 8, !alias.scope [[META15]], !noalias [[META16]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE55]] -; CHECK: [[PRED_STORE_CONTINUE55]]: -; CHECK-NEXT: [[TMP18:%.*]] = select <2 x i1> [[TMP4]], <2 x i1> [[BROADCAST_SPLAT41]], <2 x i1> zeroinitializer ; CHECK-NEXT: [[TMP19:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[BROADCAST_SPLAT41]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP18]], <2 x i1> [[BROADCAST_SPLAT57]], <2 x i1> zeroinitializer ; CHECK-NEXT: [[TMP21:%.*]] = select <2 x i1> [[TMP19]], <2 x i1> [[BROADCAST_SPLAT57]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[TMP22:%.*]] = or <2 x i1> [[TMP47]], [[TMP20]] ; CHECK-NEXT: [[TMP23:%.*]] = or <2 x i1> [[TMP47]], [[TMP21]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP20]], <2 x i64> zeroinitializer, <2 x i64> splat (i64 1) ; CHECK-NEXT: [[PREDPHI58:%.*]] = select <2 x i1> [[TMP21]], <2 x i64> zeroinitializer, <2 x i64> splat (i64 1) -; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i1> [[TMP22]], i32 0 -; CHECK-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF59:.*]], label %[[PRED_STORE_CONTINUE60:.*]] -; CHECK: [[PRED_STORE_IF59]]: -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i64> [[PREDPHI]], i32 0 -; CHECK-NEXT: store i64 [[TMP25]], ptr [[DST_2]], align 8, !alias.scope [[META17:![0-9]+]], !noalias [[META18:![0-9]+]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE60]] -; CHECK: [[PRED_STORE_CONTINUE60]]: -; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x i1> [[TMP22]], i32 1 -; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_STORE_IF61:.*]], label %[[PRED_STORE_CONTINUE62:.*]] -; CHECK: [[PRED_STORE_IF61]]: -; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x i64> [[PREDPHI]], i32 1 -; CHECK-NEXT: store i64 [[TMP27]], ptr [[DST_2]], align 8, !alias.scope [[META17]], !noalias [[META18]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE62]] -; CHECK: [[PRED_STORE_CONTINUE62]]: ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP23]], i32 0 -; CHECK-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF63:.*]], label %[[PRED_STORE_CONTINUE64:.*]] -; CHECK: [[PRED_STORE_IF63]]: +; CHECK-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF48:.*]], label %[[PRED_STORE_CONTINUE49:.*]] +; CHECK: [[PRED_STORE_IF48]]: ; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x i64> [[PREDPHI58]], i32 0 -; CHECK-NEXT: store i64 [[TMP29]], ptr [[DST_2]], align 8, !alias.scope [[META17]], !noalias [[META18]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE64]] -; CHECK: [[PRED_STORE_CONTINUE64]]: +; CHECK-NEXT: store i64 [[TMP29]], ptr [[DST_2]], align 8, !alias.scope [[META17:![0-9]+]], !noalias [[META18:![0-9]+]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE49]] +; CHECK: [[PRED_STORE_CONTINUE49]]: ; CHECK-NEXT: [[TMP30:%.*]] = extractelement <2 x i1> [[TMP23]], i32 1 -; CHECK-NEXT: br i1 [[TMP30]], label %[[PRED_STORE_IF65:.*]], label %[[PRED_STORE_CONTINUE66:.*]] -; CHECK: [[PRED_STORE_IF65]]: +; CHECK-NEXT: br i1 [[TMP30]], label %[[PRED_STORE_IF50:.*]], label %[[PRED_STORE_CONTINUE51:.*]] +; CHECK: [[PRED_STORE_IF50]]: ; CHECK-NEXT: [[TMP31:%.*]] = extractelement <2 x i64> [[PREDPHI58]], i32 1 ; CHECK-NEXT: store i64 [[TMP31]], ptr [[DST_2]], align 8, !alias.scope [[META17]], !noalias [[META18]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE66]] -; CHECK: [[PRED_STORE_CONTINUE66]]: -; CHECK-NEXT: [[TMP32:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT57]], splat (i1 true) -; CHECK-NEXT: [[TMP33:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT57]], splat (i1 true) -; CHECK-NEXT: [[TMP34:%.*]] = select <2 x i1> [[TMP18]], <2 x i1> [[TMP32]], <2 x i1> zeroinitializer +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE51]] +; CHECK: [[PRED_STORE_CONTINUE51]]: ; CHECK-NEXT: [[TMP35:%.*]] = select <2 x i1> [[TMP19]], <2 x i1> [[TMP33]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[TMP36:%.*]] = or <2 x i1> [[TMP22]], [[TMP34]] ; CHECK-NEXT: [[TMP37:%.*]] = or <2 x i1> [[TMP23]], [[TMP35]] -; CHECK-NEXT: [[TMP38:%.*]] = extractelement <2 x i1> [[TMP36]], i32 0 -; CHECK-NEXT: br i1 [[TMP38]], label %[[PRED_STORE_IF67:.*]], label %[[PRED_STORE_CONTINUE68:.*]] -; CHECK: [[PRED_STORE_IF67]]: -; CHECK-NEXT: [[TMP45:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19:![0-9]+]] -; CHECK-NEXT: store i64 [[TMP45]], ptr [[DST]], align 8, !alias.scope [[META20:![0-9]+]], !noalias [[META19]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE68]] -; CHECK: [[PRED_STORE_CONTINUE68]]: -; CHECK-NEXT: [[TMP40:%.*]] = extractelement <2 x i1> [[TMP36]], i32 1 -; CHECK-NEXT: br i1 [[TMP40]], label %[[PRED_STORE_IF69:.*]], label %[[PRED_STORE_CONTINUE70:.*]] -; CHECK: [[PRED_STORE_IF69]]: -; CHECK-NEXT: [[TMP39:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19]] -; CHECK-NEXT: store i64 [[TMP39]], ptr [[DST]], align 8, !alias.scope [[META20]], !noalias [[META19]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE70]] -; CHECK: [[PRED_STORE_CONTINUE70]]: ; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i1> [[TMP37]], i32 0 -; CHECK-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF71:.*]], label %[[PRED_STORE_CONTINUE72:.*]] -; CHECK: [[PRED_STORE_IF71]]: -; CHECK-NEXT: [[TMP41:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19]] -; CHECK-NEXT: store i64 [[TMP41]], ptr [[DST]], align 8, !alias.scope [[META20]], !noalias [[META19]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE72]] -; CHECK: [[PRED_STORE_CONTINUE72]]: +; CHECK-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF52:.*]], label %[[PRED_STORE_CONTINUE53:.*]] +; CHECK: [[PRED_STORE_IF52]]: +; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19:![0-9]+]] +; CHECK-NEXT: store i64 [[TMP24]], ptr [[DST]], align 8, !alias.scope [[META20:![0-9]+]], !noalias [[META19]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE53]] +; CHECK: [[PRED_STORE_CONTINUE53]]: ; CHECK-NEXT: [[TMP44:%.*]] = extractelement <2 x i1> [[TMP37]], i32 1 -; CHECK-NEXT: br i1 [[TMP44]], label %[[PRED_STORE_IF73:.*]], label %[[PRED_STORE_CONTINUE74]] -; CHECK: [[PRED_STORE_IF73]]: -; CHECK-NEXT: [[TMP43:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19]] -; CHECK-NEXT: store i64 [[TMP43]], ptr [[DST]], align 8, !alias.scope [[META20]], !noalias [[META19]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE74]] -; CHECK: [[PRED_STORE_CONTINUE74]]: -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: br i1 [[TMP44]], label %[[PRED_STORE_IF54:.*]], label %[[PRED_STORE_CONTINUE55]] +; CHECK: [[PRED_STORE_IF54]]: +; CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19]] +; CHECK-NEXT: store i64 [[TMP25]], ptr [[DST]], align 8, !alias.scope [[META20]], !noalias [[META19]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE55]] +; CHECK: [[PRED_STORE_CONTINUE55]]: +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 ; CHECK-NEXT: br i1 [[TMP46]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -348,6 +288,10 @@ define void @test_exit_branch_cost(ptr %dst, i64 %x, i32 %y, ptr %dst.1, i1 %c.4 ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[X_GEP:%.*]] = getelementptr i64, ptr [[X_PTR]], i64 [[IV]] +; CHECK-NEXT: [[X:%.*]] = load i64, ptr [[X_GEP]], align 8 +; CHECK-NEXT: [[Y_GEP:%.*]] = getelementptr i32, ptr [[Y_PTR]], i64 [[IV]] +; CHECK-NEXT: [[Y:%.*]] = load i32, ptr [[Y_GEP]], align 4 ; CHECK-NEXT: [[C1:%.*]] = icmp eq i64 [[X]], 0 ; CHECK-NEXT: br i1 [[C1]], label %[[THEN_4:.*]], label %[[THEN_1:.*]] ; CHECK: [[THEN_1]]: @@ -386,6 +330,10 @@ entry: loop.header: %iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ] + %x.gep = getelementptr i64, ptr %x.ptr, i64 %iv + %x = load i64, ptr %x.gep + %y.gep = getelementptr i32, ptr %y.ptr, i64 %iv + %y = load i32, ptr %y.gep %c1 = icmp eq i64 %x, 0 br i1 %c1, label %then.4, label %then.1 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/blend-any-of-reduction-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/blend-any-of-reduction-cost.ll index f9c1ab4a81810..3d00c228baf51 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/blend-any-of-reduction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/blend-any-of-reduction-cost.ll @@ -75,17 +75,17 @@ define i32 @any_of_reduction_used_in_blend_with_mutliple_phis(ptr %src, i64 %N, ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i1 [[C_0]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = xor [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i1 [[C_1]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = xor [[BROADCAST_SPLAT2]], splat (i1 true) +; CHECK-NEXT: [[TMP8:%.*]] = select [[TMP6]], [[TMP7]], zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, ptr [[SRC]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PREDPHI:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP6:%.*]] = xor [[BROADCAST_SPLAT]], splat (i1 true) -; CHECK-NEXT: [[TMP7:%.*]] = xor [[BROADCAST_SPLAT2]], splat (i1 true) -; CHECK-NEXT: [[TMP8:%.*]] = select [[TMP6]], [[TMP7]], zeroinitializer ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2p0.nxv2p0( [[BROADCAST_SPLAT4]], i32 8, [[TMP8]], poison) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq [[WIDE_MASKED_GATHER]], zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = or [[VEC_PHI]], [[TMP9]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll index a7765f47180d8..038e726adc24a 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll @@ -432,6 +432,7 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = icmp ne [[BROADCAST_SPLAT]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = select [[TMP6]], [[BROADCAST_SPLAT]], splat (i64 1) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -439,7 +440,6 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP9]], align 8 -; CHECK-NEXT: [[TMP10:%.*]] = select [[TMP6]], [[BROADCAST_SPLAT]], splat (i64 1) ; CHECK-NEXT: [[TMP11:%.*]] = udiv [[WIDE_LOAD]], [[TMP10]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP6]], [[TMP11]], [[WIDE_LOAD]] ; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP9]], align 8 @@ -477,6 +477,7 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0 ; FIXED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; FIXED-NEXT: [[TMP0:%.*]] = icmp ne <4 x i64> [[BROADCAST_SPLAT]], zeroinitializer +; FIXED-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1) ; FIXED-NEXT: br label [[VECTOR_BODY:%.*]] ; FIXED: vector.body: ; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -486,10 +487,8 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 ; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 ; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 -; FIXED-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1) -; FIXED-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1) ; FIXED-NEXT: [[TMP7:%.*]] = udiv <4 x i64> [[WIDE_LOAD]], [[TMP5]] -; FIXED-NEXT: [[TMP8:%.*]] = udiv <4 x i64> [[WIDE_LOAD1]], [[TMP6]] +; FIXED-NEXT: [[TMP8:%.*]] = udiv <4 x i64> [[WIDE_LOAD1]], [[TMP5]] ; FIXED-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[TMP7]], <4 x i64> [[WIDE_LOAD]] ; FIXED-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[TMP8]], <4 x i64> [[WIDE_LOAD1]] ; FIXED-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP3]], align 8 @@ -560,6 +559,7 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = icmp ne [[BROADCAST_SPLAT]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = select [[TMP6]], [[BROADCAST_SPLAT]], splat (i64 1) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -567,7 +567,6 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP9]], align 8 -; CHECK-NEXT: [[TMP10:%.*]] = select [[TMP6]], [[BROADCAST_SPLAT]], splat (i64 1) ; CHECK-NEXT: [[TMP11:%.*]] = sdiv [[WIDE_LOAD]], [[TMP10]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP6]], [[TMP11]], [[WIDE_LOAD]] ; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP9]], align 8 @@ -605,6 +604,7 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0 ; FIXED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; FIXED-NEXT: [[TMP0:%.*]] = icmp ne <4 x i64> [[BROADCAST_SPLAT]], zeroinitializer +; FIXED-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1) ; FIXED-NEXT: br label [[VECTOR_BODY:%.*]] ; FIXED: vector.body: ; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -614,10 +614,8 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 ; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 ; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 -; FIXED-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1) -; FIXED-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1) ; FIXED-NEXT: [[TMP7:%.*]] = sdiv <4 x i64> [[WIDE_LOAD]], [[TMP5]] -; FIXED-NEXT: [[TMP8:%.*]] = sdiv <4 x i64> [[WIDE_LOAD1]], [[TMP6]] +; FIXED-NEXT: [[TMP8:%.*]] = sdiv <4 x i64> [[WIDE_LOAD1]], [[TMP5]] ; FIXED-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[TMP7]], <4 x i64> [[WIDE_LOAD]] ; FIXED-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[TMP8]], <4 x i64> [[WIDE_LOAD1]] ; FIXED-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP3]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll b/llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll index 270e6bcd9ab1f..1a9e7ddb965f3 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll @@ -14,72 +14,70 @@ define i32 @unused_blend_after_unrolling(ptr %p, i32 %a, i1 %c.1, i16 %x, i16 %y ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C_1]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT16:%.*]] = insertelement <4 x i1> poison, i1 [[C]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT17:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT16]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT17]], splat (i1 true) ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_SDIV_CONTINUE15:.*]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP24:%.*]], %[[PRED_SDIV_CONTINUE15]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP25:%.*]], %[[PRED_SDIV_CONTINUE15]] ] -; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_SDIV_CONTINUE17:.*]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP24:%.*]], %[[PRED_SDIV_CONTINUE17]] ] +; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP25:%.*]], %[[PRED_SDIV_CONTINUE17]] ] ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_SDIV_IF:.*]], label %[[PRED_SDIV_CONTINUE:.*]] ; CHECK: [[PRED_SDIV_IF]]: ; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE]] ; CHECK: [[PRED_SDIV_CONTINUE]]: ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 -; CHECK-NEXT: br i1 [[TMP3]], label %[[PRED_SDIV_IF2:.*]], label %[[PRED_SDIV_CONTINUE3:.*]] -; CHECK: [[PRED_SDIV_IF2]]: -; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE3]] -; CHECK: [[PRED_SDIV_CONTINUE3]]: -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 -; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_SDIV_IF4:.*]], label %[[PRED_SDIV_CONTINUE5:.*]] +; CHECK-NEXT: br i1 [[TMP3]], label %[[PRED_SDIV_IF4:.*]], label %[[PRED_SDIV_CONTINUE5:.*]] ; CHECK: [[PRED_SDIV_IF4]]: ; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE5]] ; CHECK: [[PRED_SDIV_CONTINUE5]]: -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 -; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_SDIV_IF6:.*]], label %[[PRED_SDIV_CONTINUE7:.*]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 +; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_SDIV_IF6:.*]], label %[[PRED_SDIV_CONTINUE7:.*]] ; CHECK: [[PRED_SDIV_IF6]]: ; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE7]] ; CHECK: [[PRED_SDIV_CONTINUE7]]: -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 -; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_SDIV_IF8:.*]], label %[[PRED_SDIV_CONTINUE9:.*]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 +; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_SDIV_IF8:.*]], label %[[PRED_SDIV_CONTINUE9:.*]] ; CHECK: [[PRED_SDIV_IF8]]: -; CHECK-NEXT: [[TMP7:%.*]] = sdiv i16 [[X]], [[Y]] -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i16> poison, i16 [[TMP7]], i32 0 ; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE9]] ; CHECK: [[PRED_SDIV_CONTINUE9]]: -; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i16> [ poison, %[[PRED_SDIV_CONTINUE7]] ], [ [[TMP8]], %[[PRED_SDIV_IF8]] ] -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 -; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_SDIV_IF10:.*]], label %[[PRED_SDIV_CONTINUE11:.*]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 +; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_SDIV_IF10:.*]], label %[[PRED_SDIV_CONTINUE11:.*]] ; CHECK: [[PRED_SDIV_IF10]]: -; CHECK-NEXT: [[TMP11:%.*]] = sdiv i16 [[X]], [[Y]] -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i16> [[TMP9]], i16 [[TMP11]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = sdiv i16 [[X]], [[Y]] +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i16> poison, i16 [[TMP7]], i32 0 ; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE11]] ; CHECK: [[PRED_SDIV_CONTINUE11]]: -; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x i16> [ [[TMP9]], %[[PRED_SDIV_CONTINUE9]] ], [ [[TMP12]], %[[PRED_SDIV_IF10]] ] -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 -; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_SDIV_IF12:.*]], label %[[PRED_SDIV_CONTINUE13:.*]] +; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i16> [ poison, %[[PRED_SDIV_CONTINUE9]] ], [ [[TMP8]], %[[PRED_SDIV_IF10]] ] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 +; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_SDIV_IF12:.*]], label %[[PRED_SDIV_CONTINUE13:.*]] ; CHECK: [[PRED_SDIV_IF12]]: -; CHECK-NEXT: [[TMP15:%.*]] = sdiv i16 [[X]], [[Y]] -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i16> [[TMP13]], i16 [[TMP15]], i32 2 +; CHECK-NEXT: [[TMP11:%.*]] = sdiv i16 [[X]], [[Y]] +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i16> [[TMP9]], i16 [[TMP11]], i32 1 ; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE13]] ; CHECK: [[PRED_SDIV_CONTINUE13]]: -; CHECK-NEXT: [[TMP17:%.*]] = phi <4 x i16> [ [[TMP13]], %[[PRED_SDIV_CONTINUE11]] ], [ [[TMP16]], %[[PRED_SDIV_IF12]] ] -; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3 -; CHECK-NEXT: br i1 [[TMP18]], label %[[PRED_SDIV_IF14:.*]], label %[[PRED_SDIV_CONTINUE15]] +; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x i16> [ [[TMP9]], %[[PRED_SDIV_CONTINUE11]] ], [ [[TMP12]], %[[PRED_SDIV_IF12]] ] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 +; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_SDIV_IF14:.*]], label %[[PRED_SDIV_CONTINUE15:.*]] ; CHECK: [[PRED_SDIV_IF14]]: -; CHECK-NEXT: [[TMP19:%.*]] = sdiv i16 [[X]], [[Y]] -; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i16> [[TMP17]], i16 [[TMP19]], i32 3 +; CHECK-NEXT: [[TMP15:%.*]] = sdiv i16 [[X]], [[Y]] +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i16> [[TMP13]], i16 [[TMP15]], i32 2 ; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE15]] ; CHECK: [[PRED_SDIV_CONTINUE15]]: -; CHECK-NEXT: [[TMP21:%.*]] = phi <4 x i16> [ [[TMP17]], %[[PRED_SDIV_CONTINUE13]] ], [ [[TMP20]], %[[PRED_SDIV_IF14]] ] +; CHECK-NEXT: [[TMP17:%.*]] = phi <4 x i16> [ [[TMP13]], %[[PRED_SDIV_CONTINUE13]] ], [ [[TMP16]], %[[PRED_SDIV_IF14]] ] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 +; CHECK-NEXT: br i1 [[TMP18]], label %[[PRED_SDIV_IF16:.*]], label %[[PRED_SDIV_CONTINUE17]] +; CHECK: [[PRED_SDIV_IF16]]: +; CHECK-NEXT: [[TMP19:%.*]] = sdiv i16 [[X]], [[Y]] +; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i16> [[TMP17]], i16 [[TMP19]], i32 3 +; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE17]] +; CHECK: [[PRED_SDIV_CONTINUE17]]: +; CHECK-NEXT: [[TMP21:%.*]] = phi <4 x i16> [ [[TMP17]], %[[PRED_SDIV_CONTINUE15]] ], [ [[TMP20]], %[[PRED_SDIV_IF16]] ] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i16> zeroinitializer, <4 x i16> [[TMP21]] -; CHECK-NEXT: [[TMP22:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT17]], splat (i1 true) -; CHECK-NEXT: [[TMP23:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT17]], splat (i1 true) ; CHECK-NEXT: [[TMP24]] = or <4 x i1> [[VEC_PHI]], [[TMP22]] -; CHECK-NEXT: [[TMP25]] = or <4 x i1> [[VEC_PHI1]], [[TMP23]] +; CHECK-NEXT: [[TMP25]] = or <4 x i1> [[VEC_PHI3]], [[TMP22]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96 ; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll b/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll index 8d56c3386a3b1..cfae26a3a4257 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll @@ -15,19 +15,18 @@ define void @smax_call_uniform(ptr %dst, i64 %x) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_UREM_CONTINUE6:.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 ; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_UREM_IF:.*]], label %[[PRED_UREM_CONTINUE:.*]] ; CHECK: [[PRED_UREM_IF]]: ; CHECK-NEXT: [[REM:%.*]] = urem i64 [[MUL]], [[X]] ; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE]] ; CHECK: [[PRED_UREM_CONTINUE]]: ; CHECK-NEXT: [[TMP4:%.*]] = phi i64 [ poison, %[[VECTOR_BODY]] ], [ [[REM]], %[[PRED_UREM_IF]] ] -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1 ; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_UREM_IF1:.*]], label %[[PRED_UREM_CONTINUE2:.*]] ; CHECK: [[PRED_UREM_IF1]]: ; CHECK-NEXT: [[TMP6:%.*]] = urem i64 [[MUL]], [[X]] @@ -48,7 +47,7 @@ define void @smax_call_uniform(ptr %dst, i64 %x) { ; CHECK: [[PRED_UREM_CONTINUE6]]: ; CHECK-NEXT: [[TMP12:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP4]], i64 0) ; CHECK-NEXT: [[TMP13:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP9]], i64 0) -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 ; CHECK-NEXT: [[P:%.*]] = select i1 [[TMP14]], i64 [[TMP12]], i64 1 ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 ; CHECK-NEXT: [[PREDPHI7:%.*]] = select i1 [[TMP15]], i64 [[TMP13]], i64 1 diff --git a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll index c14c34cade6b6..a0294f7ac7992 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll @@ -129,6 +129,7 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[IND_END43:%.*]] = mul i64 [[N_VEC32]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i1> poison, i1 [[TOBOOL6]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i1> [[BROADCAST_SPLATINSERT]], <16 x i1> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP34:%.*]] = xor <16 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: br label [[VECTOR_BODY29:%.*]] ; CHECK: vector.body29: ; CHECK-NEXT: [[INDEX34:%.*]] = phi i64 [ 0, [[VECTOR_PH30]] ], [ [[INDEX_NEXT39:%.*]], [[VECTOR_BODY29]] ] @@ -138,7 +139,6 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, <16 x i64> [[VEC_IND35]] ; CHECK-NEXT: [[TMP32:%.*]] = add nsw <16 x i64> [[TMP30]], [[VEC_IND37]] ; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP31]], <16 x i64> [[TMP32]], i64 0 -; CHECK-NEXT: [[TMP34:%.*]] = xor <16 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 8), <16 x ptr> [[TMP33]], i32 16, <16 x i1> [[TMP34]]) ; CHECK-NEXT: [[TMP35:%.*]] = or disjoint <16 x i64> [[VEC_IND37]], splat (i64 1) ; CHECK-NEXT: [[TMP36:%.*]] = add nsw <16 x i64> [[TMP30]], [[TMP35]] @@ -173,16 +173,17 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[TMP43:%.*]] = mul i64 [[N_VEC53]], 2 ; CHECK-NEXT: [[IND_END54:%.*]] = add i64 8, [[TMP43]] ; CHECK-NEXT: [[IND_END57:%.*]] = mul i64 [[N_VEC53]], 2 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT50:%.*]] = insertelement <8 x i1> poison, i1 [[TOBOOL6]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT73:%.*]] = shufflevector <8 x i1> [[BROADCAST_SPLATINSERT50]], <8 x i1> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP48:%.*]] = xor <8 x i1> [[BROADCAST_SPLAT73]], splat (i1 true) ; CHECK-NEXT: [[DOTSPLATINSERT62:%.*]] = insertelement <8 x i64> poison, i64 [[BC_RESUME_VAL42]], i64 0 ; CHECK-NEXT: [[DOTSPLAT63:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT62]], <8 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION64:%.*]] = add <8 x i64> [[DOTSPLAT63]], ; CHECK-NEXT: [[DOTSPLATINSERT67:%.*]] = insertelement <8 x i64> poison, i64 [[BC_RESUME_VAL44]], i64 0 ; CHECK-NEXT: [[DOTSPLAT68:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT67]], <8 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION69:%.*]] = add <8 x i64> [[DOTSPLAT68]], -; CHECK-NEXT: [[BROADCAST_SPLATINSERT72:%.*]] = insertelement <8 x i1> poison, i1 [[TOBOOL6]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT73:%.*]] = shufflevector <8 x i1> [[BROADCAST_SPLATINSERT72]], <8 x i1> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY50:%.*]] -; CHECK: vec.epilog.vector.body50: +; CHECK: vec.epilog.vector.body52: ; CHECK-NEXT: [[INDEX61:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL51]], [[VEC_EPILOG_PH42]] ], [ [[INDEX_NEXT74:%.*]], [[VEC_EPILOG_VECTOR_BODY50]] ] ; CHECK-NEXT: [[VEC_IND65:%.*]] = phi <8 x i64> [ [[INDUCTION64]], [[VEC_EPILOG_PH42]] ], [ [[VEC_IND_NEXT66:%.*]], [[VEC_EPILOG_VECTOR_BODY50]] ] ; CHECK-NEXT: [[VEC_IND70:%.*]] = phi <8 x i64> [ [[INDUCTION69]], [[VEC_EPILOG_PH42]] ], [ [[VEC_IND_NEXT71:%.*]], [[VEC_EPILOG_VECTOR_BODY50]] ] @@ -190,7 +191,6 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, <8 x i64> [[VEC_IND65]] ; CHECK-NEXT: [[TMP46:%.*]] = add nsw <8 x i64> [[TMP44]], [[VEC_IND70]] ; CHECK-NEXT: [[TMP47:%.*]] = getelementptr inbounds [10 x i32], <8 x ptr> [[TMP45]], <8 x i64> [[TMP46]], i64 0 -; CHECK-NEXT: [[TMP48:%.*]] = xor <8 x i1> [[BROADCAST_SPLAT73]], splat (i1 true) ; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> splat (i32 8), <8 x ptr> [[TMP47]], i32 16, <8 x i1> [[TMP48]]) ; CHECK-NEXT: [[TMP49:%.*]] = or disjoint <8 x i64> [[VEC_IND70]], splat (i64 1) ; CHECK-NEXT: [[TMP50:%.*]] = add nsw <8 x i64> [[TMP44]], [[TMP49]] diff --git a/llvm/test/Transforms/LoopVectorize/blend-in-header.ll b/llvm/test/Transforms/LoopVectorize/blend-in-header.ll index 4c95584ff253a..2fea016218e61 100644 --- a/llvm/test/Transforms/LoopVectorize/blend-in-header.ll +++ b/llvm/test/Transforms/LoopVectorize/blend-in-header.ll @@ -171,11 +171,11 @@ define i64 @invar_cond_incoming_ops_reordered(i1 %c) { ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> splat (i64 1), <4 x i64> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> splat (i64 1), <4 x i64> zeroinitializer ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll index c4509e4ad56e4..7db53d8ffcedf 100644 --- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll @@ -172,6 +172,7 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) { ; UNROLL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP4]], 2 ; UNROLL-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF]] ; UNROLL-NEXT: [[IND_END:%.*]] = add i64 [[V_1]], [[N_VEC]] +; UNROLL-NEXT: [[TMP13:%.*]] = xor i1 [[COND_2:%.*]], true ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ] @@ -184,7 +185,7 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) { ; UNROLL-NEXT: [[TMP8:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR]], i64 0, i64 [[TMP6]] ; UNROLL-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4 ; UNROLL-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 4 -; UNROLL-NEXT: br i1 [[COND_2:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE3]] +; UNROLL-NEXT: br i1 [[COND_2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE3]] ; UNROLL: pred.store.if: ; UNROLL-NEXT: store i32 [[TMP9]], ptr [[TMP7]], align 4 ; UNROLL-NEXT: store i32 [[TMP10]], ptr [[TMP8]], align 4 @@ -192,10 +193,8 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) { ; UNROLL: pred.store.continue3: ; UNROLL-NEXT: [[TMP11:%.*]] = add i32 [[VEC_PHI]], 1 ; UNROLL-NEXT: [[TMP12:%.*]] = add i32 [[VEC_PHI1]], 1 -; UNROLL-NEXT: [[TMP13:%.*]] = xor i1 [[COND_2]], true -; UNROLL-NEXT: [[TMP14:%.*]] = xor i1 [[COND_2]], true ; UNROLL-NEXT: [[PREDPHI]] = select i1 [[TMP13]], i32 [[VEC_PHI]], i32 [[TMP11]] -; UNROLL-NEXT: [[PREDPHI4]] = select i1 [[TMP14]], i32 [[VEC_PHI1]], i32 [[TMP12]] +; UNROLL-NEXT: [[PREDPHI4]] = select i1 [[TMP13]], i32 [[VEC_PHI1]], i32 [[TMP12]] ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; UNROLL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] @@ -244,6 +243,7 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) { ; UNROLL-NOSIMPLIFY-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 2 ; UNROLL-NOSIMPLIFY-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] ; UNROLL-NOSIMPLIFY-NEXT: [[IND_END:%.*]] = add i64 [[V_1]], [[N_VEC]] +; UNROLL-NOSIMPLIFY-NEXT: [[TMP12:%.*]] = xor i1 [[COND_2:%.*]], true ; UNROLL-NOSIMPLIFY-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NOSIMPLIFY: vector.body: ; UNROLL-NOSIMPLIFY-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ] @@ -256,7 +256,7 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) { ; UNROLL-NOSIMPLIFY-NEXT: [[TMP7:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR]], i64 0, i64 [[TMP5]] ; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP6]], align 4 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4 -; UNROLL-NOSIMPLIFY-NEXT: br i1 [[COND_2:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; UNROLL-NOSIMPLIFY-NEXT: br i1 [[COND_2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; UNROLL-NOSIMPLIFY: pred.store.if: ; UNROLL-NOSIMPLIFY-NEXT: store i32 [[TMP8]], ptr [[TMP6]], align 4 ; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE]] @@ -268,10 +268,8 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) { ; UNROLL-NOSIMPLIFY: pred.store.continue3: ; UNROLL-NOSIMPLIFY-NEXT: [[TMP10:%.*]] = add i32 [[VEC_PHI]], 1 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP11:%.*]] = add i32 [[VEC_PHI1]], 1 -; UNROLL-NOSIMPLIFY-NEXT: [[TMP12:%.*]] = xor i1 [[COND_2]], true -; UNROLL-NOSIMPLIFY-NEXT: [[TMP13:%.*]] = xor i1 [[COND_2]], true ; UNROLL-NOSIMPLIFY-NEXT: [[PREDPHI]] = select i1 [[TMP12]], i32 [[VEC_PHI]], i32 [[TMP10]] -; UNROLL-NOSIMPLIFY-NEXT: [[PREDPHI4]] = select i1 [[TMP13]], i32 [[VEC_PHI1]], i32 [[TMP11]] +; UNROLL-NOSIMPLIFY-NEXT: [[PREDPHI4]] = select i1 [[TMP12]], i32 [[VEC_PHI1]], i32 [[TMP11]] ; UNROLL-NOSIMPLIFY-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -321,9 +319,10 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) { ; VEC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP4]], 2 ; VEC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF]] ; VEC-NEXT: [[IND_END:%.*]] = add i64 [[V_1]], [[N_VEC]] -; VEC-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> zeroinitializer, i32 [[V_2:%.*]], i32 0 ; VEC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[COND_2:%.*]], i64 0 ; VEC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer +; VEC-NEXT: [[TMP17:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) +; VEC-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> zeroinitializer, i32 [[V_2:%.*]], i32 0 ; VEC-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC: vector.body: ; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] @@ -351,7 +350,6 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) { ; VEC-NEXT: br label [[PRED_STORE_CONTINUE2]] ; VEC: pred.store.continue2: ; VEC-NEXT: [[TMP16:%.*]] = add <2 x i32> [[VEC_PHI]], splat (i32 1) -; VEC-NEXT: [[TMP17:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; VEC-NEXT: [[PREDPHI]] = select <2 x i1> [[TMP17]], <2 x i32> [[VEC_PHI]], <2 x i32> [[TMP16]] ; VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VEC-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index 2175eab9752cf..96311de673d8a 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -1962,6 +1962,7 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[SMAX]], [[N_MOD_VF]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE2:%.*]] ] @@ -1989,7 +1990,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE2]] ; CHECK: pred.udiv.continue2: ; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x i32> [ [[TMP7]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP12]], [[PRED_UDIV_IF1]] ] -; CHECK-NEXT: [[TMP14:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP14]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP13]] ; CHECK-NEXT: [[TMP15]] = add <2 x i32> [[PREDPHI]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 @@ -2030,6 +2030,8 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; IND: vector.ph: ; IND-NEXT: [[N_VEC:%.*]] = and i32 [[SMAX]], 2147483646 ; IND-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i64 0 +; IND-NEXT: [[TMP11:%.*]] = xor <2 x i1> [[BROADCAST_SPLATINSERT]], +; IND-NEXT: [[TMP12:%.*]] = shufflevector <2 x i1> [[TMP11]], <2 x i1> poison, <2 x i32> zeroinitializer ; IND-NEXT: br label [[VECTOR_BODY:%.*]] ; IND: vector.body: ; IND-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE2:%.*]] ] @@ -2054,8 +2056,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; IND-NEXT: br label [[PRED_UDIV_CONTINUE2]] ; IND: pred.udiv.continue2: ; IND-NEXT: [[TMP10:%.*]] = phi <2 x i32> [ [[TMP5]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP9]], [[PRED_UDIV_IF1]] ] -; IND-NEXT: [[TMP11:%.*]] = xor <2 x i1> [[BROADCAST_SPLATINSERT]], -; IND-NEXT: [[TMP12:%.*]] = shufflevector <2 x i1> [[TMP11]], <2 x i1> poison, <2 x i32> zeroinitializer ; IND-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP12]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP10]] ; IND-NEXT: [[TMP13]] = add <2 x i32> [[PREDPHI]], [[VEC_PHI]] ; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 @@ -2097,7 +2097,8 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; UNROLL: vector.ph: ; UNROLL-NEXT: [[N_VEC:%.*]] = and i32 [[SMAX]], 2147483644 ; UNROLL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i64 0 -; UNROLL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer +; UNROLL-NEXT: [[TMP27:%.*]] = xor <2 x i1> [[BROADCAST_SPLATINSERT]], +; UNROLL-NEXT: [[TMP28:%.*]] = shufflevector <2 x i1> [[TMP27]], <2 x i1> poison, <2 x i32> zeroinitializer ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE8:%.*]] ] @@ -2143,8 +2144,8 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE8]] ; UNROLL: pred.udiv.continue8: ; UNROLL-NEXT: [[TMP21:%.*]] = phi <2 x i32> [ [[TMP16]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP20]], [[PRED_UDIV_IF7]] ] -; UNROLL-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i32> [[TMP11]], <2 x i32> [[WIDE_LOAD]] -; UNROLL-NEXT: [[PREDPHI9:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i32> [[TMP21]], <2 x i32> [[WIDE_LOAD2]] +; UNROLL-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP28]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP11]] +; UNROLL-NEXT: [[PREDPHI9:%.*]] = select <2 x i1> [[TMP28]], <2 x i32> [[WIDE_LOAD2]], <2 x i32> [[TMP21]] ; UNROLL-NEXT: [[TMP22]] = add <2 x i32> [[PREDPHI]], [[VEC_PHI]] ; UNROLL-NEXT: [[TMP23]] = add <2 x i32> [[PREDPHI9]], [[VEC_PHI1]] ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -2189,6 +2190,7 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[SMAX]], [[N_MOD_VF]] ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i64 0 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer +; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE8:%.*]] ] @@ -2239,10 +2241,8 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE8]] ; UNROLL-NO-IC: pred.udiv.continue8: ; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = phi <2 x i32> [ [[TMP20]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP25]], [[PRED_UDIV_IF7]] ] -; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) -; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; UNROLL-NO-IC-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP27]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP14]] -; UNROLL-NO-IC-NEXT: [[PREDPHI9:%.*]] = select <2 x i1> [[TMP28]], <2 x i32> [[WIDE_LOAD2]], <2 x i32> [[TMP26]] +; UNROLL-NO-IC-NEXT: [[PREDPHI9:%.*]] = select <2 x i1> [[TMP27]], <2 x i32> [[WIDE_LOAD2]], <2 x i32> [[TMP26]] ; UNROLL-NO-IC-NEXT: [[TMP29]] = add <2 x i32> [[PREDPHI]], [[VEC_PHI]] ; UNROLL-NO-IC-NEXT: [[TMP30]] = add <2 x i32> [[PREDPHI9]], [[VEC_PHI1]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -2284,7 +2284,8 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; INTERLEAVE: vector.ph: ; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i32 [[SMAX]], 2147483640 ; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C:%.*]], i64 0 -; INTERLEAVE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer +; INTERLEAVE-NEXT: [[TMP47:%.*]] = xor <4 x i1> [[BROADCAST_SPLATINSERT]], +; INTERLEAVE-NEXT: [[TMP48:%.*]] = shufflevector <4 x i1> [[TMP47]], <4 x i1> poison, <4 x i32> zeroinitializer ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] ; INTERLEAVE: vector.body: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE16:%.*]] ] @@ -2366,8 +2367,8 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; INTERLEAVE-NEXT: br label [[PRED_UDIV_CONTINUE16]] ; INTERLEAVE: pred.udiv.continue16: ; INTERLEAVE-NEXT: [[TMP41:%.*]] = phi <4 x i32> [ [[TMP36]], [[PRED_UDIV_CONTINUE14]] ], [ [[TMP40]], [[PRED_UDIV_IF15]] ] -; INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> [[TMP21]], <4 x i32> [[WIDE_LOAD]] -; INTERLEAVE-NEXT: [[PREDPHI17:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> [[TMP41]], <4 x i32> [[WIDE_LOAD2]] +; INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP48]], <4 x i32> [[WIDE_LOAD]], <4 x i32> [[TMP21]] +; INTERLEAVE-NEXT: [[PREDPHI17:%.*]] = select <4 x i1> [[TMP48]], <4 x i32> [[WIDE_LOAD2]], <4 x i32> [[TMP41]] ; INTERLEAVE-NEXT: [[TMP42]] = add <4 x i32> [[PREDPHI]], [[VEC_PHI]] ; INTERLEAVE-NEXT: [[TMP43]] = add <4 x i32> [[PREDPHI17]], [[VEC_PHI1]] ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization-2.ll b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization-2.ll index bc1c1bf04a37f..e8ad6a38d742c 100644 --- a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization-2.ll +++ b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization-2.ll @@ -134,12 +134,12 @@ define void @inv_val_store_to_inv_address_conditional_inv(ptr %a, i64 %n, ptr %b ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX2]], 9223372036854775804 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i1> poison, i1 [[CMP]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[NTRUNC]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i1> poison, i1 [[CMP]], i64 3 -; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = insertelement <4 x i32> poison, i32 [[K]], i64 3 +; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = insertelement <4 x i32> poison, i32 [[K]], i64 0 ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> [[BROADCAST_SPLAT6]] -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[PREDPHI]], i64 3 +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[PREDPHI]], i64 0 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/pr37248.ll b/llvm/test/Transforms/LoopVectorize/pr37248.ll index ed7762fbc2ba3..fe660a8269672 100644 --- a/llvm/test/Transforms/LoopVectorize/pr37248.ll +++ b/llvm/test/Transforms/LoopVectorize/pr37248.ll @@ -41,26 +41,26 @@ define void @f1(ptr noalias %b, i1 %c, i32 %start) { ; CHECK-NEXT: [[IND_END:%.*]] = sub i32 [[START]], [[N_VEC]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[START]], [[INDEX]] -; CHECK-NEXT: [[TMP10:%.*]] = trunc i32 [[OFFSET_IDX]] to i16 -; CHECK-NEXT: [[TMP11:%.*]] = add i16 [[TMP10]], 0 -; CHECK-NEXT: [[TMP12:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP12]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = trunc i32 [[OFFSET_IDX]] to i16 +; CHECK-NEXT: [[TMP12:%.*]] = add i16 [[TMP11]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0 ; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: ; CHECK-NEXT: store i32 10, ptr [[B]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP12]], i32 1 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1 ; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]] ; CHECK: pred.store.if2: ; CHECK-NEXT: store i32 10, ptr [[B]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE3]] ; CHECK: pred.store.continue3: -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x i16], ptr @a, i16 0, i16 [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x i16], ptr @a, i16 0, i16 [[TMP12]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i32 0 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[TMP16]], i32 -1 ; CHECK-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP17]], align 1 diff --git a/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll b/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll index 4f47e66816c9a..a129a4b1928c6 100644 --- a/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll +++ b/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll @@ -6,25 +6,25 @@ define i32 @test(i32 %a, i1 %c.1, i1 %c.2 ) #0 { ; CHECK-NEXT: bb: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[BROADCAST_SPLAT]], splat (i32 1) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i1> poison, i1 [[C_1:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT1]], <2 x i1> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT2]], splat (i1 true) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i1> poison, i1 [[C_2:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT3]], <2 x i1> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT4]], splat (i1 true) +; CHECK-NEXT: [[TMP7:%.*]] = select <2 x i1> [[TMP4]], <2 x i1> [[TMP6]], <2 x i1> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT4]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[BROADCAST_SPLAT5]], splat (i32 1) +; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP4]], <2 x i1> [[BROADCAST_SPLAT4]], <2 x i1> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[PREDPHI7:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT2]], splat (i1 true) ; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i32> [[VEC_PHI]], splat (i32 10) -; CHECK-NEXT: [[TMP6:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT4]], splat (i1 true) -; CHECK-NEXT: [[TMP7:%.*]] = select <2 x i1> [[TMP4]], <2 x i1> [[TMP6]], <2 x i1> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[TMP0]], splat (i32 20) ; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP4]], <2 x i1> [[BROADCAST_SPLAT4]], <2 x i1> zeroinitializer ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> splat (i32 9), <2 x i32> [[VEC_IND]] ; CHECK-NEXT: [[PREDPHI5:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> splat (i32 9), <2 x i32> [[PREDPHI]] ; CHECK-NEXT: [[PREDPHI6:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> [[TMP0]], <2 x i32> [[VEC_PHI]] diff --git a/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll b/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll index 8a8439fca439d..ca971f15e4878 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll @@ -89,13 +89,13 @@ define i8 @PR34687_no_undef(i1 %c, i32 %x, i32 %n) { ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> [[BROADCAST_SPLAT2]], <4 x i32> splat (i32 1) +; CHECK-NEXT: [[TMP1:%.*]] = sdiv <4 x i32> splat (i32 99), [[TMP0]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> [[TMP1]], <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> [[BROADCAST_SPLAT2]], <4 x i32> splat (i32 1) -; CHECK-NEXT: [[TMP1:%.*]] = sdiv <4 x i32> splat (i32 99), [[TMP0]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> [[TMP1]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i32> [[VEC_PHI]], splat (i32 255) ; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], [[PREDPHI]] ; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i32> [[TMP4]] to <4 x i8> diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp.ll b/llvm/test/Transforms/LoopVectorize/select-cmp.ll index 301526cf3070c..550e52d318230 100644 --- a/llvm/test/Transforms/LoopVectorize/select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/select-cmp.ll @@ -1006,11 +1006,11 @@ define i32 @select_i32_from_icmp_same_inputs(i32 %a, i32 %b, i64 %n) { ; CHECK-VF4IC1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0 ; CHECK-VF4IC1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], splat (i32 3) +; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) ; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK-VF4IC1: [[VECTOR_BODY]]: ; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) ; CHECK-VF4IC1-NEXT: [[TMP2]] = or <4 x i1> [[VEC_PHI]], [[TMP1]] ; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -1048,6 +1048,7 @@ define i32 @select_i32_from_icmp_same_inputs(i32 %a, i32 %b, i64 %n) { ; CHECK-VF4IC4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0 ; CHECK-VF4IC4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], splat (i32 3) +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) ; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK-VF4IC4: [[VECTOR_BODY]]: ; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -1055,13 +1056,9 @@ define i32 @select_i32_from_icmp_same_inputs(i32 %a, i32 %b, i64 %n) { ; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) -; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) -; CHECK-VF4IC4-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[TMP1]] -; CHECK-VF4IC4-NEXT: [[TMP6]] = or <4 x i1> [[VEC_PHI1]], [[TMP2]] -; CHECK-VF4IC4-NEXT: [[TMP7]] = or <4 x i1> [[VEC_PHI2]], [[TMP3]] +; CHECK-VF4IC4-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[TMP4]] +; CHECK-VF4IC4-NEXT: [[TMP6]] = or <4 x i1> [[VEC_PHI1]], [[TMP4]] +; CHECK-VF4IC4-NEXT: [[TMP7]] = or <4 x i1> [[VEC_PHI2]], [[TMP4]] ; CHECK-VF4IC4-NEXT: [[TMP8]] = or <4 x i1> [[VEC_PHI3]], [[TMP4]] ; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -1100,6 +1097,7 @@ define i32 @select_i32_from_icmp_same_inputs(i32 %a, i32 %b, i64 %n) { ; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 ; CHECK-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = icmp eq i32 [[A]], 3 +; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = xor i1 [[TMP0]], true ; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK-VF1IC4: [[VECTOR_BODY]]: ; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -1107,13 +1105,9 @@ define i32 @select_i32_from_icmp_same_inputs(i32 %a, i32 %b, i64 %n) { ; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = xor i1 [[TMP0]], true -; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = xor i1 [[TMP0]], true -; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = xor i1 [[TMP0]], true -; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = xor i1 [[TMP0]], true -; CHECK-VF1IC4-NEXT: [[TMP5]] = or i1 [[VEC_PHI]], [[TMP1]] -; CHECK-VF1IC4-NEXT: [[TMP6]] = or i1 [[VEC_PHI1]], [[TMP2]] -; CHECK-VF1IC4-NEXT: [[TMP7]] = or i1 [[VEC_PHI2]], [[TMP3]] +; CHECK-VF1IC4-NEXT: [[TMP5]] = or i1 [[VEC_PHI]], [[TMP4]] +; CHECK-VF1IC4-NEXT: [[TMP6]] = or i1 [[VEC_PHI1]], [[TMP4]] +; CHECK-VF1IC4-NEXT: [[TMP7]] = or i1 [[VEC_PHI2]], [[TMP4]] ; CHECK-VF1IC4-NEXT: [[TMP8]] = or i1 [[VEC_PHI3]], [[TMP4]] ; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll index 7590bb9d68680..4ba9cc661313b 100644 --- a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll +++ b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll @@ -281,12 +281,12 @@ define i32 @diff_blocks_invariant_early_exit_cond(ptr %s) { ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[COND]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP0]]) ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) -; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP0]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 276 ; CHECK-NEXT: [[TMP3:%.*]] = or i1 [[TMP1]], [[TMP2]] ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_SPLIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll index 31732f027f6dd..892ddccbc93b4 100644 --- a/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll +++ b/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll @@ -10,12 +10,12 @@ define void @tail_fold_switch(ptr %dst, i32 %0) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], splat (i32 1) ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ] ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IND]], splat (i64 4) -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], splat (i32 1) ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP2]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 ; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll index 72011ca7f4840..7f5e0f3a77ef7 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll @@ -133,11 +133,11 @@ define void @blend_chain_iv(i1 %c) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i1> [[BROADCAST_SPLAT]], <4 x i1> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i1> [[BROADCAST_SPLAT]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[VEC_IND]], <4 x i64> undef ; CHECK-NEXT: [[PREDPHI1:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i64> [[PREDPHI]], <4 x i64> undef ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i64> [[PREDPHI1]], i32 0 diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp index 5bcc2b8eb2e22..1ac499fba4175 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -1220,9 +1220,9 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) { VPInstruction VPInst(Instruction::Add, {&Op1, &Op2}); VPRecipeBase &Recipe = VPInst; EXPECT_FALSE(Recipe.mayHaveSideEffects()); - EXPECT_TRUE(Recipe.mayReadFromMemory()); + EXPECT_FALSE(Recipe.mayReadFromMemory()); EXPECT_FALSE(Recipe.mayWriteToMemory()); - EXPECT_TRUE(Recipe.mayReadOrWriteMemory()); + EXPECT_FALSE(Recipe.mayReadOrWriteMemory()); } { VPValue Op1;