[SLP][REVEC] Relax the scalar-only assertion in BoUpSLP::getEntryCost.

When the selects are the only users of the compares, getEntryCost subtracts
the compare cost from the intrinsic cost. The assertion guarding that path
now also accepts vector types when SLPReVec is set. A regression test
(@test3) with <2 x float> selects is added to the RISC-V revec test file.

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b2f677fb84f98..bd8680d4c1663 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -10986,7 +10986,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
           // If the selects are the only uses of the compares, they will be
           // dead and we can adjust the cost by removing their cost.
           if (VI && SelectOnly) {
-            assert(!Ty->isVectorTy() && "Expected only for scalar type.");
+            assert((!Ty->isVectorTy() || SLPReVec) &&
+                   "Expected only for scalar type.");
             auto *CI = cast<CmpInst>(VI->getOperand(0));
             IntrinsicCost -= TTI->getCmpSelInstrCost(
                 CI->getOpcode(), Ty, Builder.getInt1Ty(), CI->getPredicate(),
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll
index 3d00ddf89aaa3..b312688b7932d 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll
@@ -94,3 +94,43 @@ entry:
   %23 = fcmp ogt <8 x float> zeroinitializer, %19
   ret void
 }
+
+define void @test3(float %0) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY_LR_PH:%.*]]
+; CHECK:       for.body.lr.ph:
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> zeroinitializer, i64 0)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP1]], <2 x float> zeroinitializer, i64 2)
+; CHECK-NEXT:    br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    [[TMP3:%.*]] = phi <4 x float> [ [[TMP2]], [[FOR_BODY_LR_PH]] ], [ [[TMP10:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    ret void
+; CHECK:       for.body:
+; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x float>, ptr null, align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = fcmp olt <2 x float> zeroinitializer, [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> poison, <2 x i1> splat (i1 true), i64 0)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> [[TMP6]], <2 x i1> [[TMP5]], i64 2)
+; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> [[TMP4]], i64 0)
+; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+; CHECK-NEXT:    [[TMP10]] = select <4 x i1> [[TMP7]], <4 x float> [[TMP9]], <4 x float> [[TMP2]]
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+;
+entry:
+  br label %for.body.lr.ph
+
+for.body.lr.ph:
+  br i1 false, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body, %for.body.lr.ph
+  %1 = phi <2 x float> [ zeroinitializer, %for.body.lr.ph ], [ %5, %for.body ]
+  %2 = phi <2 x float> [ zeroinitializer, %for.body.lr.ph ], [ %6, %for.body ]
+  ret void
+
+for.body:
+  %3 = load <2 x float>, ptr null, align 4
+  %4 = fcmp olt <2 x float> zeroinitializer, %3
+  %5 = select <2 x i1> <i1 true, i1 true>, <2 x float> %3, <2 x float> zeroinitializer
+  %6 = select <2 x i1> %4, <2 x float> %3, <2 x float> zeroinitializer
+  br label %for.cond.cleanup
+}