diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 3a2f2f39cd1c9..af02a9fc23891 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1401,6 +1401,12 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
     return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
                                      I);
 
+  // Without Zicond/XVentanaCondOps a scalar select may expand to move + branch.
+  if (TLI->InstructionOpcodeToISD(Opcode) == ISD::SELECT &&
+      !ValTy->isVectorTy() && !ST->hasStdExtZicond() &&
+      !ST->hasVendorXVentanaCondOps())
+    return 2;
+
   if (isa<FixedVectorType>(ValTy) && !ST->useRVVForFixedLengthVectors())
     return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
                                      I);
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/slp-select-cost-2.ll b/llvm/test/Transforms/LoopVectorize/RISCV/slp-select-cost-2.ll
new file mode 100644
index 0000000000000..e3bced6377c50
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/slp-select-cost-2.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=slp-vectorizer -mtriple=riscv64 -mattr=+v -S | FileCheck %s
+
+@s1 = dso_local local_unnamed_addr global [4 x double] zeroinitializer, align 32
+@s2 = dso_local local_unnamed_addr global [4 x double] zeroinitializer, align 32
+@s3 = dso_local local_unnamed_addr global [64 x double] zeroinitializer, align 32
+
+define void @foo() {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x double>, ptr @s1, align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @s2, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = fcmp fast ogt <4 x double> [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x double> <double -1.000000e+00, double -1.000000e+00, double -1.000000e+00, double -1.000000e+00>, <4 x double> zeroinitializer
+; CHECK-NEXT:    store <4 x double> [[TMP3]], ptr @s3, align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = fcmp fast ule <4 x double> [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x double> <double -1.000000e+00, double -1.000000e+00, double -1.000000e+00, double -1.000000e+00>, <4 x double> zeroinitializer
+; CHECK-NEXT:    store <4 x double> [[TMP5]], ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 4), align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = load double, ptr @s1, align 8
+  %1 = load double, ptr @s2, align 8
+  %cmp3 = fcmp fast ogt double %0, %1
+  %cond = select fast i1 %cmp3, double -1.000000e+00, double 0.000000e+00
+  store double %cond, ptr @s3, align 8
+  %2 = load double, ptr getelementptr inbounds ([4 x double], ptr @s1, i64 0, i64 1), align 8
+  %3 = load double, ptr getelementptr inbounds ([4 x double], ptr @s2, i64 0, i64 1), align 8
+  %cmp3.1 = fcmp fast ogt double %2, %3
+  %cond.1 = select fast i1 %cmp3.1, double -1.000000e+00, double 0.000000e+00
+  store double %cond.1, ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 1), align 8
+  %4 = load double, ptr getelementptr inbounds ([4 x double], ptr @s1, i64 0, i64 2), align 8
+  %5 = load double, ptr getelementptr inbounds ([4 x double], ptr @s2, i64 0, i64 2), align 8
+  %cmp3.2 = fcmp fast ogt double %4, %5
+  %cond.2 = select fast i1 %cmp3.2, double -1.000000e+00, double 0.000000e+00
+  store double %cond.2, ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 2), align 8
+  %6 = load double, ptr getelementptr inbounds ([4 x double], ptr @s1, i64 0, i64 3), align 8
+  %7 = load double, ptr getelementptr inbounds ([4 x double], ptr @s2, i64 0, i64 3), align 8
+  %cmp3.3 = fcmp fast ogt double %6, %7
+  %cond.3 = select fast i1 %cmp3.3, double -1.000000e+00, double 0.000000e+00
+  store double %cond.3, ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 3), align 8
+  %cmp15 = fcmp fast ule double %0, %1
+  %cond16 = select fast i1 %cmp15, double -1.000000e+00, double 0.000000e+00
+  store double %cond16, ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 4), align 8
+  %cmp15.1 = fcmp fast ule double %2, %3
+  %cond16.1 = select fast i1 %cmp15.1, double -1.000000e+00, double 0.000000e+00
+  store double %cond16.1, ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 5), align 8
+  %cmp15.2 = fcmp fast ule double %4, %5
+  %cond16.2 = select fast i1 %cmp15.2, double -1.000000e+00, double 0.000000e+00
+  store double %cond16.2, ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 6), align 8
+  %cmp15.3 = fcmp fast ule double %6, %7
+  %cond16.3 = select fast i1 %cmp15.3, double -1.000000e+00, double 0.000000e+00
+  store double %cond16.3, ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 7), align 8
+  ret void
+}