Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1401,6 +1401,11 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
I);

// Select might be expanded to move and branch.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But it also might not...

I think you need to consider the availability of the various scalar select-variant instructions here and adjust the cost appropriately. Consider Zicond, and whichever of the scalar bitmanip extensions added min/max (Zbb).

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the suggestions. :-) I added !hasStdExtZicond() to filter out Zicond. For the scalar min/max, it seems the passes that call getCmpSelInstrCost will see llvm.smax.i* intrinsics instead of selects. I assume those won't be affected by this cost change?

if (TLI->InstructionOpcodeToISD(Opcode) == ISD::SELECT &&
!ValTy->isVectorTy())
return 2;

if (isa<FixedVectorType>(ValTy) && !ST->useRVVForFixedLengthVectors())
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
I);
Expand Down
55 changes: 55 additions & 0 deletions llvm/test/Transforms/LoopVectorize/RISCV/slp-select-cost-2.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=slp-vectorizer -mtriple=riscv64 -mattr=+v -S | FileCheck %s

@s1 = dso_local local_unnamed_addr global [4 x double] zeroinitializer, align 32
@s2 = dso_local local_unnamed_addr global [4 x double] zeroinitializer, align 32
@s3 = dso_local local_unnamed_addr global [64 x double] zeroinitializer, align 32

; Two independent groups of 4 scalar fcmp+select+store chains over the
; doubles in @s1/@s2. With the select cost fix under review, the SLP
; vectorizer should combine each group into a single <4 x double>
; fcmp + select + store, as asserted by the autogenerated CHECK lines.
define void @foo() {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x double>, ptr @s1, align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, ptr @s2, align 8
; CHECK-NEXT: [[TMP2:%.*]] = fcmp fast ogt <4 x double> [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x double> <double -1.000000e+00, double -1.000000e+00, double -1.000000e+00, double -1.000000e+00>, <4 x double> zeroinitializer
; CHECK-NEXT: store <4 x double> [[TMP3]], ptr @s3, align 8
; CHECK-NEXT: [[TMP4:%.*]] = fcmp fast ule <4 x double> [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x double> <double -1.000000e+00, double -1.000000e+00, double -1.000000e+00, double -1.000000e+00>, <4 x double> zeroinitializer
; CHECK-NEXT: store <4 x double> [[TMP5]], ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 4), align 8
; CHECK-NEXT: ret void
;
entry:
  ; Group 1: lanes 0..3 of @s1/@s2, ogt compare, results stored to @s3[0..3].
  %0 = load double, ptr @s1, align 8
  %1 = load double, ptr @s2, align 8
  %cmp3 = fcmp fast ogt double %0, %1
  %cond = select fast i1 %cmp3, double -1.000000e+00, double 0.000000e+00
  store double %cond, ptr @s3, align 8
  %2 = load double, ptr getelementptr inbounds ([4 x double], ptr @s1, i64 0, i64 1), align 8
  %3 = load double, ptr getelementptr inbounds ([4 x double], ptr @s2, i64 0, i64 1), align 8
  %cmp3.1 = fcmp fast ogt double %2, %3
  %cond.1 = select fast i1 %cmp3.1, double -1.000000e+00, double 0.000000e+00
  store double %cond.1, ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 1), align 8
  %4 = load double, ptr getelementptr inbounds ([4 x double], ptr @s1, i64 0, i64 2), align 8
  %5 = load double, ptr getelementptr inbounds ([4 x double], ptr @s2, i64 0, i64 2), align 8
  %cmp3.2 = fcmp fast ogt double %4, %5
  %cond.2 = select fast i1 %cmp3.2, double -1.000000e+00, double 0.000000e+00
  store double %cond.2, ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 2), align 8
  %6 = load double, ptr getelementptr inbounds ([4 x double], ptr @s1, i64 0, i64 3), align 8
  %7 = load double, ptr getelementptr inbounds ([4 x double], ptr @s2, i64 0, i64 3), align 8
  %cmp3.3 = fcmp fast ogt double %6, %7
  %cond.3 = select fast i1 %cmp3.3, double -1.000000e+00, double 0.000000e+00
  store double %cond.3, ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 3), align 8
  ; Group 2: reuses the same loaded values with a ule compare (the inverse
  ; predicate of ogt), results stored to @s3[4..7].
  %cmp15 = fcmp fast ule double %0, %1
  %cond16 = select fast i1 %cmp15, double -1.000000e+00, double 0.000000e+00
  store double %cond16, ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 4), align 8
  %cmp15.1 = fcmp fast ule double %2, %3
  %cond16.1 = select fast i1 %cmp15.1, double -1.000000e+00, double 0.000000e+00
  store double %cond16.1, ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 5), align 8
  %cmp15.2 = fcmp fast ule double %4, %5
  %cond16.2 = select fast i1 %cmp15.2, double -1.000000e+00, double 0.000000e+00
  store double %cond16.2, ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 6), align 8
  %cmp15.3 = fcmp fast ule double %6, %7
  %cond16.3 = select fast i1 %cmp15.3, double -1.000000e+00, double 0.000000e+00
  store double %cond16.3, ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 7), align 8
  ret void
}