Skip to content

Commit a2ca69b

Browse files
committed
[RISCV][CostModel] Change select cost to 2
Change the cost of a scalar select to 2 to reflect that it may be expanded into a move and a branch. A benefit is that this could encourage the SLP vectorizer to generate a vector merge instead, reducing the number of branches.
1 parent aae3eef commit a2ca69b

File tree

2 files changed

+13
-32
lines changed

2 files changed

+13
-32
lines changed

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1401,6 +1401,11 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
14011401
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
14021402
I);
14031403

1404+
// Select might be expanded to move and branch.
1405+
if (TLI->InstructionOpcodeToISD(Opcode) == ISD::SELECT &&
1406+
!ValTy->isVectorTy())
1407+
return 2;
1408+
14041409
if (isa<FixedVectorType>(ValTy) && !ST->useRVVForFixedLengthVectors())
14051410
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
14061411
I);

llvm/test/Transforms/LoopVectorize/RISCV/slp-select-cost-2.ll

Lines changed: 8 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -8,38 +8,14 @@
88
define void @foo() {
99
; CHECK-LABEL: @foo(
1010
; CHECK-NEXT: entry:
11-
; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr @s1, align 8
12-
; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr @s2, align 8
13-
; CHECK-NEXT: [[CMP3:%.*]] = fcmp fast ogt double [[TMP0]], [[TMP1]]
14-
; CHECK-NEXT: [[COND:%.*]] = select fast i1 [[CMP3]], double -1.000000e+00, double 0.000000e+00
15-
; CHECK-NEXT: store double [[COND]], ptr @s3, align 8
16-
; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr getelementptr inbounds ([4 x double], ptr @s1, i64 0, i64 1), align 8
17-
; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr getelementptr inbounds ([4 x double], ptr @s2, i64 0, i64 1), align 8
18-
; CHECK-NEXT: [[CMP3_1:%.*]] = fcmp fast ogt double [[TMP2]], [[TMP3]]
19-
; CHECK-NEXT: [[COND_1:%.*]] = select fast i1 [[CMP3_1]], double -1.000000e+00, double 0.000000e+00
20-
; CHECK-NEXT: store double [[COND_1]], ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 1), align 8
21-
; CHECK-NEXT: [[TMP4:%.*]] = load double, ptr getelementptr inbounds ([4 x double], ptr @s1, i64 0, i64 2), align 8
22-
; CHECK-NEXT: [[TMP5:%.*]] = load double, ptr getelementptr inbounds ([4 x double], ptr @s2, i64 0, i64 2), align 8
23-
; CHECK-NEXT: [[CMP3_2:%.*]] = fcmp fast ogt double [[TMP4]], [[TMP5]]
24-
; CHECK-NEXT: [[COND_2:%.*]] = select fast i1 [[CMP3_2]], double -1.000000e+00, double 0.000000e+00
25-
; CHECK-NEXT: store double [[COND_2]], ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 2), align 8
26-
; CHECK-NEXT: [[TMP6:%.*]] = load double, ptr getelementptr inbounds ([4 x double], ptr @s1, i64 0, i64 3), align 8
27-
; CHECK-NEXT: [[TMP7:%.*]] = load double, ptr getelementptr inbounds ([4 x double], ptr @s2, i64 0, i64 3), align 8
28-
; CHECK-NEXT: [[CMP3_3:%.*]] = fcmp fast ogt double [[TMP6]], [[TMP7]]
29-
; CHECK-NEXT: [[COND_3:%.*]] = select fast i1 [[CMP3_3]], double -1.000000e+00, double 0.000000e+00
30-
; CHECK-NEXT: store double [[COND_3]], ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 3), align 8
31-
; CHECK-NEXT: [[CMP15:%.*]] = fcmp fast ule double [[TMP0]], [[TMP1]]
32-
; CHECK-NEXT: [[COND16:%.*]] = select fast i1 [[CMP15]], double -1.000000e+00, double 0.000000e+00
33-
; CHECK-NEXT: store double [[COND16]], ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 4), align 8
34-
; CHECK-NEXT: [[CMP15_1:%.*]] = fcmp fast ule double [[TMP2]], [[TMP3]]
35-
; CHECK-NEXT: [[COND16_1:%.*]] = select fast i1 [[CMP15_1]], double -1.000000e+00, double 0.000000e+00
36-
; CHECK-NEXT: store double [[COND16_1]], ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 5), align 8
37-
; CHECK-NEXT: [[CMP15_2:%.*]] = fcmp fast ule double [[TMP4]], [[TMP5]]
38-
; CHECK-NEXT: [[COND16_2:%.*]] = select fast i1 [[CMP15_2]], double -1.000000e+00, double 0.000000e+00
39-
; CHECK-NEXT: store double [[COND16_2]], ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 6), align 8
40-
; CHECK-NEXT: [[CMP15_3:%.*]] = fcmp fast ule double [[TMP6]], [[TMP7]]
41-
; CHECK-NEXT: [[COND16_3:%.*]] = select fast i1 [[CMP15_3]], double -1.000000e+00, double 0.000000e+00
42-
; CHECK-NEXT: store double [[COND16_3]], ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 7), align 8
11+
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x double>, ptr @s1, align 8
12+
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, ptr @s2, align 8
13+
; CHECK-NEXT: [[TMP2:%.*]] = fcmp fast ogt <4 x double> [[TMP0]], [[TMP1]]
14+
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x double> <double -1.000000e+00, double -1.000000e+00, double -1.000000e+00, double -1.000000e+00>, <4 x double> zeroinitializer
15+
; CHECK-NEXT: store <4 x double> [[TMP3]], ptr @s3, align 8
16+
; CHECK-NEXT: [[TMP4:%.*]] = fcmp fast ule <4 x double> [[TMP0]], [[TMP1]]
17+
; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x double> <double -1.000000e+00, double -1.000000e+00, double -1.000000e+00, double -1.000000e+00>, <4 x double> zeroinitializer
18+
; CHECK-NEXT: store <4 x double> [[TMP5]], ptr getelementptr inbounds ([64 x double], ptr @s3, i64 0, i64 4), align 8
4319
; CHECK-NEXT: ret void
4420
;
4521
entry:

0 commit comments

Comments
 (0)