Skip to content

Commit e546d0f

Browse files
authored
[RISCV][TTI] Add cost model for ROTL/ROTR (#170824)
A funnel shift whose first two operands are the same is a rotate. When `Zbb` or `Zbkb` is enabled, it can be lowered to a single `ROL(W)`/`ROR(I)(W)` instruction. Add cost model support for this case, similar to #169335 for AArch64.
1 parent b1ee4d0 commit e546d0f

File tree

2 files changed

+198
-0
lines changed

2 files changed

+198
-0
lines changed

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1559,6 +1559,23 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
15591559
}
15601560
break;
15611561
}
1562+
case Intrinsic::fshl:
1563+
case Intrinsic::fshr: {
1564+
if (ICA.getArgs().empty())
1565+
break;
1566+
1567+
// Funnel-shifts are ROTL/ROTR when the first and second operand are equal.
1568+
// When Zbb/Zbkb is enabled we can use a single ROL(W)/ROR(I)(W)
1569+
// instruction.
1570+
if ((ST->hasStdExtZbb() || ST->hasStdExtZbkb()) && RetTy->isIntegerTy() &&
1571+
ICA.getArgs()[0] == ICA.getArgs()[1] &&
1572+
(RetTy->getIntegerBitWidth() == 32 ||
1573+
RetTy->getIntegerBitWidth() == 64) &&
1574+
RetTy->getIntegerBitWidth() <= ST->getXLen()) {
1575+
return 1;
1576+
}
1577+
break;
1578+
}
15621579
case Intrinsic::get_active_lane_mask: {
15631580
if (ST->hasVInstructions()) {
15641581
Type *ExpRetTy = VectorType::get(
Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=riscv32 < %s | FileCheck %s --check-prefix=RV32
3+
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=riscv64 < %s | FileCheck %s --check-prefix=RV64
4+
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=riscv32 -mattr=+zbb < %s | FileCheck %s --check-prefix=RV32ZBB
5+
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=riscv64 -mattr=+zbb < %s | FileCheck %s --check-prefix=RV64ZBB
6+
7+
; i32 rotate-left by a constant amount (9), written as llvm.fshl with the same
; value for the first two operands. The checks expect a cost of 4 under the
; RV32/RV64 prefixes and a cost of 1 under the RV32ZBB/RV64ZBB (+zbb) prefixes.
define i32 @rotl_i32_3rd_arg_const(i32 %a) {
8+
; RV32-LABEL: 'rotl_i32_3rd_arg_const'
9+
; RV32-NEXT: Cost Model: Found costs of 4 for: %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 9)
10+
; RV32-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r
11+
;
12+
; RV64-LABEL: 'rotl_i32_3rd_arg_const'
13+
; RV64-NEXT: Cost Model: Found costs of 4 for: %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 9)
14+
; RV64-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r
15+
;
16+
; RV32ZBB-LABEL: 'rotl_i32_3rd_arg_const'
17+
; RV32ZBB-NEXT: Cost Model: Found costs of 1 for: %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 9)
18+
; RV32ZBB-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r
19+
;
20+
; RV64ZBB-LABEL: 'rotl_i32_3rd_arg_const'
21+
; RV64ZBB-NEXT: Cost Model: Found costs of 1 for: %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 9)
22+
; RV64ZBB-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r
23+
;
24+
entry:
25+
%r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 9)
26+
ret i32 %r
27+
}
28+
29+
; i32 rotate-left by a variable amount (%c), written as llvm.fshl with the same
; value for the first two operands. The checks expect a cost of 5 under the
; RV32/RV64 prefixes and a cost of 1 under the RV32ZBB/RV64ZBB (+zbb) prefixes.
define i32 @rotl_i32_3rd_arg_var(i32 %a, i32 %c) {
30+
; RV32-LABEL: 'rotl_i32_3rd_arg_var'
31+
; RV32-NEXT: Cost Model: Found costs of 5 for: %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %c)
32+
; RV32-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r
33+
;
34+
; RV64-LABEL: 'rotl_i32_3rd_arg_var'
35+
; RV64-NEXT: Cost Model: Found costs of 5 for: %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %c)
36+
; RV64-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r
37+
;
38+
; RV32ZBB-LABEL: 'rotl_i32_3rd_arg_var'
39+
; RV32ZBB-NEXT: Cost Model: Found costs of 1 for: %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %c)
40+
; RV32ZBB-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r
41+
;
42+
; RV64ZBB-LABEL: 'rotl_i32_3rd_arg_var'
43+
; RV64ZBB-NEXT: Cost Model: Found costs of 1 for: %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %c)
44+
; RV64ZBB-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r
45+
;
46+
entry:
47+
%r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %c)
48+
ret i32 %r
49+
}
50+
51+
; i64 rotate-left by a constant amount (9), written as llvm.fshl with the same
; value for the first two operands. The checks expect a cost of 1 only under
; RV64ZBB; RV64 expects 4, and both RV32 and RV32ZBB expect the same
; RThru:8 / 4 / 4 / 4 costs, i.e. +zbb does not change the i64 cost on riscv32.
define i64 @rotl_i64_3rd_arg_const(i64 %a) {
52+
; RV32-LABEL: 'rotl_i64_3rd_arg_const'
53+
; RV32-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 9)
54+
; RV32-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r
55+
;
56+
; RV64-LABEL: 'rotl_i64_3rd_arg_const'
57+
; RV64-NEXT: Cost Model: Found costs of 4 for: %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 9)
58+
; RV64-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r
59+
;
60+
; RV32ZBB-LABEL: 'rotl_i64_3rd_arg_const'
61+
; RV32ZBB-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 9)
62+
; RV32ZBB-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r
63+
;
64+
; RV64ZBB-LABEL: 'rotl_i64_3rd_arg_const'
65+
; RV64ZBB-NEXT: Cost Model: Found costs of 1 for: %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 9)
66+
; RV64ZBB-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r
67+
;
68+
entry:
69+
%r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 9)
70+
ret i64 %r
71+
}
72+
73+
; i64 rotate-left by a variable amount (%c), written as llvm.fshl with the same
; value for the first two operands. The checks expect a cost of 1 only under
; RV64ZBB; RV64 expects 5, and both RV32 and RV32ZBB expect the same
; RThru:10 / 5 / 5 / 5 costs, i.e. +zbb does not change the i64 cost on riscv32.
define i64 @rotl_i64_3rd_arg_var(i64 %a, i64 %c) {
74+
; RV32-LABEL: 'rotl_i64_3rd_arg_var'
75+
; RV32-NEXT: Cost Model: Found costs of RThru:10 CodeSize:5 Lat:5 SizeLat:5 for: %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %c)
76+
; RV32-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r
77+
;
78+
; RV64-LABEL: 'rotl_i64_3rd_arg_var'
79+
; RV64-NEXT: Cost Model: Found costs of 5 for: %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %c)
80+
; RV64-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r
81+
;
82+
; RV32ZBB-LABEL: 'rotl_i64_3rd_arg_var'
83+
; RV32ZBB-NEXT: Cost Model: Found costs of RThru:10 CodeSize:5 Lat:5 SizeLat:5 for: %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %c)
84+
; RV32ZBB-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r
85+
;
86+
; RV64ZBB-LABEL: 'rotl_i64_3rd_arg_var'
87+
; RV64ZBB-NEXT: Cost Model: Found costs of 1 for: %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %c)
88+
; RV64ZBB-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r
89+
;
90+
entry:
91+
%r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %c)
92+
ret i64 %r
93+
}
94+
95+
; i32 rotate-right by a constant amount (9), written as llvm.fshr with the same
; value for the first two operands. The checks expect a cost of 4 under the
; RV32/RV64 prefixes and a cost of 1 under the RV32ZBB/RV64ZBB (+zbb) prefixes.
define i32 @rotr_i32_3rd_arg_const(i32 %a) {
96+
; RV32-LABEL: 'rotr_i32_3rd_arg_const'
97+
; RV32-NEXT: Cost Model: Found costs of 4 for: %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 9)
98+
; RV32-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r
99+
;
100+
; RV64-LABEL: 'rotr_i32_3rd_arg_const'
101+
; RV64-NEXT: Cost Model: Found costs of 4 for: %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 9)
102+
; RV64-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r
103+
;
104+
; RV32ZBB-LABEL: 'rotr_i32_3rd_arg_const'
105+
; RV32ZBB-NEXT: Cost Model: Found costs of 1 for: %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 9)
106+
; RV32ZBB-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r
107+
;
108+
; RV64ZBB-LABEL: 'rotr_i32_3rd_arg_const'
109+
; RV64ZBB-NEXT: Cost Model: Found costs of 1 for: %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 9)
110+
; RV64ZBB-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r
111+
;
112+
entry:
113+
%r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 9)
114+
ret i32 %r
115+
}
116+
117+
; i32 rotate-right by a variable amount (%c), written as llvm.fshr with the same
; value for the first two operands. The checks expect a cost of 5 under the
; RV32/RV64 prefixes and a cost of 1 under the RV32ZBB/RV64ZBB (+zbb) prefixes.
define i32 @rotr_i32_3rd_arg_var(i32 %a, i32 %c) {
118+
; RV32-LABEL: 'rotr_i32_3rd_arg_var'
119+
; RV32-NEXT: Cost Model: Found costs of 5 for: %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %c)
120+
; RV32-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r
121+
;
122+
; RV64-LABEL: 'rotr_i32_3rd_arg_var'
123+
; RV64-NEXT: Cost Model: Found costs of 5 for: %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %c)
124+
; RV64-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r
125+
;
126+
; RV32ZBB-LABEL: 'rotr_i32_3rd_arg_var'
127+
; RV32ZBB-NEXT: Cost Model: Found costs of 1 for: %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %c)
128+
; RV32ZBB-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r
129+
;
130+
; RV64ZBB-LABEL: 'rotr_i32_3rd_arg_var'
131+
; RV64ZBB-NEXT: Cost Model: Found costs of 1 for: %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %c)
132+
; RV64ZBB-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r
133+
;
134+
entry:
135+
%r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %c)
136+
ret i32 %r
137+
}
138+
139+
; i64 rotate-right by a constant amount (9), written as llvm.fshr with the same
; value for the first two operands. The checks expect a cost of 1 only under
; RV64ZBB; RV64 expects 4, and both RV32 and RV32ZBB expect the same
; RThru:8 / 4 / 4 / 4 costs, i.e. +zbb does not change the i64 cost on riscv32.
define i64 @rotr_i64_3rd_arg_const(i64 %a) {
140+
; RV32-LABEL: 'rotr_i64_3rd_arg_const'
141+
; RV32-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 9)
142+
; RV32-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r
143+
;
144+
; RV64-LABEL: 'rotr_i64_3rd_arg_const'
145+
; RV64-NEXT: Cost Model: Found costs of 4 for: %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 9)
146+
; RV64-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r
147+
;
148+
; RV32ZBB-LABEL: 'rotr_i64_3rd_arg_const'
149+
; RV32ZBB-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 9)
150+
; RV32ZBB-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r
151+
;
152+
; RV64ZBB-LABEL: 'rotr_i64_3rd_arg_const'
153+
; RV64ZBB-NEXT: Cost Model: Found costs of 1 for: %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 9)
154+
; RV64ZBB-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r
155+
;
156+
entry:
157+
%r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 9)
158+
ret i64 %r
159+
}
160+
161+
; i64 rotate-right by a variable amount (%c), written as llvm.fshr with the same
; value for the first two operands. The checks expect a cost of 1 only under
; RV64ZBB; RV64 expects 5, and both RV32 and RV32ZBB expect the same
; RThru:10 / 5 / 5 / 5 costs, i.e. +zbb does not change the i64 cost on riscv32.
define i64 @rotr_i64_3rd_arg_var(i64 %a, i64 %c) {
162+
; RV32-LABEL: 'rotr_i64_3rd_arg_var'
163+
; RV32-NEXT: Cost Model: Found costs of RThru:10 CodeSize:5 Lat:5 SizeLat:5 for: %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %c)
164+
; RV32-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r
165+
;
166+
; RV64-LABEL: 'rotr_i64_3rd_arg_var'
167+
; RV64-NEXT: Cost Model: Found costs of 5 for: %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %c)
168+
; RV64-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r
169+
;
170+
; RV32ZBB-LABEL: 'rotr_i64_3rd_arg_var'
171+
; RV32ZBB-NEXT: Cost Model: Found costs of RThru:10 CodeSize:5 Lat:5 SizeLat:5 for: %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %c)
172+
; RV32ZBB-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r
173+
;
174+
; RV64ZBB-LABEL: 'rotr_i64_3rd_arg_var'
175+
; RV64ZBB-NEXT: Cost Model: Found costs of 1 for: %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %c)
176+
; RV64ZBB-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r
177+
;
178+
entry:
179+
%r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %c)
180+
ret i64 %r
181+
}

0 commit comments

Comments
 (0)