Skip to content

Commit fb424f7

Browse files
committed
fixup! Update the latency of strided and indexed loads/stores
1 parent 43a5cbb commit fb424f7

File tree

6 files changed

+248
-222
lines changed

6 files changed

+248
-222
lines changed

llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,18 @@ class SiFiveP400GetVLMAX<string mx, int sew> {
4747
);
4848
}
4949

50+
// Latency used for the strided and indexed vector load/store write resources
// of the P400 model. It is selected from the VLMAX computed by
// SiFiveP400GetVLMAX for the given LMUL string (mx) and element width (sew):
// VLMAX of 2, 4, 8 and 16 map to 13, 18, 22 and 30 respectively; any other
// VLMAX yields VLMAX - 2.
class SiFiveP400StridedLdStLatency<string mx, int sew> {
51+
defvar VL = SiFiveP400GetVLMAX<mx, sew>.val;
52+
int val = !cond(
53+
!eq(VL, 2): 13,
54+
!eq(VL, 4): 18,
55+
!eq(VL, 8): 22,
56+
!eq(VL, 16): 30,
57+
// Fallback for the remaining VLMAX values (VL=32,64,128): latency is VL - 2.
58+
true: !sub(VL, 2)
59+
);
60+
}
61+
5062
// Latency for segmented loads and stores is calculated as vl * nf.
5163
class SiFiveP400SegmentedLdStCycles<string mx, int sew, int nf> {
5264
int c = !mul(SiFiveP400GetVLMAX<mx, sew>.val, nf);
@@ -391,7 +403,8 @@ foreach mx = SchedMxList in {
391403
}
392404
}
393405
foreach eew = [8, 16, 32, 64] in {
394-
let Latency = 13, ReleaseAtCycles = [SiFiveP400GetVLMAX<mx, eew>.val] in {
406+
let Latency = SiFiveP400StridedLdStLatency<mx, eew>.val,
407+
ReleaseAtCycles = [SiFiveP400GetVLMAX<mx, eew>.val] in {
395408
defm "" : LMULWriteResMX<"WriteVLDS" # eew, [SiFiveP400VLD], mx, IsWorstCase>;
396409
defm "" : LMULWriteResMX<"WriteVLDUX" # eew, [SiFiveP400VLD], mx, IsWorstCase>;
397410
defm "" : LMULWriteResMX<"WriteVLDOX" # eew, [SiFiveP400VLD], mx, IsWorstCase>;

llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,18 @@ class SiFiveP600GetVLMAX<string mx, int sew> {
4747
);
4848
}
4949

50+
// Latency used for the strided and indexed vector load/store write resources
// of the P600 model. It is selected from the VLMAX for the given LMUL string
// (mx) and element width (sew): VLMAX of 2, 4, 8 and 16 map to 13, 18, 22 and
// 30 respectively; any other VLMAX yields VLMAX - 2.
class SiFiveP600StridedLdStLatency<string mx, int sew> {
51+
// Use the P600 VLMAX helper, matching the other P600 classes in this file,
// so this model does not depend on the P400 scheduling model's definitions.
defvar VL = SiFiveP600GetVLMAX<mx, sew>.val;
52+
int val = !cond(
53+
!eq(VL, 2): 13,
54+
!eq(VL, 4): 18,
55+
!eq(VL, 8): 22,
56+
!eq(VL, 16): 30,
57+
// Fallback for the remaining VLMAX values (VL=32,64,128): latency is VL - 2.
58+
true: !sub(VL, 2)
59+
);
60+
}
61+
5062
// Latency for segmented loads and stores is calculated as vl * nf.
5163
class SiFiveP600SegmentedLdStCycles<string mx, int sew, int nf> {
5264
int c = !mul(SiFiveP600GetVLMAX<mx, sew>.val, nf);
@@ -567,7 +579,8 @@ foreach mx = SchedMxList in {
567579
}
568580
}
569581
foreach eew = [8, 16, 32, 64] in {
570-
let Latency = 13, ReleaseAtCycles = [SiFiveP600GetVLMAX<mx, eew>.val] in {
582+
let Latency = SiFiveP600StridedLdStLatency<mx, eew>.val,
583+
ReleaseAtCycles = [SiFiveP600GetVLMAX<mx, eew>.val] in {
571584
defm "" : LMULWriteResMX<"WriteVLDS" # eew, [SiFiveP600VLD], mx, IsWorstCase>;
572585
defm "" : LMULWriteResMX<"WriteVLDUX" # eew, [SiFiveP600VLD], mx, IsWorstCase>;
573586
defm "" : LMULWriteResMX<"WriteVLDOX" # eew, [SiFiveP600VLD], mx, IsWorstCase>;

llvm/test/tools/llvm-mca/RISCV/SiFiveP400/vlse-vsse.s

Lines changed: 37 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ vsse64.v v8, (a0), t0
9999

100100
# CHECK: Iterations: 1
101101
# CHECK-NEXT: Instructions: 88
102-
# CHECK-NEXT: Total Cycles: 937
102+
# CHECK-NEXT: Total Cycles: 954
103103
# CHECK-NEXT: Total uOps: 88
104104

105105
# CHECK: Dispatch Width: 3
@@ -117,93 +117,93 @@ vsse64.v v8, (a0), t0
117117

118118
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
119119
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf2, ta, ma
120-
# CHECK-NEXT: 1 13 8.00 * vlse8.v v8, (a0), t0
120+
# CHECK-NEXT: 1 22 8.00 * vlse8.v v8, (a0), t0
121121
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf4, ta, ma
122-
# CHECK-NEXT: 1 13 4.00 * vlse8.v v8, (a0), t0
122+
# CHECK-NEXT: 1 18 4.00 * vlse8.v v8, (a0), t0
123123
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf8, ta, ma
124124
# CHECK-NEXT: 1 13 2.00 * vlse8.v v8, (a0), t0
125125
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m1, ta, ma
126-
# CHECK-NEXT: 1 13 16.00 * vlse8.v v8, (a0), t0
126+
# CHECK-NEXT: 1 30 16.00 * vlse8.v v8, (a0), t0
127127
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m2, ta, ma
128-
# CHECK-NEXT: 1 13 32.00 * vlse8.v v8, (a0), t0
128+
# CHECK-NEXT: 1 30 32.00 * vlse8.v v8, (a0), t0
129129
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m4, ta, ma
130-
# CHECK-NEXT: 1 13 64.00 * vlse8.v v8, (a0), t0
130+
# CHECK-NEXT: 1 62 64.00 * vlse8.v v8, (a0), t0
131131
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m8, ta, ma
132-
# CHECK-NEXT: 1 13 128.00 * vlse8.v v8, (a0), t0
132+
# CHECK-NEXT: 1 126 128.00 * vlse8.v v8, (a0), t0
133133
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf2, ta, ma
134-
# CHECK-NEXT: 1 13 4.00 * vlse16.v v8, (a0), t0
134+
# CHECK-NEXT: 1 18 4.00 * vlse16.v v8, (a0), t0
135135
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf4, ta, ma
136136
# CHECK-NEXT: 1 13 2.00 * vlse16.v v8, (a0), t0
137137
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m1, ta, ma
138-
# CHECK-NEXT: 1 13 8.00 * vlse16.v v8, (a0), t0
138+
# CHECK-NEXT: 1 22 8.00 * vlse16.v v8, (a0), t0
139139
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m2, ta, ma
140-
# CHECK-NEXT: 1 13 16.00 * vlse16.v v8, (a0), t0
140+
# CHECK-NEXT: 1 30 16.00 * vlse16.v v8, (a0), t0
141141
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m4, ta, ma
142-
# CHECK-NEXT: 1 13 32.00 * vlse16.v v8, (a0), t0
142+
# CHECK-NEXT: 1 30 32.00 * vlse16.v v8, (a0), t0
143143
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m8, ta, ma
144-
# CHECK-NEXT: 1 13 64.00 * vlse16.v v8, (a0), t0
144+
# CHECK-NEXT: 1 62 64.00 * vlse16.v v8, (a0), t0
145145
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf2, ta, ma
146146
# CHECK-NEXT: 1 13 2.00 * vlse32.v v8, (a0), t0
147147
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, ta, ma
148-
# CHECK-NEXT: 1 13 4.00 * vlse32.v v8, (a0), t0
148+
# CHECK-NEXT: 1 18 4.00 * vlse32.v v8, (a0), t0
149149
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m2, ta, ma
150-
# CHECK-NEXT: 1 13 8.00 * vlse32.v v8, (a0), t0
150+
# CHECK-NEXT: 1 22 8.00 * vlse32.v v8, (a0), t0
151151
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m4, ta, ma
152-
# CHECK-NEXT: 1 13 16.00 * vlse32.v v8, (a0), t0
152+
# CHECK-NEXT: 1 30 16.00 * vlse32.v v8, (a0), t0
153153
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, ta, ma
154-
# CHECK-NEXT: 1 13 32.00 * vlse32.v v8, (a0), t0
154+
# CHECK-NEXT: 1 30 32.00 * vlse32.v v8, (a0), t0
155155
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, ta, ma
156156
# CHECK-NEXT: 1 13 2.00 * vlse64.v v8, (a0), t0
157157
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m2, ta, ma
158-
# CHECK-NEXT: 1 13 4.00 * vlse64.v v8, (a0), t0
158+
# CHECK-NEXT: 1 18 4.00 * vlse64.v v8, (a0), t0
159159
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m4, ta, ma
160-
# CHECK-NEXT: 1 13 8.00 * vlse64.v v8, (a0), t0
160+
# CHECK-NEXT: 1 22 8.00 * vlse64.v v8, (a0), t0
161161
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m8, ta, ma
162-
# CHECK-NEXT: 1 13 16.00 * vlse64.v v8, (a0), t0
162+
# CHECK-NEXT: 1 30 16.00 * vlse64.v v8, (a0), t0
163163
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf2, ta, ma
164-
# CHECK-NEXT: 1 13 8.00 * vsse8.v v8, (a0), t0
164+
# CHECK-NEXT: 1 22 8.00 * vsse8.v v8, (a0), t0
165165
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf4, ta, ma
166-
# CHECK-NEXT: 1 13 4.00 * vsse8.v v8, (a0), t0
166+
# CHECK-NEXT: 1 18 4.00 * vsse8.v v8, (a0), t0
167167
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, mf8, ta, ma
168168
# CHECK-NEXT: 1 13 2.00 * vsse8.v v8, (a0), t0
169169
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m1, ta, ma
170-
# CHECK-NEXT: 1 13 16.00 * vsse8.v v8, (a0), t0
170+
# CHECK-NEXT: 1 30 16.00 * vsse8.v v8, (a0), t0
171171
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m2, ta, ma
172-
# CHECK-NEXT: 1 13 32.00 * vsse8.v v8, (a0), t0
172+
# CHECK-NEXT: 1 30 32.00 * vsse8.v v8, (a0), t0
173173
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m4, ta, ma
174-
# CHECK-NEXT: 1 13 64.00 * vsse8.v v8, (a0), t0
174+
# CHECK-NEXT: 1 62 64.00 * vsse8.v v8, (a0), t0
175175
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e8, m8, ta, ma
176-
# CHECK-NEXT: 1 13 128.00 * vsse8.v v8, (a0), t0
176+
# CHECK-NEXT: 1 126 128.00 * vsse8.v v8, (a0), t0
177177
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf2, ta, ma
178-
# CHECK-NEXT: 1 13 4.00 * vsse16.v v8, (a0), t0
178+
# CHECK-NEXT: 1 18 4.00 * vsse16.v v8, (a0), t0
179179
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, mf4, ta, ma
180180
# CHECK-NEXT: 1 13 2.00 * vsse16.v v8, (a0), t0
181181
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m1, ta, ma
182-
# CHECK-NEXT: 1 13 8.00 * vsse16.v v8, (a0), t0
182+
# CHECK-NEXT: 1 22 8.00 * vsse16.v v8, (a0), t0
183183
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m2, ta, ma
184-
# CHECK-NEXT: 1 13 16.00 * vsse16.v v8, (a0), t0
184+
# CHECK-NEXT: 1 30 16.00 * vsse16.v v8, (a0), t0
185185
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m4, ta, ma
186-
# CHECK-NEXT: 1 13 32.00 * vsse16.v v8, (a0), t0
186+
# CHECK-NEXT: 1 30 32.00 * vsse16.v v8, (a0), t0
187187
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e16, m8, ta, ma
188-
# CHECK-NEXT: 1 13 64.00 * vsse16.v v8, (a0), t0
188+
# CHECK-NEXT: 1 62 64.00 * vsse16.v v8, (a0), t0
189189
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, mf2, ta, ma
190190
# CHECK-NEXT: 1 13 2.00 * vsse32.v v8, (a0), t0
191191
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m1, ta, ma
192-
# CHECK-NEXT: 1 13 4.00 * vsse32.v v8, (a0), t0
192+
# CHECK-NEXT: 1 18 4.00 * vsse32.v v8, (a0), t0
193193
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m2, ta, ma
194-
# CHECK-NEXT: 1 13 8.00 * vsse32.v v8, (a0), t0
194+
# CHECK-NEXT: 1 22 8.00 * vsse32.v v8, (a0), t0
195195
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m4, ta, ma
196-
# CHECK-NEXT: 1 13 16.00 * vsse32.v v8, (a0), t0
196+
# CHECK-NEXT: 1 30 16.00 * vsse32.v v8, (a0), t0
197197
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e32, m8, ta, ma
198-
# CHECK-NEXT: 1 13 32.00 * vsse32.v v8, (a0), t0
198+
# CHECK-NEXT: 1 30 32.00 * vsse32.v v8, (a0), t0
199199
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m1, ta, ma
200200
# CHECK-NEXT: 1 13 2.00 * vsse64.v v8, (a0), t0
201201
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m2, ta, ma
202-
# CHECK-NEXT: 1 13 4.00 * vsse64.v v8, (a0), t0
202+
# CHECK-NEXT: 1 18 4.00 * vsse64.v v8, (a0), t0
203203
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m4, ta, ma
204-
# CHECK-NEXT: 1 13 8.00 * vsse64.v v8, (a0), t0
204+
# CHECK-NEXT: 1 22 8.00 * vsse64.v v8, (a0), t0
205205
# CHECK-NEXT: 1 1 1.00 U vsetvli zero, zero, e64, m8, ta, ma
206-
# CHECK-NEXT: 1 13 16.00 * vsse64.v v8, (a0), t0
206+
# CHECK-NEXT: 1 30 16.00 * vsse64.v v8, (a0), t0
207207

208208
# CHECK: Resources:
209209
# CHECK-NEXT: [0] - SiFiveP400Div

0 commit comments

Comments
 (0)