Skip to content

Commit 9a2658d

Browse files
committed
Adjust latency and remove --all-stats
1 parent 45cbe2d commit 9a2658d

File tree

4 files changed

+82
-211
lines changed

4 files changed

+82
-211
lines changed

llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td

Lines changed: 19 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -10,19 +10,19 @@
1010
// We assume that:
1111
// * 6-issue out-of-order CPU with 192 ROB entries.
1212
// * Units:
13-
// * IXU (Integer GenericOOOALU Unit): 4 units, only one can execute division.
13+
// * IXU (Integer ALU Unit): 4 units, only one can execute division.
1414
// * FXU (Floating-point Unit): 2 units.
1515
// * LSU (Load/Store Unit): 2 units.
1616
// * VXU (Vector Unit): 1 unit.
1717
// * Latency:
1818
// * Integer instructions: 1 cycle.
1919
// * Multiplication instructions: 4 cycles.
2020
// * Division instructions: 7-13 cycles.
21-
// * Floating-point instructions: 4-6 cycles.
21+
// * Floating-point instructions: 2-6 cycles.
2222
// * Vector instructions: 2-6 cycles.
2323
// * Load/Store:
2424
// * IXU: 4 cycles.
25-
// * FXU: 6 cycles.
25+
// * FXU: 4 cycles.
2626
// * VXU: 6 cycles.
2727
// * Integer/floating-point/vector div/rem/sqrt/... are non-pipelined.
2828
//===----------------------------------------------------------------------===//
@@ -129,7 +129,7 @@ def : WriteRes<WriteAtomicSTD, [GenericOOOLSU]>;
129129
// Floating-point
130130
//===----------------------------------------------------------------------===//
131131
// Floating-point load
132-
let Latency = 6 in {
132+
let Latency = 4 in {
133133
def : WriteRes<WriteFLD32, [GenericOOOLSU]>;
134134
def : WriteRes<WriteFLD64, [GenericOOOLSU]>;
135135
}
@@ -139,12 +139,12 @@ def : WriteRes<WriteFST32, [GenericOOOLSU]>;
139139
def : WriteRes<WriteFST64, [GenericOOOLSU]>;
140140

141141
// Arithmetic and logic
142-
let Latency = 4 in {
142+
let Latency = 2 in {
143143
def : WriteRes<WriteFAdd32, [GenericOOOFPU]>;
144144
def : WriteRes<WriteFAdd64, [GenericOOOFPU]>;
145145
}
146146

147-
let Latency = 5 in {
147+
let Latency = 4 in {
148148
def : WriteRes<WriteFMul32, [GenericOOOFPU]>;
149149
def : WriteRes<WriteFMul64, [GenericOOOFPU]>;
150150
}
@@ -177,29 +177,29 @@ let Latency = 17, ReleaseAtCycles = [17] in {
177177
}
178178

179179
// Conversions
180-
let Latency = 4 in {
180+
let Latency = 2 in {
181181
def : WriteRes<WriteFCvtI32ToF32, [GenericOOOFPU]>;
182182
def : WriteRes<WriteFCvtI32ToF64, [GenericOOOFPU]>;
183183
def : WriteRes<WriteFCvtI64ToF32, [GenericOOOFPU]>;
184184
def : WriteRes<WriteFCvtI64ToF64, [GenericOOOFPU]>;
185185
}
186186

187-
let Latency = 4 in {
187+
let Latency = 2 in {
188188
def : WriteRes<WriteFCvtF32ToI32, [GenericOOOFPU]>;
189189
def : WriteRes<WriteFCvtF32ToI64, [GenericOOOFPU]>;
190190
}
191191

192-
let Latency = 4 in {
192+
let Latency = 2 in {
193193
def : WriteRes<WriteFCvtF64ToI32, [GenericOOOFPU]>;
194194
def : WriteRes<WriteFCvtF64ToI64, [GenericOOOFPU]>;
195195
}
196196

197-
let Latency = 4 in {
197+
let Latency = 2 in {
198198
def : WriteRes<WriteFCvtF64ToF32, [GenericOOOFPU]>;
199199
def : WriteRes<WriteFCvtF32ToF64, [GenericOOOFPU]>;
200200
}
201201

202-
let Latency = 6 in {
202+
let Latency = 2 in {
203203
def : WriteRes<WriteFMovI32ToF32, [GenericOOOFPU]>;
204204
def : WriteRes<WriteFMovI64ToF64, [GenericOOOFPU]>;
205205
def : WriteRes<WriteFMovF32ToI32, [GenericOOOFPU]>;
@@ -275,13 +275,13 @@ def : WriteRes<WriteXPERM, [GenericOOOALU]>;
275275
//===----------------------------------------------------------------------===//
276276
// Zfa extension
277277
//===----------------------------------------------------------------------===//
278-
let Latency = 3 in {
278+
let Latency = 2 in {
279279
def : WriteRes<WriteFRoundF16, [GenericOOOFPU]>;
280280
def : WriteRes<WriteFRoundF32, [GenericOOOFPU]>;
281281
def : WriteRes<WriteFRoundF64, [GenericOOOFPU]>;
282282
}
283283

284-
let Latency = 5 in {
284+
let Latency = 2 in {
285285
def : WriteRes<WriteFLI16, [GenericOOOFPU]>;
286286
def : WriteRes<WriteFLI32, [GenericOOOFPU]>;
287287
def : WriteRes<WriteFLI64, [GenericOOOFPU]>;
@@ -292,36 +292,36 @@ let Latency = 5 in {
292292
//===----------------------------------------------------------------------===//
293293
// Zfhmin
294294
// Load/Store
295-
let Latency = 6 in
295+
let Latency = 4 in
296296
def : WriteRes<WriteFLD16, [GenericOOOLSU]>;
297297
def : WriteRes<WriteFST16, [GenericOOOLSU]>;
298298

299299
// Conversions
300-
let Latency = 3 in {
300+
let Latency = 2 in {
301301
def : WriteRes<WriteFCvtF16ToF64, [GenericOOOFPU]>;
302302
def : WriteRes<WriteFCvtF64ToF16, [GenericOOOFPU]>;
303303
def : WriteRes<WriteFCvtF32ToF16, [GenericOOOFPU]>;
304304
def : WriteRes<WriteFCvtF16ToF32, [GenericOOOFPU]>;
305305
}
306306

307-
let Latency = 4 in {
307+
let Latency = 2 in {
308308
def : WriteRes<WriteFMovI16ToF16, [GenericOOOFPU]>;
309309
def : WriteRes<WriteFMovF16ToI16, [GenericOOOFPU]>;
310310
}
311311

312312
// Other than Zfhmin
313-
let Latency = 4 in {
313+
let Latency = 2 in {
314314
def : WriteRes<WriteFCvtI64ToF16, []>;
315315
def : WriteRes<WriteFCvtI32ToF16, []>;
316316
def : WriteRes<WriteFCvtF16ToI64, []>;
317317
def : WriteRes<WriteFCvtF16ToI32, []>;
318318
}
319319

320320
// Arithmetic and logic
321-
let Latency = 4 in
321+
let Latency = 2 in
322322
def : WriteRes<WriteFAdd16, [GenericOOOFPU]>;
323323

324-
let Latency = 5 in
324+
let Latency = 4 in
325325
def : WriteRes<WriteFMul16, [GenericOOOFPU]>;
326326

327327
let Latency = 6 in

llvm/test/tools/llvm-mca/RISCV/GenericOOO/atomic.s

Lines changed: 1 addition & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
2-
# RUN: llvm-mca -mtriple=riscv64 -mattr=+rva23u64,+zabha -mcpu=generic-ooo --all-stats -iterations=1 < %s | FileCheck %s
2+
# RUN: llvm-mca -mtriple=riscv64 -mattr=+rva23u64,+zabha -mcpu=generic-ooo -iterations=1 < %s | FileCheck %s
33

44
# Zalrsc
55
lr.w t0, (t1)
@@ -384,45 +384,6 @@ amomaxu.h.aqrl s5, s4, (s3)
384384
# CHECK-NEXT: 1 5 0.50 * * amominu.h.aqrl s6, s5, (s4)
385385
# CHECK-NEXT: 1 5 0.50 * * amomaxu.h.aqrl s5, s4, (s3)
386386

387-
# CHECK: Dynamic Dispatch Stall Cycles:
388-
# CHECK-NEXT: RAT - Register unavailable: 0
389-
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
390-
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
391-
# CHECK-NEXT: LQ - Load queue full: 0
392-
# CHECK-NEXT: SQ - Store queue full: 0
393-
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
394-
# CHECK-NEXT: USH - Uncategorised Structural Hazard: 0
395-
396-
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
397-
# CHECK-NEXT: [# dispatched], [# cycles]
398-
# CHECK-NEXT: 0, 121 (81.8%)
399-
# CHECK-NEXT: 4, 1 (0.7%)
400-
# CHECK-NEXT: 6, 26 (17.6%)
401-
402-
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
403-
# CHECK-NEXT: [# issued], [# cycles]
404-
# CHECK-NEXT: 0, 58 (39.2%)
405-
# CHECK-NEXT: 1, 20 (13.5%)
406-
# CHECK-NEXT: 2, 70 (47.3%)
407-
408-
# CHECK: Scheduler's queue usage:
409-
# CHECK-NEXT: No scheduler resources used.
410-
411-
# CHECK: Retire Control Unit - number of cycles where we saw N instructions retired:
412-
# CHECK-NEXT: [# retired], [# cycles]
413-
# CHECK-NEXT: 0, 58 (39.2%)
414-
# CHECK-NEXT: 1, 22 (14.9%)
415-
# CHECK-NEXT: 2, 66 (44.6%)
416-
# CHECK-NEXT: 3, 2 (1.4%)
417-
418-
# CHECK: Total ROB Entries: 192
419-
# CHECK-NEXT: Max Used ROB Entries: 136 ( 70.8% )
420-
# CHECK-NEXT: Average Used ROB Entries per cy: 69 ( 35.9% )
421-
422-
# CHECK: Register File statistics:
423-
# CHECK-NEXT: Total number of mappings created: 160
424-
# CHECK-NEXT: Max number of mappings used: 136
425-
426387
# CHECK: Resources:
427388
# CHECK-NEXT: [0] - GenericOOODIV
428389
# CHECK-NEXT: [1.0] - GenericOOOFPU

0 commit comments

Comments
 (0)