Skip to content

Commit 18c2b1f

Browse files
Added Vector Integer Instruction latencies
Signed-off-by: Mikhail R. Gadelha <[email protected]>
1 parent c4d4e76 commit 18c2b1f

File tree

9 files changed

+4711
-4581
lines changed

9 files changed

+4711
-4581
lines changed

llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td

Lines changed: 178 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,87 @@ class SMX60IsWorstCaseMXSEW<string mx, int sew, list<string> MxList, bit isF = 0
2424
bit c = !and(!eq(mx, LLMUL), !eq(sew, SSEW));
2525
}
2626

27+
defvar SMX60VLEN = 256;
28+
defvar SMX60DLEN = !div(SMX60VLEN, 2);
29+
30+
class Get1248Latency<string mx> {
31+
int c = !cond(
32+
!eq(mx, "M1") : 1,
33+
!eq(mx, "M2") : 2,
34+
!eq(mx, "M4") : 4,
35+
!eq(mx, "M8") : 8,
36+
!eq(mx, "MF2") : 1,
37+
!eq(mx, "MF4") : 1,
38+
!eq(mx, "MF8") : 1
39+
);
40+
}
41+
42+
// Used for: logical ops, shifts, sign ext, merge/move, FP sign/recip/convert, mask ops, slides
43+
class Get44816Latency<string mx> {
44+
int c = !cond(
45+
!eq(mx, "M1") : 4,
46+
!eq(mx, "M2") : 4,
47+
!eq(mx, "M4") : 8,
48+
!eq(mx, "M8") : 16,
49+
!eq(mx, "MF2") : 4,
50+
!eq(mx, "MF4") : 4,
51+
!eq(mx, "MF8") : 4
52+
);
53+
}
54+
55+
// Used for: arithmetic (add/sub/min/max), saturating/averaging, FP add/sub/min/max
56+
class Get4458Latency<string mx> {
57+
int c = !cond(
58+
!eq(mx, "M1") : 4,
59+
!eq(mx, "M2") : 4,
60+
!eq(mx, "M4") : 5,
61+
!eq(mx, "M8") : 8,
62+
!eq(mx, "MF2") : 4,
63+
!eq(mx, "MF4") : 4,
64+
!eq(mx, "MF8") : 4
65+
);
66+
}
67+
68+
// Widening scaling pattern (MF8, MF4, MF2, M1, M2, M4, M8 = 4,4,4,4,5,8,8): plateaus at higher LMULs
69+
// Used for: widening operations
70+
class Get4588Latency<string mx> {
71+
int c = !cond(
72+
!eq(mx, "M1") : 4,
73+
!eq(mx, "M2") : 5,
74+
!eq(mx, "M4") : 8,
75+
!eq(mx, "M8") : 8, // M8 not supported for most widening, fallback
76+
!eq(mx, "MF2") : 4,
77+
!eq(mx, "MF4") : 4,
78+
!eq(mx, "MF8") : 4
79+
);
80+
}
81+
82+
// Used for: mask-producing comparisons, carry ops with mask, FP comparisons
83+
class Get461018Latency<string mx> {
84+
int c = !cond(
85+
!eq(mx, "M1") : 4,
86+
!eq(mx, "M2") : 6,
87+
!eq(mx, "M4") : 10,
88+
!eq(mx, "M8") : 18,
89+
!eq(mx, "MF2") : 4,
90+
!eq(mx, "MF4") : 4,
91+
!eq(mx, "MF8") : 4
92+
);
93+
}
94+
95+
// Used for: e64 multiply pattern, complex ops
96+
class Get781632Latency<string mx> {
97+
int c = !cond(
98+
!eq(mx, "M1") : 7,
99+
!eq(mx, "M2") : 8,
100+
!eq(mx, "M4") : 16,
101+
!eq(mx, "M8") : 32,
102+
!eq(mx, "MF2") : 7,
103+
!eq(mx, "MF4") : 7,
104+
!eq(mx, "MF8") : 7
105+
);
106+
}
107+
27108
def SpacemitX60Model : SchedMachineModel {
28109
let IssueWidth = 2; // dual-issue
29110
let MicroOpBufferSize = 0; // in-order
@@ -322,71 +403,120 @@ foreach LMul = [1, 2, 4, 8] in {
322403
foreach mx = SchedMxList in {
323404
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxList>.c;
324405

325-
defm "" : LMULWriteResMX<"WriteVIALUV", [SMX60_VIEU], mx, IsWorstCase>;
326-
defm "" : LMULWriteResMX<"WriteVIALUX", [SMX60_VIEU], mx, IsWorstCase>;
327-
defm "" : LMULWriteResMX<"WriteVIALUI", [SMX60_VIEU], mx, IsWorstCase>;
328-
defm "" : LMULWriteResMX<"WriteVExtV", [SMX60_VIEU], mx, IsWorstCase>;
329-
defm "" : LMULWriteResMX<"WriteVICALUV", [SMX60_VIEU], mx, IsWorstCase>;
330-
defm "" : LMULWriteResMX<"WriteVICALUX", [SMX60_VIEU], mx, IsWorstCase>;
331-
defm "" : LMULWriteResMX<"WriteVICALUI", [SMX60_VIEU], mx, IsWorstCase>;
332-
defm "" : LMULWriteResMX<"WriteVICALUMV", [SMX60_VIEU], mx, IsWorstCase>;
333-
defm "" : LMULWriteResMX<"WriteVICALUMX", [SMX60_VIEU], mx, IsWorstCase>;
334-
defm "" : LMULWriteResMX<"WriteVICALUMI", [SMX60_VIEU], mx, IsWorstCase>;
335-
defm "" : LMULWriteResMX<"WriteVICmpV", [SMX60_VIEU], mx, IsWorstCase>;
336-
defm "" : LMULWriteResMX<"WriteVICmpX", [SMX60_VIEU], mx, IsWorstCase>;
337-
defm "" : LMULWriteResMX<"WriteVICmpI", [SMX60_VIEU], mx, IsWorstCase>;
338-
defm "" : LMULWriteResMX<"WriteVIMinMaxV", [SMX60_VIEU], mx, IsWorstCase>;
339-
defm "" : LMULWriteResMX<"WriteVIMinMaxX", [SMX60_VIEU], mx, IsWorstCase>;
340-
defm "" : LMULWriteResMX<"WriteVIMergeV", [SMX60_VIEU], mx, IsWorstCase>;
341-
defm "" : LMULWriteResMX<"WriteVIMergeX", [SMX60_VIEU], mx, IsWorstCase>;
342-
defm "" : LMULWriteResMX<"WriteVIMergeI", [SMX60_VIEU], mx, IsWorstCase>;
343-
defm "" : LMULWriteResMX<"WriteVIMovV", [SMX60_VIEU], mx, IsWorstCase>;
344-
defm "" : LMULWriteResMX<"WriteVIMovX", [SMX60_VIEU], mx, IsWorstCase>;
345-
defm "" : LMULWriteResMX<"WriteVIMovI", [SMX60_VIEU], mx, IsWorstCase>;
346-
347-
defm "" : LMULWriteResMX<"WriteVShiftV", [SMX60_VIEU], mx, IsWorstCase>;
348-
defm "" : LMULWriteResMX<"WriteVShiftX", [SMX60_VIEU], mx, IsWorstCase>;
349-
defm "" : LMULWriteResMX<"WriteVShiftI", [SMX60_VIEU], mx, IsWorstCase>;
350-
351-
defm "" : LMULWriteResMX<"WriteVIMulV", [SMX60_VIEU], mx, IsWorstCase>;
352-
defm "" : LMULWriteResMX<"WriteVIMulX", [SMX60_VIEU], mx, IsWorstCase>;
353-
defm "" : LMULWriteResMX<"WriteVIMulAddV", [SMX60_VIEU], mx, IsWorstCase>;
354-
defm "" : LMULWriteResMX<"WriteVIMulAddX", [SMX60_VIEU], mx, IsWorstCase>;
406+
let Latency = Get4458Latency<mx>.c, ReleaseAtCycles = [Get1248Latency<mx>.c] in {
407+
defm "" : LMULWriteResMX<"WriteVIMinMaxV", [SMX60_VIEU], mx, IsWorstCase>;
408+
defm "" : LMULWriteResMX<"WriteVIMinMaxX", [SMX60_VIEU], mx, IsWorstCase>;
409+
}
410+
411+
let Latency = Get44816Latency<mx>.c, ReleaseAtCycles = [Get44816Latency<mx>.c] in {
412+
// Pattern of vadd, vsub, vrsub: 4/4/5/8
413+
// Pattern of vand, vor, vxor: 4/4/8/16
414+
// They are grouped together, so we use the worst case: 4/4/8/16
415+
// TODO: use InstRW to override individual instructions' scheduling data
416+
defm "" : LMULWriteResMX<"WriteVIALUV", [SMX60_VIEU], mx, IsWorstCase>;
417+
defm "" : LMULWriteResMX<"WriteVIALUX", [SMX60_VIEU], mx, IsWorstCase>;
418+
defm "" : LMULWriteResMX<"WriteVIALUI", [SMX60_VIEU], mx, IsWorstCase>;
419+
420+
defm "" : LMULWriteResMX<"WriteVExtV", [SMX60_VIEU], mx, IsWorstCase>;
421+
defm "" : LMULWriteResMX<"WriteVIMergeV", [SMX60_VIEU], mx, IsWorstCase>;
422+
defm "" : LMULWriteResMX<"WriteVIMergeX", [SMX60_VIEU], mx, IsWorstCase>;
423+
defm "" : LMULWriteResMX<"WriteVIMergeI", [SMX60_VIEU], mx, IsWorstCase>;
424+
defm "" : LMULWriteResMX<"WriteVIMovV", [SMX60_VIEU], mx, IsWorstCase>;
425+
defm "" : LMULWriteResMX<"WriteVIMovX", [SMX60_VIEU], mx, IsWorstCase>;
426+
defm "" : LMULWriteResMX<"WriteVIMovI", [SMX60_VIEU], mx, IsWorstCase>;
427+
defm "" : LMULWriteResMX<"WriteVShiftV", [SMX60_VIEU], mx, IsWorstCase>;
428+
defm "" : LMULWriteResMX<"WriteVShiftX", [SMX60_VIEU], mx, IsWorstCase>;
429+
defm "" : LMULWriteResMX<"WriteVShiftI", [SMX60_VIEU], mx, IsWorstCase>;
430+
431+
defm "" : LMULWriteResMX<"WriteVICALUV", [SMX60_VIEU], mx, IsWorstCase>;
432+
defm "" : LMULWriteResMX<"WriteVICALUX", [SMX60_VIEU], mx, IsWorstCase>;
433+
defm "" : LMULWriteResMX<"WriteVICALUI", [SMX60_VIEU], mx, IsWorstCase>;
434+
}
435+
436+
let Latency = Get461018Latency<mx>.c, ReleaseAtCycles = [Get461018Latency<mx>.c] in {
437+
defm "" : LMULWriteResMX<"WriteVICALUMV", [SMX60_VIEU], mx, IsWorstCase>;
438+
defm "" : LMULWriteResMX<"WriteVICALUMX", [SMX60_VIEU], mx, IsWorstCase>;
439+
defm "" : LMULWriteResMX<"WriteVICALUMI", [SMX60_VIEU], mx, IsWorstCase>;
440+
defm "" : LMULWriteResMX<"WriteVICmpV", [SMX60_VIEU], mx, IsWorstCase>;
441+
defm "" : LMULWriteResMX<"WriteVICmpX", [SMX60_VIEU], mx, IsWorstCase>;
442+
defm "" : LMULWriteResMX<"WriteVICmpI", [SMX60_VIEU], mx, IsWorstCase>;
443+
}
444+
445+
// Pattern of vmacc, vmadd, vmul, vmulh, etc.: e8/e16 = 4/4/5/8, e32 = 5/5/5/8,
446+
// e64 = 7/8/16/32. We use the worst case until we can split by SEW.
447+
// TODO: change WriteVIMulV, etc to be defined with LMULSEWSchedWrites
448+
let Latency = Get781632Latency<mx>.c in {
449+
defm "" : LMULWriteResMX<"WriteVIMulV", [SMX60_VIEU], mx, IsWorstCase>;
450+
defm "" : LMULWriteResMX<"WriteVIMulX", [SMX60_VIEU], mx, IsWorstCase>;
451+
defm "" : LMULWriteResMX<"WriteVIMulAddV", [SMX60_VIEU], mx, IsWorstCase>;
452+
defm "" : LMULWriteResMX<"WriteVIMulAddX", [SMX60_VIEU], mx, IsWorstCase>;
453+
}
355454
}
356455

357456
// Widening
457+
// Pattern of vwmul, vwmacc, etc.: e8/e16 = 4/4/5/8, e32 = 5/5/5/8
458+
// We use the worst-case for all.
358459
foreach mx = SchedMxListW in {
359460
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxListW>.c;
360461

361-
defm "" : LMULWriteResMX<"WriteVIWALUV", [SMX60_VIEU], mx, IsWorstCase>;
362-
defm "" : LMULWriteResMX<"WriteVIWALUX", [SMX60_VIEU], mx, IsWorstCase>;
363-
defm "" : LMULWriteResMX<"WriteVIWALUI", [SMX60_VIEU], mx, IsWorstCase>;
364-
defm "" : LMULWriteResMX<"WriteVIWMulV", [SMX60_VIEU], mx, IsWorstCase>;
365-
defm "" : LMULWriteResMX<"WriteVIWMulX", [SMX60_VIEU], mx, IsWorstCase>;
366-
defm "" : LMULWriteResMX<"WriteVIWMulAddV", [SMX60_VIEU], mx, IsWorstCase>;
367-
defm "" : LMULWriteResMX<"WriteVIWMulAddX", [SMX60_VIEU], mx, IsWorstCase>;
462+
defvar WideningLat = Get4588Latency<mx>.c;
463+
let Latency = WideningLat, ReleaseAtCycles = [Get1248Latency<mx>.c] in {
464+
defm "" : LMULWriteResMX<"WriteVIWALUV", [SMX60_VIEU], mx, IsWorstCase>;
465+
defm "" : LMULWriteResMX<"WriteVIWALUX", [SMX60_VIEU], mx, IsWorstCase>;
466+
defm "" : LMULWriteResMX<"WriteVIWALUI", [SMX60_VIEU], mx, IsWorstCase>;
467+
}
468+
let Latency = WideningLat in {
469+
defm "" : LMULWriteResMX<"WriteVIWMulV", [SMX60_VIEU], mx, IsWorstCase>;
470+
defm "" : LMULWriteResMX<"WriteVIWMulX", [SMX60_VIEU], mx, IsWorstCase>;
471+
defm "" : LMULWriteResMX<"WriteVIWMulAddV", [SMX60_VIEU], mx, IsWorstCase>;
472+
defm "" : LMULWriteResMX<"WriteVIWMulAddX", [SMX60_VIEU], mx, IsWorstCase>;
473+
}
368474
}
369475

370-
// Vector Integer Division and Remainder
476+
// Division and remainder operations
477+
// Pattern of vdivu: 11/11/11/20/40/80/160
478+
// Pattern of vdiv: 12/12/12/22/44/88/176
479+
// Pattern of vremu: 12/12/12/22/44/88/176
480+
// Pattern of vrem: 13/13/13/24/48/96/192
481+
// We use the worst-case for all: 24/24/24/24/48/96/192
482+
// TODO: Create separate WriteVIRem to more closely match the latencies
371483
foreach mx = SchedMxList in {
372484
foreach sew = SchedSEWSet<mx>.val in {
373485
defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
374486

375-
defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [SMX60_VIEU], mx, sew, IsWorstCase>;
376-
defm "" : LMULSEWWriteResMXSEW<"WriteVIDivX", [SMX60_VIEU], mx, sew, IsWorstCase>;
487+
let Latency = !mul(Get1248Latency<mx>.c, 24) in {
488+
defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [SMX60_VIEU], mx, sew, IsWorstCase>;
489+
defm "" : LMULSEWWriteResMXSEW<"WriteVIDivX", [SMX60_VIEU], mx, sew, IsWorstCase>;
490+
}
377491
}
378492
}
379493

380-
// Narrowing Shift and Clips
381-
foreach mx = SchedMxListW in {
494+
// Fractional LMUL Narrowing Shift and Clips
495+
foreach mx = ["MF8", "MF4", "MF2", "M1"] in {
382496
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxListW>.c;
383497

384-
defm "" : LMULWriteResMX<"WriteVNShiftV", [SMX60_VIEU], mx, IsWorstCase>;
385-
defm "" : LMULWriteResMX<"WriteVNShiftX", [SMX60_VIEU], mx, IsWorstCase>;
386-
defm "" : LMULWriteResMX<"WriteVNShiftI", [SMX60_VIEU], mx, IsWorstCase>;
387-
defm "" : LMULWriteResMX<"WriteVNClipV", [SMX60_VIEU], mx, IsWorstCase>;
388-
defm "" : LMULWriteResMX<"WriteVNClipX", [SMX60_VIEU], mx, IsWorstCase>;
389-
defm "" : LMULWriteResMX<"WriteVNClipI", [SMX60_VIEU], mx, IsWorstCase>;
498+
let Latency = Get44816Latency<mx>.c in {
499+
defm "" : LMULWriteResMX<"WriteVNShiftV", [SMX60_VIEU], mx, IsWorstCase>;
500+
defm "" : LMULWriteResMX<"WriteVNShiftX", [SMX60_VIEU], mx, IsWorstCase>;
501+
defm "" : LMULWriteResMX<"WriteVNShiftI", [SMX60_VIEU], mx, IsWorstCase>;
502+
defm "" : LMULWriteResMX<"WriteVNClipV", [SMX60_VIEU], mx, IsWorstCase>;
503+
defm "" : LMULWriteResMX<"WriteVNClipX", [SMX60_VIEU], mx, IsWorstCase>;
504+
defm "" : LMULWriteResMX<"WriteVNClipI", [SMX60_VIEU], mx, IsWorstCase>;
505+
}
506+
}
507+
508+
// Integer LMUL Narrowing Shift and Clips
509+
foreach mx = ["M2", "M4"] in {
510+
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxListW>.c;
511+
512+
let Latency = !mul(Get44816Latency<mx>.c, 2) in {
513+
defm "" : LMULWriteResMX<"WriteVNShiftV", [SMX60_VIEU], mx, IsWorstCase>;
514+
defm "" : LMULWriteResMX<"WriteVNShiftX", [SMX60_VIEU], mx, IsWorstCase>;
515+
defm "" : LMULWriteResMX<"WriteVNShiftI", [SMX60_VIEU], mx, IsWorstCase>;
516+
defm "" : LMULWriteResMX<"WriteVNClipV", [SMX60_VIEU], mx, IsWorstCase>;
517+
defm "" : LMULWriteResMX<"WriteVNClipX", [SMX60_VIEU], mx, IsWorstCase>;
518+
defm "" : LMULWriteResMX<"WriteVNClipI", [SMX60_VIEU], mx, IsWorstCase>;
519+
}
390520
}
391521

392522
// 12. Vector Fixed-Point Arithmetic Instructions

0 commit comments

Comments
 (0)