Skip to content

Commit 3b9a0db

Browse files
[RISCV] Update SpacemiT-X60 vector load/stores (#169936)
This PR adds hardware-measured latencies/occupancy for all RVV loads/stores to the SpacemiT-X60 scheduling model.
1 parent 254b33f commit 3b9a0db

File tree

6 files changed

+2229
-2173
lines changed

6 files changed

+2229
-2173
lines changed

llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td

Lines changed: 84 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,33 @@ class SMX60IsWorstCaseMXSEW<string mx, int sew, list<string> MxList, bit isF = 0
125125
defvar SMX60VLEN = 256;
126126
defvar SMX60DLEN = !div(SMX60VLEN, 2);
127127

128+
class SMX60GetLMulCycles<string mx> {
129+
int c = !cond(
130+
!eq(mx, "M1") : 1,
131+
!eq(mx, "M2") : 2,
132+
!eq(mx, "M4") : 4,
133+
!eq(mx, "M8") : 8,
134+
!eq(mx, "MF2") : 1,
135+
!eq(mx, "MF4") : 1,
136+
!eq(mx, "MF8") : 1
137+
);
138+
}
139+
140+
class SMX60GetVLMAX<string mx, int sew> {
141+
defvar LMUL = SMX60GetLMulCycles<mx>.c;
142+
int val = !cond(
143+
!eq(mx, "MF2") : !div(!div(SMX60VLEN, 2), sew),
144+
!eq(mx, "MF4") : !div(!div(SMX60VLEN, 4), sew),
145+
!eq(mx, "MF8") : !div(!div(SMX60VLEN, 8), sew),
146+
true: !div(!mul(SMX60VLEN, LMUL), sew)
147+
);
148+
}
149+
150+
// Latency for segmented loads and stores is calculated as vl * nf.
151+
class SMX60SegmentedLdStCycles<string mx, int sew, int nf> {
152+
int c = !mul(SMX60GetVLMAX<mx, sew>.val, nf);
153+
}
154+
128155
def SpacemitX60Model : SchedMachineModel {
129156
let IssueWidth = 2; // dual-issue
130157
let MicroOpBufferSize = 0; // in-order
@@ -367,23 +394,43 @@ foreach mx = SchedMxList in {
367394
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxList>.c;
368395

369396
// Unit-stride loads and stores
370-
defm "" : LMULWriteResMX<"WriteVLDE", [SMX60_VLS], mx, IsWorstCase>;
371-
defm "" : LMULWriteResMX<"WriteVLDFF", [SMX60_VLS], mx, IsWorstCase>;
372-
defm "" : LMULWriteResMX<"WriteVSTE", [SMX60_VLS], mx, IsWorstCase>;
397+
defvar VLDELatAndOcc = ConstValueUntilLMULThenDoubleBase<"M2", 3, 4, mx>.c;
398+
let Latency = VLDELatAndOcc, ReleaseAtCycles = [VLDELatAndOcc] in {
399+
defm "" : LMULWriteResMX<"WriteVLDE", [SMX60_VLS], mx, IsWorstCase>;
400+
}
401+
defvar VSTELatAndOcc = GetLMULValue<[2, 2, 2, 3, 4, 8, 19], mx>.c;
402+
let Latency = VSTELatAndOcc, ReleaseAtCycles = [VSTELatAndOcc] in {
403+
defm "" : LMULWriteResMX<"WriteVSTE", [SMX60_VLS], mx, IsWorstCase>;
404+
}
405+
defvar VLDFFLatAndOcc = GetLMULValue<[4, 4, 4, 5, 7, 11, 19], mx>.c;
406+
let Latency = VLDFFLatAndOcc, ReleaseAtCycles = [VLDFFLatAndOcc] in {
407+
defm "" : LMULWriteResMX<"WriteVLDFF", [SMX60_VLS], mx, IsWorstCase>;
408+
}
373409

374410
// Mask loads and stores
375-
defm "" : LMULWriteResMX<"WriteVLDM", [SMX60_VLS], mx, IsWorstCase=!eq(mx, "M1")>;
376-
defm "" : LMULWriteResMX<"WriteVSTM", [SMX60_VLS], mx, IsWorstCase=!eq(mx, "M1")>;
411+
let ReleaseAtCycles = [2] in {
412+
defm "" : LMULWriteResMX<"WriteVLDM", [SMX60_VLS], mx, IsWorstCase>;
413+
}
414+
let Latency = 2, ReleaseAtCycles = [2] in {
415+
defm "" : LMULWriteResMX<"WriteVSTM", [SMX60_VLS], mx, IsWorstCase>;
416+
}
377417

378418
// Strided and indexed loads and stores
379419
foreach eew = [8, 16, 32, 64] in {
380-
defm "" : LMULWriteResMX<"WriteVLDS" # eew, [SMX60_VLS], mx, IsWorstCase>;
381-
defm "" : LMULWriteResMX<"WriteVLDUX" # eew, [SMX60_VLS], mx, IsWorstCase>;
382-
defm "" : LMULWriteResMX<"WriteVLDOX" # eew, [SMX60_VLS], mx, IsWorstCase>;
420+
defvar StridedLdStLatAndOcc = SMX60GetVLMAX<mx, eew>.val;
421+
let Latency = StridedLdStLatAndOcc, ReleaseAtCycles = [StridedLdStLatAndOcc] in {
422+
defm "" : LMULWriteResMX<"WriteVLDS" # eew, [SMX60_VLS], mx, IsWorstCase>;
423+
defm "" : LMULWriteResMX<"WriteVSTS" # eew, [SMX60_VLS], mx, IsWorstCase>;
424+
}
425+
426+
defvar IndexedLdStLatAndOcc = !div(SMX60GetVLMAX<mx, eew>.val, 2);
427+
let Latency = IndexedLdStLatAndOcc, ReleaseAtCycles = [IndexedLdStLatAndOcc] in {
428+
defm "" : LMULWriteResMX<"WriteVLDUX" # eew, [SMX60_VLS], mx, IsWorstCase>;
429+
defm "" : LMULWriteResMX<"WriteVLDOX" # eew, [SMX60_VLS], mx, IsWorstCase>;
383430

384-
defm "" : LMULWriteResMX<"WriteVSTS" # eew, [SMX60_VLS], mx, IsWorstCase>;
385-
defm "" : LMULWriteResMX<"WriteVSTUX" # eew, [SMX60_VLS], mx, IsWorstCase>;
386-
defm "" : LMULWriteResMX<"WriteVSTOX" # eew, [SMX60_VLS], mx, IsWorstCase>;
431+
defm "" : LMULWriteResMX<"WriteVSTUX" # eew, [SMX60_VLS], mx, IsWorstCase>;
432+
defm "" : LMULWriteResMX<"WriteVSTOX" # eew, [SMX60_VLS], mx, IsWorstCase>;
433+
}
387434
}
388435
}
389436

@@ -393,30 +440,39 @@ foreach mx = SchedMxList in {
393440
foreach eew = [8, 16, 32, 64] in {
394441
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxList>.c;
395442

396-
// Unit-stride segmented
397-
defm "" : LMULWriteResMX<"WriteVLSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
398-
defm "" : LMULWriteResMX<"WriteVLSEGFF" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
399-
defm "" : LMULWriteResMX<"WriteVSSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
400-
401-
// Strided/indexed segmented
402-
defm "" : LMULWriteResMX<"WriteVLSSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
403-
defm "" : LMULWriteResMX<"WriteVSSSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
404-
405-
// Indexed segmented
406-
defm "" : LMULWriteResMX<"WriteVLOXSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
407-
defm "" : LMULWriteResMX<"WriteVLUXSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
408-
defm "" : LMULWriteResMX<"WriteVSUXSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
409-
defm "" : LMULWriteResMX<"WriteVSOXSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
443+
defvar SegmentedLdStLatAndOcc = SMX60SegmentedLdStCycles<mx, eew, nf>.c;
444+
let Latency = SegmentedLdStLatAndOcc, ReleaseAtCycles = [SegmentedLdStLatAndOcc] in {
445+
// Unit-stride segmented
446+
defm "" : LMULWriteResMX<"WriteVLSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
447+
defm "" : LMULWriteResMX<"WriteVLSEGFF" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
448+
defm "" : LMULWriteResMX<"WriteVSSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
449+
450+
// Strided/indexed segmented
451+
defm "" : LMULWriteResMX<"WriteVLSSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
452+
defm "" : LMULWriteResMX<"WriteVSSSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
453+
454+
// Indexed segmented
455+
defm "" : LMULWriteResMX<"WriteVLOXSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
456+
defm "" : LMULWriteResMX<"WriteVLUXSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
457+
defm "" : LMULWriteResMX<"WriteVSUXSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
458+
defm "" : LMULWriteResMX<"WriteVSOXSEG" # nf # "e" #eew, [SMX60_VLS], mx, IsWorstCase>;
459+
}
410460
}
411461
}
412462
}
413463

414464
// Whole register move/load/store
415465
foreach LMul = [1, 2, 4, 8] in {
416-
def : WriteRes<!cast<SchedWrite>("WriteVLD" # LMul # "R"), [SMX60_VLS]>;
417-
def : WriteRes<!cast<SchedWrite>("WriteVST" # LMul # "R"), [SMX60_VLS]>;
466+
defvar WholeRegLdStLatAndOcc = !if(!eq(LMul, 1), 3, !mul(LMul, 2));
467+
let Latency = WholeRegLdStLatAndOcc, ReleaseAtCycles = [WholeRegLdStLatAndOcc] in {
468+
def : WriteRes<!cast<SchedWrite>("WriteVLD" # LMul # "R"), [SMX60_VLS]>;
469+
def : WriteRes<!cast<SchedWrite>("WriteVST" # LMul # "R"), [SMX60_VLS]>;
470+
}
418471

419-
def : WriteRes<!cast<SchedWrite>("WriteVMov" # LMul # "V"), [SMX60_VIEU]>;
472+
defvar VMovLatAndOcc = !if(!eq(LMul, 1), 4, !mul(LMul, 2));
473+
let Latency = VMovLatAndOcc, ReleaseAtCycles = [VMovLatAndOcc] in {
474+
def : WriteRes<!cast<SchedWrite>("WriteVMov" # LMul # "V"), [SMX60_VIEU]>;
475+
}
420476
}
421477

422478
// 11. Vector Integer Arithmetic Instructions

0 commit comments

Comments (0)