Skip to content
163 changes: 124 additions & 39 deletions llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,23 @@ class Get461018Latency<string mx> {
);
}

// Used for: FP FMA operations, complex FP ops
class Get6678Latency<string mx> {
int c = !cond(
!eq(mx, "M4") : 7,
!eq(mx, "M8") : 8,
true: 6
);
}

// Used for: e32 multiply pattern, some FP ops
class Get5558Latency<string mx> {
int c = !cond(
!eq(mx, "M8") : 8,
true: 5
);
}

// Used for: e64 multiply pattern, complex ops
class Get781632Latency<string mx> {
int c = !cond(
Expand Down Expand Up @@ -518,94 +535,162 @@ foreach mx = SchedMxListF in {
foreach sew = SchedSEWSet<mx, isF=1>.val in {
defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;

defm "" : LMULSEWWriteResMXSEW<"WriteVFALUV", [SMX60_VFP], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFALUF", [SMX60_VFP], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulV", [SMX60_VFP], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulF", [SMX60_VFP], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [SMX60_VFP], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [SMX60_VFP], mx, sew, IsWorstCase>;
}
}
let Latency = Get458Latency<mx>.c, ReleaseAtCycles = [4] in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFALUV", [SMX60_VFP], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFALUF", [SMX60_VFP], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [SMX60_VFP], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [SMX60_VFP], mx, sew, IsWorstCase>;
}

foreach mx = SchedMxListF in {
foreach sew = SchedSEWSet<mx, isF=1>.val in {
defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;
let Latency = !if(!eq(sew, 64), Get5558Latency<mx>.c, Get458Latency<mx>.c), ReleaseAtCycles = [4] in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulV", [SMX60_VFP], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulF", [SMX60_VFP], mx, sew, IsWorstCase>;
}

defm "" : LMULSEWWriteResMXSEW<"WriteVFRecpV", [SMX60_VFP], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [SMX60_VFP], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [SMX60_VFP], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [SMX60_VFP], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [SMX60_VFP], mx, sew, IsWorstCase>;
let Latency = Get4816Latency<mx>.c, ReleaseAtCycles = [4] in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFRecpV", [SMX60_VFP], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [SMX60_VFP], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [SMX60_VFP], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [SMX60_VFP], mx, sew, IsWorstCase>;
}

defm "" : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [SMX60_VFP], mx, sew, IsWorstCase>;
// TODO: for some reason, the following cond is not working, and always use Get5558Latency
let Latency = !if(!eq(sew, 64), Get6678Latency<mx>.c, Get5558Latency<mx>.c), ReleaseAtCycles = [5] in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [SMX60_VFP], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [SMX60_VFP], mx, sew, IsWorstCase>;
}
}
}

foreach mx = SchedMxList in {
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxList>.c;

defm "" : LMULWriteResMX<"WriteVFCmpV", [SMX60_VFP], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFCmpF", [SMX60_VFP], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFClassV", [SMX60_VFP], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFMergeV", [SMX60_VFP], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFMovV", [SMX60_VFP], mx, IsWorstCase>;
let Latency = Get461018Latency<mx>.c, ReleaseAtCycles = [4] in {
defm "" : LMULWriteResMX<"WriteVFCmpV", [SMX60_VFP], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFCmpF", [SMX60_VFP], mx, IsWorstCase>;
}

defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SMX60_VFP], mx, IsWorstCase>;
let Latency = Get4816Latency<mx>.c, ReleaseAtCycles = [4] in {
defm "" : LMULWriteResMX<"WriteVFClassV", [SMX60_VFP], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFMergeV", [SMX60_VFP], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFMovV", [SMX60_VFP], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SMX60_VFP], mx, IsWorstCase>;
}
}

// Widening
foreach mx = SchedMxListW in {
foreach sew = SchedSEWSet<mx, isF=0, isWidening=1>.val in {
defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListW>.c;

defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [SMX60_VFP], mx, sew, IsWorstCase>;
let Latency = !mul(Get1248Latency<mx>.c, 4), ReleaseAtCycles = [4] in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [SMX60_VFP], mx, sew, IsWorstCase>;
}
}
}

foreach mx = SchedMxListFW in {
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxListFW>.c;

defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SMX60_VFP], mx, IsWorstCase>;
let Latency = !mul(Get1248Latency<mx>.c, 4), ReleaseAtCycles = [4] in {
defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SMX60_VFP], mx, IsWorstCase>;
}
}

foreach mx = SchedMxListFW in {
foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;

defm "" : LMULSEWWriteResMXSEW<"WriteVFWALUV", [SMX60_VFP], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFWALUF", [SMX60_VFP], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulV", [SMX60_VFP], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulF", [SMX60_VFP], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddV", [SMX60_VFP], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddF", [SMX60_VFP], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [SMX60_VFP], mx, sew, IsWorstCase>;
let Latency = !mul(Get1248Latency<mx>.c, 4), ReleaseAtCycles = [4] in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [SMX60_VFP], mx, sew, IsWorstCase>;

// Pattern for vfwsub/vfwadd.vv, vfwsub/vfwadd.vf: e16mf4=4, e16mf2=4, e16m1=4, e16m2=5,
// e16m4=8, e32mf2=4, e32m1=4, e32m2=5, e32m4=8
// Pattern for vfwsub/vfwadd.wv, vfwsub/vfwadd.wf: e16mf4=5, e16mf2=5, e16m1=5, e16m2=9,
// e16m4=17, e32mf2=5, e32m1=5, e32m2=9, e32m4=17
// TODO: Split .wf/.wv variants into separate scheduling classes to use 5/5/9/17
defm "" : LMULSEWWriteResMXSEW<"WriteVFWALUV", [SMX60_VFP], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFWALUF", [SMX60_VFP], mx, sew, IsWorstCase>;
}

// Pattern for vfwmul.vv, vfwmul.vf: e16 = 4/4/4/6/8. We use 4/4/5/8 as approximation
// TODO: e32m4 = 8, but it's set to 5 here
let Latency = !if(!eq(sew, 32), Get5558Latency<mx>.c, Get4588Latency<mx>.c), ReleaseAtCycles = [4] in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulV", [SMX60_VFP], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulF", [SMX60_VFP], mx, sew, IsWorstCase>;
}

// Pattern for vfwmacc, vfwnmacc, etc: e16 = 5/5/5/8; e32 = 6/6/7/8
// Use existing 6,6,7,8 as close approximation
let Latency = Get6678Latency<mx>.c, ReleaseAtCycles = [6] in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddV", [SMX60_VFP], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddF", [SMX60_VFP], mx, sew, IsWorstCase>;
}
}
}

// Narrowing
foreach mx = SchedMxListW in {
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxListW>.c;

defm "" : LMULWriteResMX<"WriteVFNCvtFToIV", [SMX60_VFP], mx, IsWorstCase>;
let Latency = !mul(Get1248Latency<mx>.c, 4), ReleaseAtCycles = [4] in {
defm "" : LMULWriteResMX<"WriteVFNCvtFToIV", [SMX60_VFP], mx, IsWorstCase>;
}
}

foreach mx = SchedMxListFW in {
foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {

defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtIToFV", [SMX60_VFP], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtFToFV", [SMX60_VFP], mx, sew, IsWorstCase>;

let Latency = !mul(Get1248Latency<mx>.c, 4), ReleaseAtCycles = [4] in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtIToFV", [SMX60_VFP], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtFToFV", [SMX60_VFP], mx, sew, IsWorstCase>;
}
}
}

// Vector Floating-Point Division and Square Root
foreach mx = SchedMxListF in {
foreach sew = SchedSEWSet<mx, 1>.val in {
defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
defvar LMulLat = Get1248Latency<mx>.c;

// Pattern for vfdiv.vf: e16 = 12/24/48/96; e32 = 12/24/48/96; e64 = 18/36/72/144
// Pattern for vfrdiv.vf: e16 = 12/24/48/96; e32 = 12/24/48/96; e64 = 40/80/160/320
// We use the worst-case, vfdiv.vf is penalized in e64
// TODO: split vfdiv.vf and vfrdiv.vf into separate scheduling classes
let Latency = !if(!eq(sew, 64), !mul(LMulLat, 40), !mul(LMulLat, 12)), ReleaseAtCycles = [7] in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFDivF", [SMX60_VFP], mx, sew, IsWorstCase>;
}

// Compute latency based on SEW and LMUL combination
defvar SEWLatencyFactor = !cond(
!eq(sew, 16) : 12, // e16: 12*LMUL
!eq(sew, 32) : 38, // e32: 38*LMUL
!eq(sew, 64) : 40 // e64: 40*LMUL
);

let Latency = !mul(LMulLat, SEWLatencyFactor), ReleaseAtCycles = [7] in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFDivV", [SMX60_VFP], mx, sew, IsWorstCase>;
}
}
}

// Pattern for vfsqrt.v: e16 = 18/36/72/144; e32 = 38/76/152/304; e64 = 40/80/160/320
foreach mx = SchedMxListF in {
foreach sew = SchedSEWSet<mx, 1>.val in {
defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;

defm "" : LMULSEWWriteResMXSEW<"WriteVFDivV", [SMX60_VFP], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFDivF", [SMX60_VFP], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFSqrtV", [SMX60_VFP], mx, sew, IsWorstCase>;
// Compute latency based on SEW and LMUL combination
defvar SEWLatencyFactor = !cond(
!eq(sew, 16) : 18, // e16: 18*LMUL
!eq(sew, 32) : 38, // e32: 38*LMUL
!eq(sew, 64) : 40 // e64: 40*LMUL
);

let Latency = !mul(Get1248Latency<mx>.c, SEWLatencyFactor), ReleaseAtCycles = [18] in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFSqrtV", [SMX60_VFP], mx, sew, IsWorstCase>;
}
}
}

Expand Down
Loading
Loading