Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 63 additions & 20 deletions llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
Original file line number Diff line number Diff line change
Expand Up @@ -683,39 +683,82 @@ foreach mx = SchedMxList in {
}

// 16. Vector Permutation Instructions
// Slide
foreach mx = SchedMxList in {
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxList>.c;

defm "" : LMULWriteResMX<"WriteVSlideI", [SMX60_VIEU], mx, IsWorstCase>;
// Latency for slide up: 4/4/8/16, ReleaseAtCycles is 2/4/8/16
defvar VSlideUpLat = ConstValueUntilLMULThenDouble<"M2", 4, mx>.c;
defvar VSlideUpOcc = ConstOneUntilMF2ThenDouble<mx>.c;
let Latency = VSlideUpLat, ReleaseAtCycles =[VSlideUpOcc] in {
defm "" : LMULWriteResMX<"WriteVSlideUpX", [SMX60_VIEU], mx, IsWorstCase>;
}

defm "" : LMULWriteResMX<"WriteVISlide1X", [SMX60_VIEU], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFSlide1F", [SMX60_VFP], mx, IsWorstCase>;
// Latency for slide down: 4/5/9/17, ReleaseAtCycles is 3/5/9/17
defvar VSlideDownLat = GetLMULValue<[4, 4, 4, 4, 5, 9, 17], mx>.c;
defvar VSlideDownOcc = GetLMULValue<[1, 1, 1, 3, 5, 9, 17], mx>.c;
let Latency = VSlideDownLat, ReleaseAtCycles =[VSlideDownOcc] in {
defm "" : LMULWriteResMX<"WriteVSlideDownX", [SMX60_VIEU], mx, IsWorstCase>;
}
// The following group slide up and down together, so we use the worst-case
// (slide down) for all.
let Latency = VSlideDownLat, ReleaseAtCycles =[VSlideDownOcc] in {
defm "" : LMULWriteResMX<"WriteVSlideI", [SMX60_VIEU], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVISlide1X", [SMX60_VIEU], mx, IsWorstCase>;

defm "" : LMULWriteResMX<"WriteVSlideUpX", [SMX60_VIEU], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVSlideDownX", [SMX60_VIEU], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFSlide1F", [SMX60_VFP], mx, IsWorstCase>;
}
}

def : WriteRes<WriteVMovXS, [SMX60_VIEU]>;
def : WriteRes<WriteVMovSX, [SMX60_VIEU]>;

def : WriteRes<WriteVMovFS, [SMX60_VIEU]>;
def : WriteRes<WriteVMovSF, [SMX60_VIEU]>;
// ReleaseAtCycles is 2/2/2/2/2/3/6, but we can't set based on MX for now
// TODO: Split this into separate WriteRes for each MX
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It wasn't split by MX because vmv.x.s and friends should be independent of LMUL. I think the SpacemiT X60 might be an outlier here, and future hardware is probably more likely to have LMUL-independent latency & occupancy.

let Latency = 6, ReleaseAtCycles = [6] in {
def : WriteRes<WriteVMovXS, [SMX60_VIEU]>;
}

// Gather and Compress
foreach mx = SchedMxList in {
foreach sew = SchedSEWSet<mx>.val in {
defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [SMX60_VIEU], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherEI16VV", [SMX60_VIEU], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVCompressV", [SMX60_VIEU], mx, sew, IsWorstCase>;
}
// ReleaseAtCycles is 1/1/1/1/1/2/4, but we can't set based on MX for now
// TODO: Split this into separate WriteRes for each MX
let Latency = 4, ReleaseAtCycles = [4] in {
def : WriteRes<WriteVMovSX, [SMX60_VIEU]>;
def : WriteRes<WriteVMovFS, [SMX60_VIEU]>;
def : WriteRes<WriteVMovSF, [SMX60_VIEU]>;
}

// Integer LMUL Gather and Compress
foreach mx = SchedMxList in {
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxList>.c;

defm "" : LMULWriteResMX<"WriteVRGatherVX", [SMX60_VIEU], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVRGatherVI", [SMX60_VIEU], mx, IsWorstCase>;
defvar VRGatherLat = ConstValueUntilLMULThenDouble<"M2", 4, mx>.c;
let Latency = VRGatherLat, ReleaseAtCycles = [ConstOneUntilMF2ThenDouble<mx>.c] in {
defm "" : LMULWriteResMX<"WriteVRGatherVX", [SMX60_VIEU], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVRGatherVI", [SMX60_VIEU], mx, IsWorstCase>;
}

foreach sew = SchedSEWSet<mx>.val in {
defvar IsWorstCaseSEW = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;

defvar VRGatherVVLat = GetLMULValue<[4, 4, 4, 4, 16, 64, 256], mx>.c;
defvar VRGatherVVOcc = GetLMULValue<[1, 1, 1, 4, 16, 64, 256], mx>.c;
let Latency = VRGatherVVLat, ReleaseAtCycles = [VRGatherVVOcc] in {
defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [SMX60_VIEU], mx, sew, IsWorstCaseSEW>;
}
// For sew == 8, latency is double that of the other cases at M1/M2/M4; the fractional LMULs are a constant 4 cycles and M8 is 256 in both cases
defvar VRGatherEI16Lat = !if(!eq(sew, 8),
GetLMULValue<[4, 4, 4, 8, 32, 128, 256], mx>.c,
GetLMULValue<[4, 4, 4, 4, 16, 64, 256], mx>.c);
defvar VRGatherEI16Occ = !if(!eq(sew, 8),
GetLMULValue<[1, 1, 2, 8, 32, 128, 256], mx>.c,
GetLMULValue<[1, 1, 1, 4, 16, 64, 256], mx>.c);
let Latency = VRGatherEI16Lat, ReleaseAtCycles = [VRGatherEI16Occ] in {
defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherEI16VV", [SMX60_VIEU], mx, sew, IsWorstCaseSEW>;
}

defvar VCompressVLat = GetLMULValue<[4, 4, 4, 4, 10, 36, 136], mx>.c;
defvar VCompressVOcc = GetLMULValue<[1, 1, 1, 3, 10, 36, 136], mx>.c;
let Latency = VCompressVLat, ReleaseAtCycles = [VCompressVOcc] in {
defm "" : LMULSEWWriteResMXSEW<"WriteVCompressV", [SMX60_VIEU], mx, sew, IsWorstCaseSEW>;
}
}
}

// Others
Expand Down
Loading