Skip to content

Commit 2ffc548

Browse files
Asher8118aahrun
authored and committed
[AArch64] Fix throughput of 64-bit SVE gather loads (llvm#168572)
In the Neoverse N3 Software Optimisation Guide, "SVE non-temporal gather load, vector + scalar, 64-bit element size" and "Gather load, vector + imm, 64-bit element size" both show a throughput of 4/5. However, the scheduling model currently shows these instructions with a throughput of 2/3. This patch adds a new resource group so that the model shows the correct throughput.
1 parent 30d0646 commit 2ffc548

File tree

2 files changed

+259
-247
lines changed

2 files changed

+259
-247
lines changed

llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,12 @@ def N3UnitM : ProcResGroup<[N3UnitM0, N3UnitM1]>;
4949
def N3UnitL : ProcResGroup<[N3UnitL01, N3UnitL2]>;
5050
def N3UnitI : ProcResGroup<[N3UnitS, N3UnitM0, N3UnitM1]>;
5151

52+
// Group required for modelling SVE gather load throughput. It combines
// N3UnitL01 with N3UnitV0 and N3UnitV1 in a single resource group.
53+
def N3UnitVL : ProcResGroup<[N3UnitL01, N3UnitV0, N3UnitV1]>;
54+
// Unused group to fix: "error: proc resource group overlaps with N3UnitVL but
55+
// no supergroup contains both."
// Its member list is the union of the members of N3UnitL and N3UnitVL, so it
// contains both of those groups. NOTE(review): presumably N3UnitL is the
// overlapping group the error refers to (they share N3UnitL01) - confirm.
56+
def : ProcResGroup<[N3UnitL01, N3UnitL2, N3UnitV0, N3UnitV1]>;
57+
5258
//===----------------------------------------------------------------------===//
5359

5460
def : ReadAdvance<ReadI, 0>;
@@ -321,6 +327,12 @@ def N3Write_6c_2I_2L : SchedWriteRes<[N3UnitI, N3UnitI, N3UnitL, N3UnitL]> {
321327
let NumMicroOps = 4;
322328
}
323329

330+
// Write resource for the 64-bit element size gather loads (LDNT1*_ZZR_D and
// GLD(FF)?1*_D* forms): latency of 6, 4 micro-ops, consuming the N3UnitVL
// group with ReleaseAtCycles = [5].
// NOTE(review): the ReleaseAtCycles value on the N3UnitVL group is
// presumably what yields the 4/5 throughput listed in the Neoverse N3
// Software Optimisation Guide - confirm against the guide.
def N3Write_6c_2L01_2V : SchedWriteRes<[N3UnitVL]> {
331+
let Latency = 6;
332+
let NumMicroOps = 4;
333+
let ReleaseAtCycles = [5];
334+
}
335+
324336
def N3Write_6c_4V0 : SchedWriteRes<[N3UnitV0, N3UnitV0, N3UnitV0, N3UnitV0]> {
325337
let Latency = 6;
326338
let NumMicroOps = 4;
@@ -2270,8 +2282,8 @@ def : InstRW<[N3Write_7c_4L], (instregex "^LDNT1[BHW]_ZZR_S$",
22702282
"^LDNT1S[BH]_ZZR_S$")>;
22712283

22722284
// Non temporal gather load, vector + scalar 64-bit element size
2273-
def : InstRW<[N3Write_6c_2L], (instregex "^LDNT1S?[BHW]_ZZR_D$")>;
2274-
def : InstRW<[N3Write_6c_2L], (instrs LDNT1D_ZZR_D)>;
2285+
def : InstRW<[N3Write_6c_2L01_2V], (instregex "^LDNT1S?[BHW]_ZZR_D$")>;
2286+
def : InstRW<[N3Write_6c_2L01_2V], (instrs LDNT1D_ZZR_D)>;
22752287

22762288
// Contiguous first faulting load, scalar + scalar
22772289
def : InstRW<[N3Write_6c_1L], (instregex "^LDFF1[BHWD]$",
@@ -2320,11 +2332,11 @@ def : InstRW<[N3Write_7c_4L], (instregex "^GLD(FF)?1S?[BH]_S_IMM$",
23202332
"^GLD(FF)?1W_IMM$")>;
23212333

23222334
// Gather load, vector + imm, 64-bit element size
2323-
def : InstRW<[N3Write_6c_2L], (instregex "^GLD(FF)?1S?[BHW]_D_IMM$",
2335+
def : InstRW<[N3Write_6c_2L01_2V], (instregex "^GLD(FF)?1S?[BHW]_D_IMM$",
23242336
"^GLD(FF)?1D_IMM$")>;
23252337

23262338
// Gather load, 64-bit element size
2327-
def : InstRW<[N3Write_6c_2L],
2339+
def : InstRW<[N3Write_6c_2L01_2V],
23282340
(instregex "^GLD(FF)?1S?[BHW]_D_[SU]XTW(_SCALED)?$",
23292341
"^GLD(FF)?1S?[BHW]_D(_SCALED)?$",
23302342
"^GLD(FF)?1D_[SU]XTW(_SCALED)?$",

0 commit comments

Comments
 (0)