Skip to content

Commit e0c6007

Browse files
authored
[AArch64] Fix metrics of ASIMD instructions in Neoverse N3 (#169790)
Some ASIMD instructions were missed in the Neoverse N3 scheduler model and fell back to the default definitions, which give them incorrect latency and throughput. This patch updates those instructions so that their metrics match the current Neoverse N3 Software Optimization Guide (SWOG).
1 parent 14fb59a commit e0c6007

File tree

2 files changed

+190
-190
lines changed

2 files changed

+190
-190
lines changed

llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1097,7 +1097,7 @@ def : SchedAlias<WriteVq, N3Write_2c_1V>;
10971097
// ASIMD shift accumulate
10981098
def : InstRW<[N3Wr_ADA, N3Rd_ADA], (instregex "^[SU]ABAL?v",
10991099
"^[SU]ADALPv",
1100-
"^[SU]R?SRAv")>;
1100+
"^[SU]R?SRA(v|d)")>;
11011101

11021102
// ASIMD arith, reduce, 4H/4S
11031103
def : InstRW<[N3Write_3c_1V1], (instregex "^[SU]?ADDL?Vv4i(16|32)v$")>;
@@ -1138,30 +1138,30 @@ def : InstRW<[N3Wr_VMAH, N3Rd_VMAH], (instregex "^SQRDMLAHv", "^SQRDMLSHv")>;
11381138
def : InstRW<[N3Wr_VMAL, N3Rd_VMAL], (instregex "^[SU]MLALv", "^[SU]MLSLv")>;
11391139

11401140
// ASIMD multiply accumulate saturating long
1141-
def : InstRW<[N3Wr_VMASL, N3Rd_VMASL], (instregex "^SQDMLALv", "^SQDMLSLv")>;
1141+
def : InstRW<[N3Wr_VMASL, N3Rd_VMASL], (instregex "^SQDMLAL(v|i16|i32)", "^SQDMLSL(v|i16|i32)")>;
11421142

11431143
// ASIMD multiply/multiply long (8x8) polynomial, D-form
11441144
// ASIMD multiply/multiply long (8x8) polynomial, Q-form
11451145
def : InstRW<[N3Write_2c_1V0], (instregex "^PMULL?(v8i8|v16i8)$")>;
11461146

11471147
// ASIMD multiply long
1148-
def : InstRW<[N3Write_4c_1V0], (instregex "^[SU]MULLv", "^SQDMULLv")>;
1148+
def : InstRW<[N3Write_4c_1V0], (instregex "^[SU]MULLv", "^SQDMULL(v|i16|i32)")>;
11491149

11501150
// ASIMD shift by immed, basic
1151-
def : InstRW<[N3Write_2c_1V1], (instregex "^SHLv", "^SHLLv", "^SHRNv",
1152-
"^SSHLLv", "^SSHRv", "^USHLLv",
1153-
"^USHRv")>;
1151+
def : InstRW<[N3Write_2c_1V1], (instregex "^SHL(v|d)", "^SHLLv", "^SHRNv",
1152+
"^SSHLLv", "^SSHR(v|d)", "^USHLLv",
1153+
"^USHR(v|d)")>;
11541154

11551155
// ASIMD shift by immed and insert, basic
1156-
def : InstRW<[N3Write_2c_1V1], (instregex "^SLIv", "^SRIv")>;
1156+
def : InstRW<[N3Write_2c_1V1], (instregex "^SLI(v|d)", "^SRI(v|d)")>;
11571157

11581158
// ASIMD shift by immed, complex
11591159
def : InstRW<[N3Write_4c_1V1],
1160-
(instregex "^RSHRNv", "^SQRSHRNv", "^SQRSHRUNv",
1160+
(instregex "^RSHRNv", "^SQRSHRN[vbhs]", "^SQRSHRUN[vbhs]",
11611161
"^(SQSHLU?|UQSHL)[bhsd]$",
11621162
"^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
1163-
"^SQSHRNv", "^SQSHRUNv", "^SRSHRv", "^UQRSHRNv",
1164-
"^UQSHRNv", "^URSHRv")>;
1163+
"^SQSHRN[vbhs]", "^SQSHRUN[vbhs]", "^SRSHR(v|d)",
1164+
"^UQRSHRN[vbhs]", "^UQSHRN[vbhs]","^URSHR(v|d)")>;
11651165

11661166
// ASIMD shift by register, basic
11671167
def : InstRW<[N3Write_2c_1V1], (instregex "^[SU]SHLv")>;
@@ -1197,16 +1197,16 @@ def : InstRW<[N3Write_3c_1V0], (instregex "^FCVTL(v2|v4)i32")>;
11971197
def : InstRW<[N3Write_4c_2V0], (instregex "^FCVTN(v4|v8)i16")>;
11981198

11991199
// ASIMD FP convert, narrow (F64 to F32)
1200-
def : InstRW<[N3Write_3c_1V0], (instregex "^FCVTN(v2|v4)i32",
1200+
def : InstRW<[N3Write_3c_1V0], (instregex "^FCVTN(v2|v4)i32", "^FCVTXNv1i64",
12011201
"^FCVTXN(v2|v4)f32")>;
12021202

12031203
// ASIMD FP convert, other, D-form F32 and Q-form F64
1204-
def : InstRW<[N3Write_3c_1V0], (instregex "^[FSU]CVT[AMNPZ][SU]v2f(32|64)$",
1205-
"^[SU]CVTFv2f(32|64)$")>;
1204+
def : InstRW<[N3Write_3c_1V0], (instregex "^[FSU]CVT[AMNPZ][SU](v2f(32|64)|s|d|v1i32|v1i64|v2i32_shift|v2i64_shift)$",
1205+
"^[SU]CVTF(v2f(32|64)|s|d|v1i32|v1i64|v2i32_shift|v2i64_shift)$")>;
12061206

12071207
// ASIMD FP convert, other, D-form F16 and Q-form F32
1208-
def : InstRW<[N3Write_4c_2V0], (instregex "^[FSU]CVT[AMNPZ][SU]v4f(16|32)$",
1209-
"^[SU]CVTFv4f(16|32)$")>;
1208+
def : InstRW<[N3Write_4c_2V0], (instregex "^[FSU]CVT[AMNPZ][SU](v4f(16|32)|v4i(16|32)_shift)$",
1209+
"^[SU]CVTF(v4f(16|32)|v4i(16|32)_shift)$")>;
12101210

12111211
// ASIMD FP convert, other, Q-form F16
12121212
def : InstRW<[N3Write_6c_4V0], (instregex "^[FSU]CVT[AMNPZ][SU]v8f16$",
@@ -1241,7 +1241,7 @@ def : InstRW<[N3Write_4c_2V], (instregex "^(FMAX|FMIN)(NM)?Vv4(i16|i32)v$")>;
12411241
def : InstRW<[N3Write_6c_3V], (instregex "^(FMAX|FMIN)(NM)?Vv8i16v$")>;
12421242

12431243
// ASIMD FP multiply
1244-
def : InstRW<[N3Wr_FPM], (instregex "^FMULv", "^FMULXv")>;
1244+
def : InstRW<[N3Wr_FPM], (instregex "^FMULv", "^FMULX(v|32|64)")>;
12451245

12461246
// ASIMD FP multiply accumulate
12471247
def : InstRW<[N3Wr_FPMA, N3Rd_FPMA], (instregex "^FMLAv", "^FMLSv")>;
@@ -1330,9 +1330,9 @@ def : InstRW<[N3Write_4c_2V0], (instrs URECPEv4i32, URSQRTEv4i32)>;
13301330

13311331
// ASIMD reciprocal and square root estimate, D-form F32 and scalar forms
13321332
def : InstRW<[N3Write_3c_1V0], (instrs FRECPEv1f16, FRECPEv1i32,
1333-
FRECPEv1i64, FRECPEv2f32,
1333+
FRECPEv1i64, FRECPEv2f32, FRECPEv2f64,
13341334
FRSQRTEv1f16, FRSQRTEv1i32,
1335-
FRSQRTEv1i64, FRSQRTEv2f32)>;
1335+
FRSQRTEv1i64, FRSQRTEv2f32, FRSQRTEv2f64)>;
13361336

13371337
// ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32
13381338
def : InstRW<[N3Write_4c_2V0], (instrs FRECPEv4f16, FRECPEv4f32,
@@ -1345,7 +1345,7 @@ def : InstRW<[N3Write_6c_4V0], (instrs FRECPEv8f16, FRSQRTEv8f16)>;
13451345
def : InstRW<[N3Write_3c_1V0], (instregex "^FRECPXv")>;
13461346

13471347
// ASIMD reciprocal step
1348-
def : InstRW<[N3Write_4c_1V], (instregex "^FRECPSv", "^FRSQRTSv")>;
1348+
def : InstRW<[N3Write_4c_1V], (instregex "^FRECPS(v|32|64)", "^FRSQRTS(v|32|64)")>;
13491349

13501350
// ASIMD table lookup, 3 table regs
13511351
def : InstRW<[N3Write_4c_2V], (instrs TBLv8i8Three, TBLv16i8Three)>;

0 commit comments

Comments
 (0)