Skip to content

Commit 4340159

Browse files
authored
[AArch64] Fix SVE FADDP latency on Neoverse-N3 (#167676)
This patch fixes the latency of the SVE FADDP instruction in the Neoverse-N3 scheduling model. FADDP belongs to the "Floating point arith, min/max pairwise" group, so its latency should be 3, as per the N3 SWOG.
1 parent ef9a02c commit 4340159

File tree

2 files changed

+5
-5
lines changed

2 files changed

+5
-5
lines changed

llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1926,7 +1926,6 @@ def : InstRW<[N3Write_2c_1V], (instregex "^FAB[SD]_ZPmZ_[HSD]",
19261926
// Floating point arithmetic
19271927
def : InstRW<[N3Write_2c_1V], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]",
19281928
"^F(ADD|SUB)_ZPZ[IZ]_[HSD]",
1929-
"^FADDP_ZPmZZ_[HSD]",
19301929
"^FNEG_ZPmZ_[HSD]",
19311930
"^FSUBR_ZPm[IZ]_[HSD]",
19321931
"^FSUBR_(ZPZI|ZPZZ)_[HSD]")>;
@@ -2001,7 +2000,8 @@ def : InstRW<[N3Write_10c_4V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_S")>;
20012000
def : InstRW<[N3Write_13c_2V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_D")>;
20022001

20032002
// Floating point arith, min/max pairwise
2004-
def : InstRW<[N3Write_3c_1V], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]")>;
2003+
def : InstRW<[N3Write_3c_1V], (instregex "^FADDP_ZPmZZ_[HSD]",
2004+
"^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]")>;
20052005

20062006
// Floating point min/max
20072007
def : InstRW<[N3Write_2c_1V], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]",

llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-sve-instructions.s

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3993,9 +3993,9 @@ zip2 z31.s, z31.s, z31.s
39933993
# CHECK-NEXT: 2 4 1.00 fadda d0, p7, d0, z31.d
39943994
# CHECK-NEXT: 8 16 4.00 fadda h0, p7, h0, z31.h
39953995
# CHECK-NEXT: 4 8 2.00 fadda s0, p7, s0, z31.s
3996-
# CHECK-NEXT: 1 2 0.50 faddp z0.h, p0/m, z0.h, z1.h
3997-
# CHECK-NEXT: 1 2 0.50 faddp z29.s, p3/m, z29.s, z30.s
3998-
# CHECK-NEXT: 1 2 0.50 faddp z31.d, p7/m, z31.d, z30.d
3996+
# CHECK-NEXT: 1 3 0.50 faddp z0.h, p0/m, z0.h, z1.h
3997+
# CHECK-NEXT: 1 3 0.50 faddp z29.s, p3/m, z29.s, z30.s
3998+
# CHECK-NEXT: 1 3 0.50 faddp z31.d, p7/m, z31.d, z30.d
39993999
# CHECK-NEXT: 1 2 0.50 faddv d0, p7, z31.d
40004000
# CHECK-NEXT: 3 6 1.50 faddv h0, p7, z31.h
40014001
# CHECK-NEXT: 2 4 1.00 faddv s0, p7, z31.s

0 commit comments

Comments
 (0)