diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td index 737fc7390455d..e23daec97bd2d 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td +++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td @@ -512,6 +512,12 @@ def N2Write_8c_3L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, let NumMicroOps = 7; } +def N2Write_7c_7V0 : SchedWriteRes<[N2UnitV0]> { + let Latency = 7; + let NumMicroOps = 7; + let ReleaseAtCycles = [7]; +} + //===----------------------------------------------------------------------===// // Define generic 8 micro-op types @@ -547,6 +553,15 @@ def N2Write_9c_4L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL, let NumMicroOps = 8; } +//===----------------------------------------------------------------------===// +// Define generic 9 micro-op types + +def N2Write_9c_9V0 : SchedWriteRes<[N2UnitV0]> { + let Latency = 9; + let NumMicroOps = 9; + let ReleaseAtCycles = [9]; +} + //===----------------------------------------------------------------------===// // Define generic 10 micro-op types @@ -557,6 +572,12 @@ def N2Write_7c_5L01_5V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, let NumMicroOps = 10; } +def N2Write_10c_10V0 : SchedWriteRes<[N2UnitV0]> { + let Latency = 10; + let NumMicroOps = 10; + let ReleaseAtCycles = [10]; +} + //===----------------------------------------------------------------------===// // Define generic 12 micro-op types @@ -580,6 +601,21 @@ def N2Write_7c_5L01_5S_5V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, let NumMicroOps = 15; } +def N2Write_15c_15V0 : SchedWriteRes<[N2UnitV0]> { + let Latency = 15; + let NumMicroOps = 15; + let ReleaseAtCycles = [15]; +} + +//===----------------------------------------------------------------------===// +// Define generic 16 micro-op types + +def N2Write_16c_16V0 : SchedWriteRes<[N2UnitV0]> { + let Latency = 16; + let NumMicroOps = 16; + let ReleaseAtCycles = [16]; +} + 
//===----------------------------------------------------------------------===// // Define generic 18 micro-op types @@ -795,22 +831,26 @@ def : SchedAlias<WriteF, N2Write_2c_1V>; // FP compare def : SchedAlias<WriteFCmp, N2Write_2c_1V0>; +// FP divide and square root operations are performed using an iterative +// algorithm and block subsequent similar operations to the same pipeline +// until complete (Arm Neoverse N2 Software Optimization Guide, 3.14). + // FP divide, square root -def : SchedAlias<WriteFDiv, N2Write_7c_1V0>; +def : SchedAlias<WriteFDiv, N2Write_7c_7V0>; // FP divide, H-form -def : InstRW<[N2Write_7c_1V0], (instrs FDIVHrr)>; +def : InstRW<[N2Write_7c_7V0], (instrs FDIVHrr)>; // FP divide, S-form -def : InstRW<[N2Write_10c_1V0], (instrs FDIVSrr)>; +def : InstRW<[N2Write_10c_10V0], (instrs FDIVSrr)>; // FP divide, D-form -def : InstRW<[N2Write_15c_1V0], (instrs FDIVDrr)>; +def : InstRW<[N2Write_15c_15V0], (instrs FDIVDrr)>; // FP square root, H-form -def : InstRW<[N2Write_7c_1V0], (instrs FSQRTHr)>; +def : InstRW<[N2Write_7c_7V0], (instrs FSQRTHr)>; // FP square root, S-form -def : InstRW<[N2Write_9c_1V0], (instrs FSQRTSr)>; +def : InstRW<[N2Write_9c_9V0], (instrs FSQRTSr)>; // FP square root, D-form -def : InstRW<[N2Write_16c_1V0], (instrs FSQRTDr)>; +def : InstRW<[N2Write_16c_16V0], (instrs FSQRTDr)>; // FP multiply def : WriteRes<WriteFMul, [N2UnitV]> { let Latency = 3; } diff --git a/llvm/test/CodeGen/AArch64/machine-combiner.ll b/llvm/test/CodeGen/AArch64/machine-combiner.ll index 70a638857ce4a..c8df283aace0b 100644 --- a/llvm/test/CodeGen/AArch64/machine-combiner.ll +++ b/llvm/test/CodeGen/AArch64/machine-combiner.ll @@ -262,8 +262,8 @@ define half @reassociate_adds_half(half %x0, half %x1, half %x2, half %x3) { ; CHECK-UNSAFE-LABEL: reassociate_adds_half: ; CHECK-UNSAFE: // %bb.0: ; CHECK-UNSAFE-NEXT: fdiv h0, h0, h1 -; CHECK-UNSAFE-NEXT: fadd h1, h3, h2 -; CHECK-UNSAFE-NEXT: fadd h0, h1, h0 +; CHECK-UNSAFE-NEXT: fadd h2, h3, h2 +; CHECK-UNSAFE-NEXT: fadd h0, h2, h0 ; CHECK-UNSAFE-NEXT: ret %t0 = fdiv half %x0, %x1 %t1 = fadd half %x2, %t0 @@ -284,8 +284,8 @@ 
define half @reassociate_muls_half(half %x0, half %x1, half %x2, half %x3) { ; CHECK-UNSAFE-LABEL: reassociate_muls_half: ; CHECK-UNSAFE: // %bb.0: ; CHECK-UNSAFE-NEXT: fdiv h0, h0, h1 -; CHECK-UNSAFE-NEXT: fmul h1, h3, h2 -; CHECK-UNSAFE-NEXT: fmul h0, h1, h0 +; CHECK-UNSAFE-NEXT: fmul h2, h3, h2 +; CHECK-UNSAFE-NEXT: fmul h0, h2, h0 ; CHECK-UNSAFE-NEXT: ret %t0 = fdiv half %x0, %x1 %t1 = fmul half %x2, %t0 diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-basic-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-basic-instructions.s index f4c4a20573c4e..cf1cf0e98c801 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-basic-instructions.s +++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-basic-instructions.s @@ -1891,7 +1891,7 @@ drps # CHECK-NEXT: 1 2 0.50 fmov s0, s1 # CHECK-NEXT: 1 2 0.50 fabs s2, s3 # CHECK-NEXT: 1 2 0.50 fneg s4, s5 -# CHECK-NEXT: 1 9 1.00 fsqrt s6, s7 +# CHECK-NEXT: 9 9 9.00 fsqrt s6, s7 # CHECK-NEXT: 1 3 1.00 fcvt d8, s9 # CHECK-NEXT: 1 3 1.00 fcvt h10, s11 # CHECK-NEXT: 1 3 1.00 frintn s12, s13 @@ -1904,7 +1904,7 @@ drps # CHECK-NEXT: 1 2 0.50 fmov d0, d1 # CHECK-NEXT: 1 2 0.50 fabs d2, d3 # CHECK-NEXT: 1 2 0.50 fneg d4, d5 -# CHECK-NEXT: 1 16 1.00 fsqrt d6, d7 +# CHECK-NEXT: 16 16 16.00 fsqrt d6, d7 # CHECK-NEXT: 1 3 1.00 fcvt s8, d9 # CHECK-NEXT: 1 3 1.00 fcvt h10, d11 # CHECK-NEXT: 1 3 1.00 frintn d12, d13 @@ -1917,7 +1917,7 @@ drps # CHECK-NEXT: 1 3 1.00 fcvt s26, h27 # CHECK-NEXT: 1 3 1.00 fcvt d28, h29 # CHECK-NEXT: 1 3 0.50 fmul s20, s19, s17 -# CHECK-NEXT: 1 10 1.00 fdiv s1, s2, s3 +# CHECK-NEXT: 10 10 10.00 fdiv s1, s2, s3 # CHECK-NEXT: 1 2 0.50 fadd s4, s5, s6 # CHECK-NEXT: 1 2 0.50 fsub s7, s8, s9 # CHECK-NEXT: 1 2 0.50 fmax s10, s11, s12 @@ -1926,7 +1926,7 @@ drps # CHECK-NEXT: 1 2 0.50 fminnm s19, s20, s21 # CHECK-NEXT: 1 3 0.50 fnmul s22, s23, s2 # CHECK-NEXT: 1 3 0.50 fmul d20, d19, d17 -# CHECK-NEXT: 1 15 1.00 fdiv d1, d2, d3 +# CHECK-NEXT: 15 15 15.00 fdiv d1, d2, d3 # CHECK-NEXT: 1 2 0.50 fadd 
d4, d5, d6 # CHECK-NEXT: 1 2 0.50 fsub d7, d8, d9 # CHECK-NEXT: 1 2 0.50 fmax d10, d11, d12 @@ -2557,7 +2557,7 @@ drps # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6.0] [6.1] [7] [8] -# CHECK-NEXT: 11.00 11.00 33.00 33.00 87.33 151.33 151.33 517.00 251.00 162.50 162.50 169.50 85.50 +# CHECK-NEXT: 11.00 11.00 33.00 33.00 87.33 151.33 151.33 517.00 251.00 162.50 162.50 215.50 85.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6.0] [6.1] [7] [8] Instructions: @@ -3075,7 +3075,7 @@ drps # CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fmov s0, s1 # CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fabs s2, s3 # CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fneg s4, s5 -# CHECK-NEXT: - - - - - - - - - - - 1.00 - fsqrt s6, s7 +# CHECK-NEXT: - - - - - - - - - - - 9.00 - fsqrt s6, s7 # CHECK-NEXT: - - - - - - - - - - - 1.00 - fcvt d8, s9 # CHECK-NEXT: - - - - - - - - - - - 1.00 - fcvt h10, s11 # CHECK-NEXT: - - - - - - - - - - - 1.00 - frintn s12, s13 @@ -3088,7 +3088,7 @@ drps # CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fmov d0, d1 # CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fabs d2, d3 # CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fneg d4, d5 -# CHECK-NEXT: - - - - - - - - - - - 1.00 - fsqrt d6, d7 +# CHECK-NEXT: - - - - - - - - - - - 16.00 - fsqrt d6, d7 # CHECK-NEXT: - - - - - - - - - - - 1.00 - fcvt s8, d9 # CHECK-NEXT: - - - - - - - - - - - 1.00 - fcvt h10, d11 # CHECK-NEXT: - - - - - - - - - - - 1.00 - frintn d12, d13 @@ -3101,7 +3101,7 @@ drps # CHECK-NEXT: - - - - - - - - - - - 1.00 - fcvt s26, h27 # CHECK-NEXT: - - - - - - - - - - - 1.00 - fcvt d28, h29 # CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fmul s20, s19, s17 -# CHECK-NEXT: - - - - - - - - - - - 1.00 - fdiv s1, s2, s3 +# CHECK-NEXT: - - - - - - - - - - - 10.00 - fdiv s1, s2, s3 # CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fadd s4, s5, s6 # CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fsub s7, 
s8, s9 # CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fmax s10, s11, s12 @@ -3110,7 +3110,7 @@ drps # CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fminnm s19, s20, s21 # CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fnmul s22, s23, s2 # CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fmul d20, d19, d17 -# CHECK-NEXT: - - - - - - - - - - - 1.00 - fdiv d1, d2, d3 +# CHECK-NEXT: - - - - - - - - - - - 15.00 - fdiv d1, d2, d3 # CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fadd d4, d5, d6 # CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fsub d7, d8, d9 # CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fmax d10, d11, d12