Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 47 additions & 7 deletions llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
Original file line number Diff line number Diff line change
Expand Up @@ -512,6 +512,12 @@ def N2Write_8c_3L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL,
let NumMicroOps = 7;
}

// 7-cycle write on N2UnitV0 that also occupies the unit for 7 cycles.
// Referenced in this file by WriteFDiv, FDIVHrr and FSQRTHr.
def N2Write_7c_7V0 : SchedWriteRes<[N2UnitV0]> {
  let ReleaseAtCycles = [7];
  let NumMicroOps     = 7;
  let Latency         = 7;
}

//===----------------------------------------------------------------------===//
// Define generic 8 micro-op types

Expand Down Expand Up @@ -547,6 +553,15 @@ def N2Write_9c_4L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL,
let NumMicroOps = 8;
}

//===----------------------------------------------------------------------===//
// Define generic 9 micro-op types

// 9-cycle write on N2UnitV0 that also occupies the unit for 9 cycles.
// Referenced in this file by FSQRTSr.
def N2Write_9c_9V0 : SchedWriteRes<[N2UnitV0]> {
  let ReleaseAtCycles = [9];
  let NumMicroOps     = 9;
  let Latency         = 9;
}

//===----------------------------------------------------------------------===//
// Define generic 10 micro-op types

Expand All @@ -557,6 +572,12 @@ def N2Write_7c_5L01_5V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
let NumMicroOps = 10;
}

// 10-cycle write on N2UnitV0 that also occupies the unit for 10 cycles.
// Referenced in this file by FDIVSrr.
def N2Write_10c_10V0 : SchedWriteRes<[N2UnitV0]> {
  let ReleaseAtCycles = [10];
  let NumMicroOps     = 10;
  let Latency         = 10;
}

//===----------------------------------------------------------------------===//
// Define generic 12 micro-op types

Expand All @@ -580,6 +601,21 @@ def N2Write_7c_5L01_5S_5V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
let NumMicroOps = 15;
}

// 15-cycle write on N2UnitV0 that also occupies the unit for 15 cycles.
// Referenced in this file by FDIVDrr.
def N2Write_15c_15V0 : SchedWriteRes<[N2UnitV0]> {
  let ReleaseAtCycles = [15];
  let NumMicroOps     = 15;
  let Latency         = 15;
}

//===----------------------------------------------------------------------===//
// Define generic 16 micro-op types

// 16-cycle write on N2UnitV0 that also occupies the unit for 16 cycles.
// Referenced in this file by FSQRTDr.
def N2Write_16c_16V0 : SchedWriteRes<[N2UnitV0]> {
  let ReleaseAtCycles = [16];
  let NumMicroOps     = 16;
  let Latency         = 16;
}

//===----------------------------------------------------------------------===//
// Define generic 18 micro-op types

Expand Down Expand Up @@ -795,22 +831,26 @@ def : SchedAlias<WriteF, N2Write_2c_1V>;
// FP compare
def : SchedAlias<WriteFCmp, N2Write_2c_1V0>;

// FP divide and square root operations are performed using an iterative
// algorithm and block subsequent similar operations to the same pipeline
// until complete (Arm Neoverse N2 Software Optimization Guide, 3.14).

// FP divide, square root
def : SchedAlias<WriteFDiv, N2Write_7c_1V0>;
def : SchedAlias<WriteFDiv, N2Write_7c_7V0>;

// FP divide, H-form
def : InstRW<[N2Write_7c_1V0], (instrs FDIVHrr)>;
def : InstRW<[N2Write_7c_7V0], (instrs FDIVHrr)>;
// FP divide, S-form
def : InstRW<[N2Write_10c_1V0], (instrs FDIVSrr)>;
def : InstRW<[N2Write_10c_10V0], (instrs FDIVSrr)>;
// FP divide, D-form
def : InstRW<[N2Write_15c_1V0], (instrs FDIVDrr)>;
def : InstRW<[N2Write_15c_15V0], (instrs FDIVDrr)>;

// FP square root, H-form
def : InstRW<[N2Write_7c_1V0], (instrs FSQRTHr)>;
def : InstRW<[N2Write_7c_7V0], (instrs FSQRTHr)>;
// FP square root, S-form
def : InstRW<[N2Write_9c_1V0], (instrs FSQRTSr)>;
def : InstRW<[N2Write_9c_9V0], (instrs FSQRTSr)>;
// FP square root, D-form
def : InstRW<[N2Write_16c_1V0], (instrs FSQRTDr)>;
def : InstRW<[N2Write_16c_16V0], (instrs FSQRTDr)>;

// FP multiply
def : WriteRes<WriteFMul, [N2UnitV]> { let Latency = 3; }
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AArch64/machine-combiner.ll
Original file line number Diff line number Diff line change
Expand Up @@ -262,8 +262,8 @@ define half @reassociate_adds_half(half %x0, half %x1, half %x2, half %x3) {
; CHECK-UNSAFE-LABEL: reassociate_adds_half:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fdiv h0, h0, h1
; CHECK-UNSAFE-NEXT: fadd h1, h3, h2
; CHECK-UNSAFE-NEXT: fadd h0, h1, h0
; CHECK-UNSAFE-NEXT: fadd h2, h3, h2
; CHECK-UNSAFE-NEXT: fadd h0, h2, h0
; CHECK-UNSAFE-NEXT: ret
%t0 = fdiv half %x0, %x1
%t1 = fadd half %x2, %t0
Expand All @@ -284,8 +284,8 @@ define half @reassociate_muls_half(half %x0, half %x1, half %x2, half %x3) {
; CHECK-UNSAFE-LABEL: reassociate_muls_half:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fdiv h0, h0, h1
; CHECK-UNSAFE-NEXT: fmul h1, h3, h2
; CHECK-UNSAFE-NEXT: fmul h0, h1, h0
; CHECK-UNSAFE-NEXT: fmul h2, h3, h2
; CHECK-UNSAFE-NEXT: fmul h0, h2, h0
; CHECK-UNSAFE-NEXT: ret
%t0 = fdiv half %x0, %x1
%t1 = fmul half %x2, %t0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1891,7 +1891,7 @@ drps
# CHECK-NEXT: 1 2 0.50 fmov s0, s1
# CHECK-NEXT: 1 2 0.50 fabs s2, s3
# CHECK-NEXT: 1 2 0.50 fneg s4, s5
# CHECK-NEXT: 1 9 1.00 fsqrt s6, s7
# CHECK-NEXT: 9 9 9.00 fsqrt s6, s7
# CHECK-NEXT: 1 3 1.00 fcvt d8, s9
# CHECK-NEXT: 1 3 1.00 fcvt h10, s11
# CHECK-NEXT: 1 3 1.00 frintn s12, s13
Expand All @@ -1904,7 +1904,7 @@ drps
# CHECK-NEXT: 1 2 0.50 fmov d0, d1
# CHECK-NEXT: 1 2 0.50 fabs d2, d3
# CHECK-NEXT: 1 2 0.50 fneg d4, d5
# CHECK-NEXT: 1 16 1.00 fsqrt d6, d7
# CHECK-NEXT: 16 16 16.00 fsqrt d6, d7
# CHECK-NEXT: 1 3 1.00 fcvt s8, d9
# CHECK-NEXT: 1 3 1.00 fcvt h10, d11
# CHECK-NEXT: 1 3 1.00 frintn d12, d13
Expand All @@ -1917,7 +1917,7 @@ drps
# CHECK-NEXT: 1 3 1.00 fcvt s26, h27
# CHECK-NEXT: 1 3 1.00 fcvt d28, h29
# CHECK-NEXT: 1 3 0.50 fmul s20, s19, s17
# CHECK-NEXT: 1 10 1.00 fdiv s1, s2, s3
# CHECK-NEXT: 10 10 10.00 fdiv s1, s2, s3
# CHECK-NEXT: 1 2 0.50 fadd s4, s5, s6
# CHECK-NEXT: 1 2 0.50 fsub s7, s8, s9
# CHECK-NEXT: 1 2 0.50 fmax s10, s11, s12
Expand All @@ -1926,7 +1926,7 @@ drps
# CHECK-NEXT: 1 2 0.50 fminnm s19, s20, s21
# CHECK-NEXT: 1 3 0.50 fnmul s22, s23, s2
# CHECK-NEXT: 1 3 0.50 fmul d20, d19, d17
# CHECK-NEXT: 1 15 1.00 fdiv d1, d2, d3
# CHECK-NEXT: 15 15 15.00 fdiv d1, d2, d3
# CHECK-NEXT: 1 2 0.50 fadd d4, d5, d6
# CHECK-NEXT: 1 2 0.50 fsub d7, d8, d9
# CHECK-NEXT: 1 2 0.50 fmax d10, d11, d12
Expand Down Expand Up @@ -2557,7 +2557,7 @@ drps

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6.0] [6.1] [7] [8]
# CHECK-NEXT: 11.00 11.00 33.00 33.00 87.33 151.33 151.33 517.00 251.00 162.50 162.50 169.50 85.50
# CHECK-NEXT: 11.00 11.00 33.00 33.00 87.33 151.33 151.33 517.00 251.00 162.50 162.50 215.50 85.50

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6.0] [6.1] [7] [8] Instructions:
Expand Down Expand Up @@ -3075,7 +3075,7 @@ drps
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fmov s0, s1
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fabs s2, s3
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fneg s4, s5
# CHECK-NEXT: - - - - - - - - - - - 1.00 - fsqrt s6, s7
# CHECK-NEXT: - - - - - - - - - - - 9.00 - fsqrt s6, s7
# CHECK-NEXT: - - - - - - - - - - - 1.00 - fcvt d8, s9
# CHECK-NEXT: - - - - - - - - - - - 1.00 - fcvt h10, s11
# CHECK-NEXT: - - - - - - - - - - - 1.00 - frintn s12, s13
Expand All @@ -3088,7 +3088,7 @@ drps
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fmov d0, d1
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fabs d2, d3
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fneg d4, d5
# CHECK-NEXT: - - - - - - - - - - - 1.00 - fsqrt d6, d7
# CHECK-NEXT: - - - - - - - - - - - 16.00 - fsqrt d6, d7
# CHECK-NEXT: - - - - - - - - - - - 1.00 - fcvt s8, d9
# CHECK-NEXT: - - - - - - - - - - - 1.00 - fcvt h10, d11
# CHECK-NEXT: - - - - - - - - - - - 1.00 - frintn d12, d13
Expand All @@ -3101,7 +3101,7 @@ drps
# CHECK-NEXT: - - - - - - - - - - - 1.00 - fcvt s26, h27
# CHECK-NEXT: - - - - - - - - - - - 1.00 - fcvt d28, h29
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fmul s20, s19, s17
# CHECK-NEXT: - - - - - - - - - - - 1.00 - fdiv s1, s2, s3
# CHECK-NEXT: - - - - - - - - - - - 10.00 - fdiv s1, s2, s3
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fadd s4, s5, s6
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fsub s7, s8, s9
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fmax s10, s11, s12
Expand All @@ -3110,7 +3110,7 @@ drps
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fminnm s19, s20, s21
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fnmul s22, s23, s2
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fmul d20, d19, d17
# CHECK-NEXT: - - - - - - - - - - - 1.00 - fdiv d1, d2, d3
# CHECK-NEXT: - - - - - - - - - - - 15.00 - fdiv d1, d2, d3
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fadd d4, d5, d6
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fsub d7, d8, d9
# CHECK-NEXT: - - - - - - - - - - - 0.50 0.50 fmax d10, d11, d12
Expand Down
Loading