Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion llvm/lib/Target/AArch64/AArch64SchedNeoverseN1.td
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
//===----------------------------------------------------------------------===//

def NeoverseN1Model : SchedMachineModel {
let IssueWidth = 8; // Maximum micro-ops dispatch rate.
let IssueWidth = 3; // Derived from the decode bandwidth; empirical
// measurements showed that this value gives better
// results than the maximum micro-op dispatch rate (8).
let MicroOpBufferSize = 128; // NOTE: Copied from Cortex-A76.
let LoadLatency = 4; // Optimistic load latency.
let MispredictPenalty = 11; // Cycles cost of branch mispredicted.
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
//===----------------------------------------------------------------------===//

def NeoverseN3Model : SchedMachineModel {
let IssueWidth = 10; // Micro-ops dispatched at a time.
let IssueWidth = 5; // Micro-ops dispatched at a time.
let MicroOpBufferSize = 160; // Entries in micro-op re-order buffer. NOTE: Copied from N2.
let LoadLatency = 4; // Optimistic load latency.
let MispredictPenalty = 10; // Extra cycles for mispredicted branch. NOTE: Copied from N2.
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@
//===----------------------------------------------------------------------===//

def NeoverseV1Model : SchedMachineModel {
let IssueWidth = 15; // Maximum micro-ops dispatch rate.
let IssueWidth = 8; // Derived from the decode bandwidth; empirical
// measurements showed that this value gives better
// results than the maximum micro-op dispatch rate (15).
let MicroOpBufferSize = 256; // Micro-op re-order buffer.
let LoadLatency = 4; // Optimistic load latency.
let MispredictPenalty = 11; // Cycles cost of branch mispredicted.
Expand Down
4,417 changes: 2,214 additions & 2,203 deletions llvm/test/tools/llvm-mca/AArch64/Neoverse/N1-writeback.s

Large diffs are not rendered by default.

3,822 changes: 1,916 additions & 1,906 deletions llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-writeback.s

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -2688,7 +2688,7 @@ drps
# CHECK-NEXT: 1 1 0.25 movk x7, #0, lsl #32
# CHECK-NEXT: 1 1 0.25 movz x8, #0, lsl #48
# CHECK-NEXT: 1 1 0.25 movk x9, #0, lsl #48
# CHECK-NEXT: 1 1 0.07 U msr DAIFSet, #0
# CHECK-NEXT: 1 1 0.12 U msr DAIFSet, #0
# CHECK-NEXT: 1 1 0.25 adr x2, #1600
# CHECK-NEXT: 1 1 0.25 adrp x21, #6553600
# CHECK-NEXT: 1 1 0.25 adr x0, #262144
Expand Down
32 changes: 16 additions & 16 deletions llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-clear-upper-regs.s
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ add v0.16b, v0.16b, v0.16b
# CHECK-NEXT: Total Cycles: 41
# CHECK-NEXT: Total uOps: 200

# CHECK: Dispatch Width: 15
# CHECK: Dispatch Width: 8
# CHECK-NEXT: uOps Per Cycle: 4.88
# CHECK-NEXT: IPC: 4.88
# CHECK-NEXT: Block RThroughput: 0.3
Expand Down Expand Up @@ -134,7 +134,7 @@ add v0.16b, v0.16b, v0.16b
# CHECK-NEXT: Total Cycles: 44
# CHECK-NEXT: Total uOps: 200

# CHECK: Dispatch Width: 15
# CHECK: Dispatch Width: 8
# CHECK-NEXT: uOps Per Cycle: 4.55
# CHECK-NEXT: IPC: 4.55
# CHECK-NEXT: Block RThroughput: 0.3
Expand Down Expand Up @@ -211,7 +211,7 @@ add v0.16b, v0.16b, v0.16b
# CHECK-NEXT: Total Cycles: 44
# CHECK-NEXT: Total uOps: 200

# CHECK: Dispatch Width: 15
# CHECK: Dispatch Width: 8
# CHECK-NEXT: uOps Per Cycle: 4.55
# CHECK-NEXT: IPC: 4.55
# CHECK-NEXT: Block RThroughput: 0.3
Expand Down Expand Up @@ -288,7 +288,7 @@ add v0.16b, v0.16b, v0.16b
# CHECK-NEXT: Total Cycles: 44
# CHECK-NEXT: Total uOps: 200

# CHECK: Dispatch Width: 15
# CHECK: Dispatch Width: 8
# CHECK-NEXT: uOps Per Cycle: 4.55
# CHECK-NEXT: IPC: 4.55
# CHECK-NEXT: Block RThroughput: 0.3
Expand Down Expand Up @@ -365,7 +365,7 @@ add v0.16b, v0.16b, v0.16b
# CHECK-NEXT: Total Cycles: 44
# CHECK-NEXT: Total uOps: 200

# CHECK: Dispatch Width: 15
# CHECK: Dispatch Width: 8
# CHECK-NEXT: uOps Per Cycle: 4.55
# CHECK-NEXT: IPC: 4.55
# CHECK-NEXT: Block RThroughput: 0.3
Expand Down Expand Up @@ -442,7 +442,7 @@ add v0.16b, v0.16b, v0.16b
# CHECK-NEXT: Total Cycles: 44
# CHECK-NEXT: Total uOps: 200

# CHECK: Dispatch Width: 15
# CHECK: Dispatch Width: 8
# CHECK-NEXT: uOps Per Cycle: 4.55
# CHECK-NEXT: IPC: 4.55
# CHECK-NEXT: Block RThroughput: 0.3
Expand Down Expand Up @@ -519,7 +519,7 @@ add v0.16b, v0.16b, v0.16b
# CHECK-NEXT: Total Cycles: 44
# CHECK-NEXT: Total uOps: 200

# CHECK: Dispatch Width: 15
# CHECK: Dispatch Width: 8
# CHECK-NEXT: uOps Per Cycle: 4.55
# CHECK-NEXT: IPC: 4.55
# CHECK-NEXT: Block RThroughput: 0.3
Expand Down Expand Up @@ -596,7 +596,7 @@ add v0.16b, v0.16b, v0.16b
# CHECK-NEXT: Total Cycles: 44
# CHECK-NEXT: Total uOps: 200

# CHECK: Dispatch Width: 15
# CHECK: Dispatch Width: 8
# CHECK-NEXT: uOps Per Cycle: 4.55
# CHECK-NEXT: IPC: 4.55
# CHECK-NEXT: Block RThroughput: 0.3
Expand Down Expand Up @@ -673,7 +673,7 @@ add v0.16b, v0.16b, v0.16b
# CHECK-NEXT: Total Cycles: 403
# CHECK-NEXT: Total uOps: 200

# CHECK: Dispatch Width: 15
# CHECK: Dispatch Width: 8
# CHECK-NEXT: uOps Per Cycle: 0.50
# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 0.5
Expand Down Expand Up @@ -750,7 +750,7 @@ add v0.16b, v0.16b, v0.16b
# CHECK-NEXT: Total Cycles: 1003
# CHECK-NEXT: Total uOps: 300

# CHECK: Dispatch Width: 15
# CHECK: Dispatch Width: 8
# CHECK-NEXT: uOps Per Cycle: 0.30
# CHECK-NEXT: IPC: 0.20
# CHECK-NEXT: Block RThroughput: 0.5
Expand Down Expand Up @@ -805,9 +805,9 @@ add v0.16b, v0.16b, v0.16b
# CHECK-NEXT: [1,0] D==========eeeeeeeeER . . . . . ld1 { v0.b }[0], [sp]
# CHECK-NEXT: [1,1] D==================eeER . . . . . add v0.16b, v0.16b, v0.16b
# CHECK-NEXT: [2,0] D====================eeeeeeeeER . . . ld1 { v0.b }[0], [sp]
# CHECK-NEXT: [2,1] D============================eeER . . . add v0.16b, v0.16b, v0.16b
# CHECK-NEXT: [3,0] D==============================eeeeeeeeER . ld1 { v0.b }[0], [sp]
# CHECK-NEXT: [3,1] D======================================eeER add v0.16b, v0.16b, v0.16b
# CHECK-NEXT: [2,1] .D===========================eeER . . . add v0.16b, v0.16b, v0.16b
# CHECK-NEXT: [3,0] .D=============================eeeeeeeeER . ld1 { v0.b }[0], [sp]
# CHECK-NEXT: [3,1] .D=====================================eeER add v0.16b, v0.16b, v0.16b

# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
Expand All @@ -816,6 +816,6 @@ add v0.16b, v0.16b, v0.16b
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 4 16.0 0.3 0.0 ld1 { v0.b }[0], [sp]
# CHECK-NEXT: 1. 4 24.0 0.0 0.0 add v0.16b, v0.16b, v0.16b
# CHECK-NEXT: 4 20.0 0.1 0.0 <total>
# CHECK-NEXT: 0. 4 15.8 0.3 0.0 ld1 { v0.b }[0], [sp]
# CHECK-NEXT: 1. 4 23.5 0.0 0.0 add v0.16b, v0.16b, v0.16b
# CHECK-NEXT: 4 19.6 0.1 0.0 <total>
Loading