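From d992985d09f30aba3bbda43f3ecee49a1d6e263b Mon Sep 17 00:00:00 2001 From: Jim Lin Date: Thu, 24 Apr 2025 17:53:47 +0800 Subject: [PATCH 1/2] [RISCV] Implement base scheduling model for andes 45 series processor. This patch implements scheduling model for IMAFD and Zb extension. The latency and throughput of all instructions, except load/store, are measured by llvm-exegesis. Scheduling model for V and other extensions will be added in a follow-up patch. --- llvm/lib/Target/RISCV/RISCV.td | 1 + llvm/lib/Target/RISCV/RISCVProcessors.td | 8 +- llvm/lib/Target/RISCV/RISCVSchedAndes45.td | 339 +++++++++++++++++++ llvm/test/tools/llvm-mca/RISCV/Andes45/fpr.s | 129 +++++++ llvm/test/tools/llvm-mca/RISCV/Andes45/gpr.s | 126 +++++++ 5 files changed, 599 insertions(+), 4 deletions(-) create mode 100644 llvm/lib/Target/RISCV/RISCVSchedAndes45.td create mode 100644 llvm/test/tools/llvm-mca/RISCV/Andes45/fpr.s create mode 100644 llvm/test/tools/llvm-mca/RISCV/Andes45/gpr.s diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td index e322ae340349c..b24d8637cb27f 100644 --- a/llvm/lib/Target/RISCV/RISCV.td +++ b/llvm/lib/Target/RISCV/RISCV.td @@ -51,6 +51,7 @@ include "RISCVMacroFusion.td" //===----------------------------------------------------------------------===// // RISC-V Scheduling Models //===----------------------------------------------------------------------===// +include "RISCVSchedAndes45.td" include "RISCVSchedGenericOOO.td" include "RISCVSchedMIPSP8700.td" include "RISCVSchedRocket.td" diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td index 735997de94e81..47968c7afcd96 100644 --- a/llvm/lib/Target/RISCV/RISCVProcessors.td +++ b/llvm/lib/Target/RISCV/RISCVProcessors.td @@ -649,7 +649,7 @@ def RP2350_HAZARD3 : RISCVProcessorModel<"rp2350-hazard3", FeatureStdExtZcmp]>; def ANDES_N45 : RISCVProcessorModel<"andes-n45", - NoSchedModel, + Andes45Model, [Feature32Bit, FeatureStdExtI, FeatureStdExtZicsr, @@ -662,7 +662,7 @@ def ANDES_N45 : RISCVProcessorModel<"andes-n45", FeatureStdExtB]>; def ANDES_NX45 : RISCVProcessorModel<"andes-nx45", - NoSchedModel, + Andes45Model, [Feature64Bit, FeatureStdExtI, FeatureStdExtZicsr, @@ -675,7 +675,7 @@ def ANDES_NX45 : RISCVProcessorModel<"andes-nx45", FeatureStdExtB]>; def ANDES_A45 : RISCVProcessorModel<"andes-a45", - NoSchedModel, + Andes45Model, [Feature32Bit, FeatureStdExtI, FeatureStdExtZicsr, @@ -688,7 +688,7 @@ def ANDES_A45 : RISCVProcessorModel<"andes-a45", FeatureStdExtB]>; def ANDES_AX45 : RISCVProcessorModel<"andes-ax45", - NoSchedModel, + Andes45Model, [Feature64Bit, FeatureStdExtI, FeatureStdExtZicsr, diff --git a/llvm/lib/Target/RISCV/RISCVSchedAndes45.td b/llvm/lib/Target/RISCV/RISCVSchedAndes45.td new file mode 100644 index 0000000000000..f42b48a1d8b83 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVSchedAndes45.td @@ -0,0 +1,339 @@ +//==- RISCVSchedAndes45.td - Andes45 Scheduling Definitions --*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// + +// FIXME: Implement scheduling model for V and other extensions. +def Andes45Model : SchedMachineModel { + let MicroOpBufferSize = 0; // Andes45 is in-order processor + let IssueWidth = 2; // 2 micro-ops dispatched per cycle + let LoadLatency = 2; + let MispredictPenalty = 5; + let CompleteModel = 0; +} + +let SchedModel = Andes45Model in { + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available. 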
FeatureStdExtZicsr, @@ -662,7 +662,7 @@ def ANDES_N45 : RISCVProcessorModel<"andes-n45", FeatureStdExtB]>; def ANDES_NX45 : RISCVProcessorModel<"andes-nx45", - NoSchedModel, + Andes45Model, [Feature64Bit, FeatureStdExtI, FeatureStdExtZicsr, @@ -675,7 +675,7 @@ def ANDES_NX45 : RISCVProcessorModel<"andes-nx45", FeatureStdExtB]>; def ANDES_A45 : RISCVProcessorModel<"andes-a45", - NoSchedModel, + Andes45Model, [Feature32Bit, FeatureStdExtI, FeatureStdExtZicsr, @@ -688,7 +688,7 @@ def ANDES_A45 : RISCVProcessorModel<"andes-a45", FeatureStdExtB]>; def ANDES_AX45 : RISCVProcessorModel<"andes-ax45", - NoSchedModel, + Andes45Model, [Feature64Bit, FeatureStdExtI, FeatureStdExtZicsr, diff --git a/llvm/lib/Target/RISCV/RISCVSchedAndes45.td b/llvm/lib/Target/RISCV/RISCVSchedAndes45.td new file mode 100644 index 0000000000000..f42b48a1d8b83 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVSchedAndes45.td @@ -0,0 +1,339 @@ +//==- RISCVSchedAndes45.td - Andes45 Scheduling Definitions --*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// + +// FIXME: Implement sheduling model for V and other extensions. +def Andes45Model : SchedMachineModel { + let MicroOpBufferSize = 0; // Andes45 is in-order processor + let IssueWidth = 2; // 2 micro-ops dispatched per cycle + let LoadLatency = 2; + let MispredictPenalty = 5; + let CompleteModel = 0; +} + +let SchedModel = Andes45Model in { + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available. 
+ +//===----------------------------------------------------------------------===// +// Andes 45 series CPU +// - 2 Integer Arithmetic and Logical Units (ALU) +// - Multiply / Divide Unit (MDU) +// - Load Store Unit (LSU) +// - Control and Status Register Unit (CSR) +// - Floating Point Multiply-Accumulate Unit (FMAC) +// - Floating Point Divide / SQRT Unit (FDIV) +// - Floating Point Move Unit (FMV) +// - Floating Point Misc Unit (FMISC) +//===----------------------------------------------------------------------===// + +let BufferSize = 0 in { +def Andes45ALU : ProcResource<2>; +def Andes45MDU : ProcResource<1>; +def Andes45LSU : ProcResource<1>; +def Andes45CSR : ProcResource<1>; + +def Andes45FMAC : ProcResource<1>; +def Andes45FDIV : ProcResource<1>; +def Andes45FMV : ProcResource<1>; +def Andes45FMISC : ProcResource<1>; +} + +// Integer arithmetic and logic +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +// Branching +def : WriteRes; +def : WriteRes; +def : WriteRes; + +// Integer multiplication +let Latency = 3 in { +def : WriteRes; +def : WriteRes; +} + +// Integer division +let Latency = 39, ReleaseAtCycles = [39] in { +def : WriteRes; +def : WriteRes; +} + +// Integer remainder +let Latency = 39, ReleaseAtCycles = [39] in { +def : WriteRes; +def : WriteRes; +} + +// Memory +let Latency = 5 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; +} + +let Latency = 3 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +} + +let Latency = 1 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +} + +// Atomic Memory +let Latency = 9 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +} + +let Latency = 3 in { +def : WriteRes; +def : WriteRes; +} + +// FMAC +let Latency = 4 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : 
WriteRes; +def : WriteRes; +def : WriteRes; +} + +// FDIV +let Latency = 12, ReleaseAtCycles = [12] in +def : WriteRes; +let Latency = 11, ReleaseAtCycles = [11] in +def : WriteRes; + +let Latency = 19, ReleaseAtCycles = [19] in +def : WriteRes; +let Latency = 18, ReleaseAtCycles = [18] in +def : WriteRes; + +let Latency = 33, ReleaseAtCycles = [33] in +def : WriteRes; +let Latency = 32, ReleaseAtCycles = [32] in +def : WriteRes; + +// FMV +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +// FMISC +let Latency = 2 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +} + +// Bitmanip +// Zba extension +def : WriteRes; +def : WriteRes; + +// Zbb extension +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +let Latency = 3 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +} + +// Zbc extension +let Latency = 3 in +def : WriteRes; + +// Zbs extension +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +// Others +def : WriteRes; +def : WriteRes; + +//===----------------------------------------------------------------------===// + +// Bypass and advance +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : 
ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +//===----------------------------------------------------------------------===// +// Unsupported extensions +defm : UnsupportedSchedQ; +defm : UnsupportedSchedSFB; +defm : UnsupportedSchedV; +defm : UnsupportedSchedXsfvcp; +defm : UnsupportedSchedZabha; +defm : UnsupportedSchedZbkb; 
+defm : UnsupportedSchedZbkx; +defm : UnsupportedSchedZfa; +defm : UnsupportedSchedZvk; +} diff --git a/llvm/test/tools/llvm-mca/RISCV/Andes45/fpr.s b/llvm/test/tools/llvm-mca/RISCV/Andes45/fpr.s new file mode 100644 index 0000000000000..d1ab4b3b6a7e0 --- /dev/null +++ b/llvm/test/tools/llvm-mca/RISCV/Andes45/fpr.s @@ -0,0 +1,129 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=riscv64 -mcpu=andes-nx45 -timeline -iterations=1 < %s | FileCheck %s + +// Can be dispatched in the same cycle if different functional units are used. +fadd.s ft0, fa0, fa1 +fdiv.s ft0, fa0, fa1 + +// FMAC +fadd.s ft0, fa0, fa1 +fmul.s ft0, fa0, fa1 +fmadd.s ft0, fa0, fa1, fa2 + +// FDIV +fdiv.s ft0, fa0, fa1 +fsqrt.s ft0, fa0 + +// FMV +fsgnj.s ft0, fa0, fa1 +fmv.x.w a0, fa0 + +// FMISC +fmin.s ft0, fa0, fa1 +fclass.s a0, fa0 +feq.s a0, fa0, fa1 +fcvt.s.w ft0, a0 + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 13 +# CHECK-NEXT: Total Cycles: 63 +# CHECK-NEXT: Total uOps: 13 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.21 +# CHECK-NEXT: IPC: 0.21 +# CHECK-NEXT: Block RThroughput: 56.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 4 1.00 fadd.s ft0, fa0, fa1 +# CHECK-NEXT: 1 19 19.00 fdiv.s ft0, fa0, fa1 +# CHECK-NEXT: 1 4 1.00 fadd.s ft0, fa0, fa1 +# CHECK-NEXT: 1 4 1.00 fmul.s ft0, fa0, fa1 +# CHECK-NEXT: 1 4 1.00 fmadd.s ft0, fa0, fa1, fa2 +# CHECK-NEXT: 1 19 19.00 fdiv.s ft0, fa0, fa1 +# CHECK-NEXT: 1 18 18.00 fsqrt.s ft0, fa0 +# CHECK-NEXT: 1 1 1.00 fsgnj.s ft0, fa0, fa1 +# CHECK-NEXT: 1 1 1.00 fmv.x.w a0, fa0 +# CHECK-NEXT: 1 2 1.00 fmin.s ft0, fa0, fa1 +# CHECK-NEXT: 1 2 1.00 fclass.s a0, fa0 +# CHECK-NEXT: 1 2 1.00 feq.s a0, fa0, fa1 +# CHECK-NEXT: 1 2 1.00 fcvt.s.w ft0, a0 
+ +# CHECK: Resources: +# CHECK-NEXT: [0.0] - Andes45ALU +# CHECK-NEXT: [0.1] - Andes45ALU +# CHECK-NEXT: [1] - Andes45CSR +# CHECK-NEXT: [2] - Andes45FDIV +# CHECK-NEXT: [3] - Andes45FMAC +# CHECK-NEXT: [4] - Andes45FMISC +# CHECK-NEXT: [5] - Andes45FMV +# CHECK-NEXT: [6] - Andes45LSU +# CHECK-NEXT: [7] - Andes45MDU + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] +# CHECK-NEXT: - - - 56.00 4.00 4.00 2.00 - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] Instructions: +# CHECK-NEXT: - - - - 1.00 - - - - fadd.s ft0, fa0, fa1 +# CHECK-NEXT: - - - 19.00 - - - - - fdiv.s ft0, fa0, fa1 +# CHECK-NEXT: - - - - 1.00 - - - - fadd.s ft0, fa0, fa1 +# CHECK-NEXT: - - - - 1.00 - - - - fmul.s ft0, fa0, fa1 +# CHECK-NEXT: - - - - 1.00 - - - - fmadd.s ft0, fa0, fa1, fa2 +# CHECK-NEXT: - - - 19.00 - - - - - fdiv.s ft0, fa0, fa1 +# CHECK-NEXT: - - - 18.00 - - - - - fsqrt.s ft0, fa0 +# CHECK-NEXT: - - - - - - 1.00 - - fsgnj.s ft0, fa0, fa1 +# CHECK-NEXT: - - - - - - 1.00 - - fmv.x.w a0, fa0 +# CHECK-NEXT: - - - - - 1.00 - - - fmin.s ft0, fa0, fa1 +# CHECK-NEXT: - - - - - 1.00 - - - fclass.s a0, fa0 +# CHECK-NEXT: - - - - - 1.00 - - - feq.s a0, fa0, fa1 +# CHECK-NEXT: - - - - - 1.00 - - - fcvt.s.w ft0, a0 + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 0123456789 0123456789 +# CHECK-NEXT: Index 0123456789 0123456789 0123456789 012 + +# CHECK: [0,0] DeeeE. . . . . . . . . . . . . fadd.s ft0, fa0, fa1 +# CHECK-NEXT: [0,1] DeeeeeeeeeeeeeeeeeeE. . . . . . . . . . fdiv.s ft0, fa0, fa1 +# CHECK-NEXT: [0,2] . . . DeeeE. . . . . . . . . . fadd.s ft0, fa0, fa1 +# CHECK-NEXT: [0,3] . . . .DeeeE . . . . . . . . . fmul.s ft0, fa0, fa1 +# CHECK-NEXT: [0,4] . . . . DeeeE . . . . . . . . . fmadd.s ft0, fa0, fa1, fa2 +# CHECK-NEXT: [0,5] . . . . DeeeeeeeeeeeeeeeeeeE . . . . . . fdiv.s ft0, fa0, fa1 +# CHECK-NEXT: [0,6] . . . . . . . . DeeeeeeeeeeeeeeeeeE . . 
fsqrt.s ft0, fa0 +# CHECK-NEXT: [0,7] . . . . . . . . . . . DE . . fsgnj.s ft0, fa0, fa1 +# CHECK-NEXT: [0,8] . . . . . . . . . . . .DE . . fmv.x.w a0, fa0 +# CHECK-NEXT: [0,9] . . . . . . . . . . . .DeE . . fmin.s ft0, fa0, fa1 +# CHECK-NEXT: [0,10] . . . . . . . . . . . . DeE. . fclass.s a0, fa0 +# CHECK-NEXT: [0,11] . . . . . . . . . . . . DeE . feq.s a0, fa0, fa1 +# CHECK-NEXT: [0,12] . . . . . . . . . . . . DeE fcvt.s.w ft0, a0 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 0.0 0.0 0.0 fadd.s ft0, fa0, fa1 +# CHECK-NEXT: 1. 1 0.0 0.0 0.0 fdiv.s ft0, fa0, fa1 +# CHECK-NEXT: 2. 1 0.0 0.0 0.0 fadd.s ft0, fa0, fa1 +# CHECK-NEXT: 3. 1 0.0 0.0 0.0 fmul.s ft0, fa0, fa1 +# CHECK-NEXT: 4. 1 0.0 0.0 0.0 fmadd.s ft0, fa0, fa1, fa2 +# CHECK-NEXT: 5. 1 0.0 0.0 0.0 fdiv.s ft0, fa0, fa1 +# CHECK-NEXT: 6. 1 0.0 0.0 0.0 fsqrt.s ft0, fa0 +# CHECK-NEXT: 7. 1 0.0 0.0 0.0 fsgnj.s ft0, fa0, fa1 +# CHECK-NEXT: 8. 1 0.0 0.0 0.0 fmv.x.w a0, fa0 +# CHECK-NEXT: 9. 1 0.0 0.0 0.0 fmin.s ft0, fa0, fa1 +# CHECK-NEXT: 10. 1 0.0 0.0 0.0 fclass.s a0, fa0 +# CHECK-NEXT: 11. 1 0.0 0.0 0.0 feq.s a0, fa0, fa1 +# CHECK-NEXT: 12. 1 0.0 0.0 0.0 fcvt.s.w ft0, a0 +# CHECK-NEXT: 1 0.0 0.0 0.0 diff --git a/llvm/test/tools/llvm-mca/RISCV/Andes45/gpr.s b/llvm/test/tools/llvm-mca/RISCV/Andes45/gpr.s new file mode 100644 index 0000000000000..fb5d4e991fbba --- /dev/null +++ b/llvm/test/tools/llvm-mca/RISCV/Andes45/gpr.s @@ -0,0 +1,126 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=riscv64 -mcpu=andes-nx45 -timeline -iterations=1 < %s | FileCheck %s + +# Two ALUs without dependency can be dispatched in the same cycle. 
+add a0, a0, a0 +sub a1, a1, a1 + +# Otherwise, they can't be dispatched in the same cycle. +addw a0, a0, a0 +subw a0, a0, a0 + +// MDU +mul a0, a0, a0 +div a0, a0, a0 + +// Memory +lb a0, 4(a1) +lw a0, 4(a1) +sb a0, 4(a1) +sw a0, 4(a1) + +// CSR +csrrw a0, mstatus, zero + +// Bitmanip +sh1add a0, a0, a0 + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 12 +# CHECK-NEXT: Total Cycles: 49 +# CHECK-NEXT: Total uOps: 12 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.24 +# CHECK-NEXT: IPC: 0.24 +# CHECK-NEXT: Block RThroughput: 40.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 0.50 add a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 sub a1, a1, a1 +# CHECK-NEXT: 1 1 0.50 addw a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 subw a0, a0, a0 +# CHECK-NEXT: 1 3 1.00 mul a0, a0, a0 +# CHECK-NEXT: 1 39 39.00 div a0, a0, a0 +# CHECK-NEXT: 1 5 1.00 * lb a0, 4(a1) +# CHECK-NEXT: 1 3 1.00 * lw a0, 4(a1) +# CHECK-NEXT: 1 1 1.00 * sb a0, 4(a1) +# CHECK-NEXT: 1 1 1.00 * sw a0, 4(a1) +# CHECK-NEXT: 1 1 1.00 U csrrw a0, mstatus, zero +# CHECK-NEXT: 1 1 0.50 sh1add a0, a0, a0 + +# CHECK: Resources: +# CHECK-NEXT: [0.0] - Andes45ALU +# CHECK-NEXT: [0.1] - Andes45ALU +# CHECK-NEXT: [1] - Andes45CSR +# CHECK-NEXT: [2] - Andes45FDIV +# CHECK-NEXT: [3] - Andes45FMAC +# CHECK-NEXT: [4] - Andes45FMISC +# CHECK-NEXT: [5] - Andes45FMV +# CHECK-NEXT: [6] - Andes45LSU +# CHECK-NEXT: [7] - Andes45MDU + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] +# CHECK-NEXT: 2.00 3.00 1.00 - - - - 4.00 40.00 + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] Instructions: +# CHECK-NEXT: - 1.00 - - - - - - - add a0, a0, a0 +# CHECK-NEXT: 1.00 - - - - - - - - sub a1, a1, a1 +# 
CHECK-NEXT: - 1.00 - - - - - - - addw a0, a0, a0 +# CHECK-NEXT: 1.00 - - - - - - - - subw a0, a0, a0 +# CHECK-NEXT: - - - - - - - - 1.00 mul a0, a0, a0 +# CHECK-NEXT: - - - - - - - - 39.00 div a0, a0, a0 +# CHECK-NEXT: - - - - - - - 1.00 - lb a0, 4(a1) +# CHECK-NEXT: - - - - - - - 1.00 - lw a0, 4(a1) +# CHECK-NEXT: - - - - - - - 1.00 - sb a0, 4(a1) +# CHECK-NEXT: - - - - - - - 1.00 - sw a0, 4(a1) +# CHECK-NEXT: - - 1.00 - - - - - - csrrw a0, mstatus, zero +# CHECK-NEXT: - 1.00 - - - - - - - sh1add a0, a0, a0 + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 0123456789 +# CHECK-NEXT: Index 0123456789 0123456789 012345678 + +# CHECK: [0,0] DE . . . . . . . . . . add a0, a0, a0 +# CHECK-NEXT: [0,1] DE . . . . . . . . . . sub a1, a1, a1 +# CHECK-NEXT: [0,2] .DE . . . . . . . . . . addw a0, a0, a0 +# CHECK-NEXT: [0,3] . DE . . . . . . . . . . subw a0, a0, a0 +# CHECK-NEXT: [0,4] . DeeE . . . . . . . . . mul a0, a0, a0 +# CHECK-NEXT: [0,5] . .DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . div a0, a0, a0 +# CHECK-NEXT: [0,6] . . . . . . . . DeeeeE . lb a0, 4(a1) +# CHECK-NEXT: [0,7] . . . . . . . . . DeeE . lw a0, 4(a1) +# CHECK-NEXT: [0,8] . . . . . . . . . DE . sb a0, 4(a1) +# CHECK-NEXT: [0,9] . . . . . . . . . .DE. sw a0, 4(a1) +# CHECK-NEXT: [0,10] . . . . . . . . . .DE. csrrw a0, mstatus, zero +# CHECK-NEXT: [0,11] . . . . . . . . . . DE sh1add a0, a0, a0 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 0.0 0.0 0.0 add a0, a0, a0 +# CHECK-NEXT: 1. 1 0.0 0.0 0.0 sub a1, a1, a1 +# CHECK-NEXT: 2. 1 0.0 0.0 0.0 addw a0, a0, a0 +# CHECK-NEXT: 3. 1 0.0 0.0 0.0 subw a0, a0, a0 +# CHECK-NEXT: 4. 1 0.0 0.0 0.0 mul a0, a0, a0 +# CHECK-NEXT: 5. 
1 0.0 0.0 0.0 div a0, a0, a0 +# CHECK-NEXT: 6. 1 0.0 0.0 0.0 lb a0, 4(a1) +# CHECK-NEXT: 7. 1 0.0 0.0 0.0 lw a0, 4(a1) +# CHECK-NEXT: 8. 1 0.0 0.0 0.0 sb a0, 4(a1) +# CHECK-NEXT: 9. 1 0.0 0.0 0.0 sw a0, 4(a1) +# CHECK-NEXT: 10. 1 0.0 0.0 0.0 csrrw a0, mstatus, zero +# CHECK-NEXT: 11. 1 0.0 0.0 0.0 sh1add a0, a0, a0 +# CHECK-NEXT: 1 0.0 0.0 0.0 From 1028f25e2d5d1aa446c1925c727cf72b33974303 Mon Sep 17 00:00:00 2001 From: Jim Lin Date: Wed, 4 Jun 2025 13:35:49 +0800 Subject: [PATCH 2/2] Add more tests into gpr.s --- llvm/test/tools/llvm-mca/RISCV/Andes45/gpr.s | 188 ++++++++++++++++--- 1 file changed, 157 insertions(+), 31 deletions(-) diff --git a/llvm/test/tools/llvm-mca/RISCV/Andes45/gpr.s b/llvm/test/tools/llvm-mca/RISCV/Andes45/gpr.s index fb5d4e991fbba..f6dc6eef3f0ff 100644 --- a/llvm/test/tools/llvm-mca/RISCV/Andes45/gpr.s +++ b/llvm/test/tools/llvm-mca/RISCV/Andes45/gpr.s @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=riscv64 -mcpu=andes-nx45 -timeline -iterations=1 < %s | FileCheck %s +# RUN: llvm-mca -mtriple=riscv64 -mcpu=andes-nx45 -mattr=+zbc -timeline -iterations=1 < %s | FileCheck %s # Two ALUs without dependency can be dispatched in the same cycle. 
add a0, a0, a0 @@ -9,31 +9,67 @@ sub a1, a1, a1 addw a0, a0, a0 subw a0, a0, a0 +// ALU and Shift +slli a0, a0, 4 +slliw a0, a0, 4 +srl a0, a0, a0 +srlw a0, a0, a0 + // MDU mul a0, a0, a0 +mulw a0, a0, a0 div a0, a0, a0 +divw a0, a0, a0 // Memory lb a0, 4(a1) +lh a0, 4(a1) lw a0, 4(a1) +ld a0, 4(a1) + +flw fa0, 4(a1) +fld fa0, 4(a1) + sb a0, 4(a1) +sh a0, 4(a1) sw a0, 4(a1) +sd a0, 4(a1) + +// Atomic Memory +amoswap.w a0, a1, (a0) +amoswap.d a0, a1, (a0) +lr.w a0, (a0) +lr.d a0, (a0) +sc.w a0, a1, (a0) +sc.d a0, a1, (a0) // CSR csrrw a0, mstatus, zero // Bitmanip sh1add a0, a0, a0 +sh1add.uw a0, a0, a0 +rori a0, a0, 4 +roriw a0, a0, 4 +rol a0, a0, a0 +rolw a0, a0, a0 +clz a0, a0 +clzw a0, a0 +clmul a0, a0, a0 +bclri a0, a0, 4 +bclr a0, a0, a0 +bexti a0, a0, 4 +bext a0, a0, a0 # CHECK: Iterations: 1 -# CHECK-NEXT: Instructions: 12 -# CHECK-NEXT: Total Cycles: 49 -# CHECK-NEXT: Total uOps: 12 +# CHECK-NEXT: Instructions: 42 +# CHECK-NEXT: Total Cycles: 158 +# CHECK-NEXT: Total uOps: 42 # CHECK: Dispatch Width: 2 -# CHECK-NEXT: uOps Per Cycle: 0.24 -# CHECK-NEXT: IPC: 0.24 -# CHECK-NEXT: Block RThroughput: 40.0 +# CHECK-NEXT: uOps Per Cycle: 0.27 +# CHECK-NEXT: IPC: 0.27 +# CHECK-NEXT: Block RThroughput: 80.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -48,14 +84,44 @@ sh1add a0, a0, a0 # CHECK-NEXT: 1 1 0.50 sub a1, a1, a1 # CHECK-NEXT: 1 1 0.50 addw a0, a0, a0 # CHECK-NEXT: 1 1 0.50 subw a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 slli a0, a0, 4 +# CHECK-NEXT: 1 1 0.50 slliw a0, a0, 4 +# CHECK-NEXT: 1 1 0.50 srl a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 srlw a0, a0, a0 # CHECK-NEXT: 1 3 1.00 mul a0, a0, a0 +# CHECK-NEXT: 1 3 1.00 mulw a0, a0, a0 # CHECK-NEXT: 1 39 39.00 div a0, a0, a0 +# CHECK-NEXT: 1 39 39.00 divw a0, a0, a0 # CHECK-NEXT: 1 5 1.00 * lb a0, 4(a1) +# CHECK-NEXT: 1 5 1.00 * lh a0, 4(a1) # CHECK-NEXT: 1 3 1.00 * lw a0, 4(a1) +# CHECK-NEXT: 1 3 1.00 * ld a0, 4(a1) +# CHECK-NEXT: 1 3 1.00 * flw fa0, 4(a1) +# CHECK-NEXT: 1 3 1.00 * fld fa0, 4(a1) # CHECK-NEXT: 
1 1 1.00 * sb a0, 4(a1) +# CHECK-NEXT: 1 1 1.00 * sh a0, 4(a1) # CHECK-NEXT: 1 1 1.00 * sw a0, 4(a1) +# CHECK-NEXT: 1 1 1.00 * sd a0, 4(a1) +# CHECK-NEXT: 1 9 1.00 * * amoswap.w a0, a1, (a0) +# CHECK-NEXT: 1 9 1.00 * * amoswap.d a0, a1, (a0) +# CHECK-NEXT: 1 9 1.00 * lr.w a0, (a0) +# CHECK-NEXT: 1 9 1.00 * lr.d a0, (a0) +# CHECK-NEXT: 1 3 1.00 * sc.w a0, a1, (a0) +# CHECK-NEXT: 1 3 1.00 * sc.d a0, a1, (a0) # CHECK-NEXT: 1 1 1.00 U csrrw a0, mstatus, zero # CHECK-NEXT: 1 1 0.50 sh1add a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 sh1add.uw a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 rori a0, a0, 4 +# CHECK-NEXT: 1 1 0.50 roriw a0, a0, 4 +# CHECK-NEXT: 1 1 0.50 rol a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 rolw a0, a0, a0 +# CHECK-NEXT: 1 3 0.50 clz a0, a0 +# CHECK-NEXT: 1 3 0.50 clzw a0, a0 +# CHECK-NEXT: 1 3 0.50 clmul a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 bclri a0, a0, 4 +# CHECK-NEXT: 1 1 0.50 bclr a0, a0, a0 +# CHECK-NEXT: 1 1 0.50 bexti a0, a0, 4 +# CHECK-NEXT: 1 1 0.50 bext a0, a0, a0 # CHECK: Resources: # CHECK-NEXT: [0.0] - Andes45ALU @@ -70,7 +136,7 @@ sh1add a0, a0, a0 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] -# CHECK-NEXT: 2.00 3.00 1.00 - - - - 4.00 40.00 +# CHECK-NEXT: 10.00 11.00 1.00 - - - - 16.00 80.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] Instructions: @@ -78,31 +144,61 @@ sh1add a0, a0, a0 # CHECK-NEXT: 1.00 - - - - - - - - sub a1, a1, a1 # CHECK-NEXT: - 1.00 - - - - - - - addw a0, a0, a0 # CHECK-NEXT: 1.00 - - - - - - - - subw a0, a0, a0 +# CHECK-NEXT: - 1.00 - - - - - - - slli a0, a0, 4 +# CHECK-NEXT: 1.00 - - - - - - - - slliw a0, a0, 4 +# CHECK-NEXT: - 1.00 - - - - - - - srl a0, a0, a0 +# CHECK-NEXT: 1.00 - - - - - - - - srlw a0, a0, a0 # CHECK-NEXT: - - - - - - - - 1.00 mul a0, a0, a0 +# CHECK-NEXT: - - - - - - - - 1.00 mulw a0, a0, a0 # CHECK-NEXT: - - - - - - - - 39.00 div a0, a0, a0 +# CHECK-NEXT: - - - - - - - - 39.00 divw a0, a0, a0 # CHECK-NEXT: - - - - - 
- - 1.00 - lb a0, 4(a1) +# CHECK-NEXT: - - - - - - - 1.00 - lh a0, 4(a1) # CHECK-NEXT: - - - - - - - 1.00 - lw a0, 4(a1) +# CHECK-NEXT: - - - - - - - 1.00 - ld a0, 4(a1) +# CHECK-NEXT: - - - - - - - 1.00 - flw fa0, 4(a1) +# CHECK-NEXT: - - - - - - - 1.00 - fld fa0, 4(a1) # CHECK-NEXT: - - - - - - - 1.00 - sb a0, 4(a1) +# CHECK-NEXT: - - - - - - - 1.00 - sh a0, 4(a1) # CHECK-NEXT: - - - - - - - 1.00 - sw a0, 4(a1) +# CHECK-NEXT: - - - - - - - 1.00 - sd a0, 4(a1) +# CHECK-NEXT: - - - - - - - 1.00 - amoswap.w a0, a1, (a0) +# CHECK-NEXT: - - - - - - - 1.00 - amoswap.d a0, a1, (a0) +# CHECK-NEXT: - - - - - - - 1.00 - lr.w a0, (a0) +# CHECK-NEXT: - - - - - - - 1.00 - lr.d a0, (a0) +# CHECK-NEXT: - - - - - - - 1.00 - sc.w a0, a1, (a0) +# CHECK-NEXT: - - - - - - - 1.00 - sc.d a0, a1, (a0) # CHECK-NEXT: - - 1.00 - - - - - - csrrw a0, mstatus, zero # CHECK-NEXT: - 1.00 - - - - - - - sh1add a0, a0, a0 +# CHECK-NEXT: 1.00 - - - - - - - - sh1add.uw a0, a0, a0 +# CHECK-NEXT: - 1.00 - - - - - - - rori a0, a0, 4 +# CHECK-NEXT: 1.00 - - - - - - - - roriw a0, a0, 4 +# CHECK-NEXT: - 1.00 - - - - - - - rol a0, a0, a0 +# CHECK-NEXT: 1.00 - - - - - - - - rolw a0, a0, a0 +# CHECK-NEXT: - 1.00 - - - - - - - clz a0, a0 +# CHECK-NEXT: 1.00 - - - - - - - - clzw a0, a0 +# CHECK-NEXT: - 1.00 - - - - - - - clmul a0, a0, a0 +# CHECK-NEXT: 1.00 - - - - - - - - bclri a0, a0, 4 +# CHECK-NEXT: - 1.00 - - - - - - - bclr a0, a0, a0 +# CHECK-NEXT: 1.00 - - - - - - - - bexti a0, a0, 4 +# CHECK-NEXT: - 1.00 - - - - - - - bext a0, a0, a0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 012345678 - -# CHECK: [0,0] DE . . . . . . . . . . add a0, a0, a0 -# CHECK-NEXT: [0,1] DE . . . . . . . . . . sub a1, a1, a1 -# CHECK-NEXT: [0,2] .DE . . . . . . . . . . addw a0, a0, a0 -# CHECK-NEXT: [0,3] . DE . . . . . . . . . . subw a0, a0, a0 -# CHECK-NEXT: [0,4] . DeeE . . . . . . . . . mul a0, a0, a0 -# CHECK-NEXT: [0,5] . 
.DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE . div a0, a0, a0 -# CHECK-NEXT: [0,6] . . . . . . . . DeeeeE . lb a0, 4(a1) -# CHECK-NEXT: [0,7] . . . . . . . . . DeeE . lw a0, 4(a1) -# CHECK-NEXT: [0,8] . . . . . . . . . DE . sb a0, 4(a1) -# CHECK-NEXT: [0,9] . . . . . . . . . .DE. sw a0, 4(a1) -# CHECK-NEXT: [0,10] . . . . . . . . . .DE. csrrw a0, mstatus, zero -# CHECK-NEXT: [0,11] . . . . . . . . . . DE sh1add a0, a0, a0 +# CHECK-NEXT: 0123456789 0123456789 012 +# CHECK-NEXT: Index 0123456789 0123456789 0123456789 + +# CHECK: [0,0] DE . . . . . . . . . . . add a0, a0, a0 +# CHECK-NEXT: [0,1] DE . . . . . . . . . . . sub a1, a1, a1 +# CHECK-NEXT: [0,2] .DE . . . . . . . . . . . addw a0, a0, a0 +# CHECK-NEXT: [0,3] . DE . . . . . . . . . . . subw a0, a0, a0 +# CHECK-NEXT: [0,4] . DE. . . . . . . . . . . slli a0, a0, 4 +# CHECK-NEXT: [0,5] . DE . . . . . . . . . . slliw a0, a0, 4 +# CHECK-NEXT: [0,6] . DE . . . . . . . . . . srl a0, a0, a0 +# CHECK-NEXT: [0,7] . .DE . . . . . . . . . . srlw a0, a0, a0 +# CHECK-NEXT: [0,8] . . DeeE . . . . . . . . . mul a0, a0, a0 +# CHECK-NEXT: [0,9] . . DeeE . . . . . . . . . mulw a0, a0, a0 +# CHECK-NEXT: [0,10] . . . DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeE div a0, a0, a0 +# CHECK-NEXT: Truncated display due to cycle limit # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -115,12 +211,42 @@ sh1add a0, a0, a0 # CHECK-NEXT: 1. 1 0.0 0.0 0.0 sub a1, a1, a1 # CHECK-NEXT: 2. 1 0.0 0.0 0.0 addw a0, a0, a0 # CHECK-NEXT: 3. 1 0.0 0.0 0.0 subw a0, a0, a0 -# CHECK-NEXT: 4. 1 0.0 0.0 0.0 mul a0, a0, a0 -# CHECK-NEXT: 5. 1 0.0 0.0 0.0 div a0, a0, a0 -# CHECK-NEXT: 6. 1 0.0 0.0 0.0 lb a0, 4(a1) -# CHECK-NEXT: 7. 1 0.0 0.0 0.0 lw a0, 4(a1) -# CHECK-NEXT: 8. 1 0.0 0.0 0.0 sb a0, 4(a1) -# CHECK-NEXT: 9. 1 0.0 0.0 0.0 sw a0, 4(a1) -# CHECK-NEXT: 10. 1 0.0 0.0 0.0 csrrw a0, mstatus, zero -# CHECK-NEXT: 11. 1 0.0 0.0 0.0 sh1add a0, a0, a0 +# CHECK-NEXT: 4. 1 0.0 0.0 0.0 slli a0, a0, 4 +# CHECK-NEXT: 5. 
1 0.0 0.0 0.0 slliw a0, a0, 4 +# CHECK-NEXT: 6. 1 0.0 0.0 0.0 srl a0, a0, a0 +# CHECK-NEXT: 7. 1 0.0 0.0 0.0 srlw a0, a0, a0 +# CHECK-NEXT: 8. 1 0.0 0.0 0.0 mul a0, a0, a0 +# CHECK-NEXT: 9. 1 0.0 0.0 0.0 mulw a0, a0, a0 +# CHECK-NEXT: 10. 1 0.0 0.0 0.0 div a0, a0, a0 +# CHECK-NEXT: 11. 1 0.0 0.0 0.0 divw a0, a0, a0 +# CHECK-NEXT: 12. 1 0.0 0.0 0.0 lb a0, 4(a1) +# CHECK-NEXT: 13. 1 0.0 0.0 0.0 lh a0, 4(a1) +# CHECK-NEXT: 14. 1 0.0 0.0 0.0 lw a0, 4(a1) +# CHECK-NEXT: 15. 1 0.0 0.0 0.0 ld a0, 4(a1) +# CHECK-NEXT: 16. 1 0.0 0.0 0.0 flw fa0, 4(a1) +# CHECK-NEXT: 17. 1 0.0 0.0 0.0 fld fa0, 4(a1) +# CHECK-NEXT: 18. 1 0.0 0.0 0.0 sb a0, 4(a1) +# CHECK-NEXT: 19. 1 0.0 0.0 0.0 sh a0, 4(a1) +# CHECK-NEXT: 20. 1 0.0 0.0 0.0 sw a0, 4(a1) +# CHECK-NEXT: 21. 1 0.0 0.0 0.0 sd a0, 4(a1) +# CHECK-NEXT: 22. 1 0.0 0.0 0.0 amoswap.w a0, a1, (a0) +# CHECK-NEXT: 23. 1 0.0 0.0 0.0 amoswap.d a0, a1, (a0) +# CHECK-NEXT: 24. 1 0.0 0.0 0.0 lr.w a0, (a0) +# CHECK-NEXT: 25. 1 0.0 0.0 0.0 lr.d a0, (a0) +# CHECK-NEXT: 26. 1 0.0 0.0 0.0 sc.w a0, a1, (a0) +# CHECK-NEXT: 27. 1 0.0 0.0 0.0 sc.d a0, a1, (a0) +# CHECK-NEXT: 28. 1 0.0 0.0 0.0 csrrw a0, mstatus, zero +# CHECK-NEXT: 29. 1 0.0 0.0 0.0 sh1add a0, a0, a0 +# CHECK-NEXT: 30. 1 0.0 0.0 0.0 sh1add.uw a0, a0, a0 +# CHECK-NEXT: 31. 1 0.0 0.0 0.0 rori a0, a0, 4 +# CHECK-NEXT: 32. 1 0.0 0.0 0.0 roriw a0, a0, 4 +# CHECK-NEXT: 33. 1 0.0 0.0 0.0 rol a0, a0, a0 +# CHECK-NEXT: 34. 1 0.0 0.0 0.0 rolw a0, a0, a0 +# CHECK-NEXT: 35. 1 0.0 0.0 0.0 clz a0, a0 +# CHECK-NEXT: 36. 1 0.0 0.0 0.0 clzw a0, a0 +# CHECK-NEXT: 37. 1 0.0 0.0 0.0 clmul a0, a0, a0 +# CHECK-NEXT: 38. 1 0.0 0.0 0.0 bclri a0, a0, 4 +# CHECK-NEXT: 39. 1 0.0 0.0 0.0 bclr a0, a0, a0 +# CHECK-NEXT: 40. 1 0.0 0.0 0.0 bexti a0, a0, 4 +# CHECK-NEXT: 41. 1 0.0 0.0 0.0 bext a0, a0, a0 # CHECK-NEXT: 1 0.0 0.0 0.0