diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td index 4e0c64a5ca2c6..87c07c3cd505f 100644 --- a/llvm/lib/Target/RISCV/RISCV.td +++ b/llvm/lib/Target/RISCV/RISCV.td @@ -50,6 +50,7 @@ include "RISCVSchedMIPSP8700.td" include "RISCVSchedRocket.td" include "RISCVSchedSiFive7.td" include "RISCVSchedSiFiveP400.td" +include "RISCVSchedSiFiveP500.td" include "RISCVSchedSiFiveP600.td" include "RISCVSchedSyntacoreSCR1.td" include "RISCVSchedSyntacoreSCR345.td" diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td index 6dfed7ddeb9f6..28b13f74c2991 100644 --- a/llvm/lib/Target/RISCV/RISCVProcessors.td +++ b/llvm/lib/Target/RISCV/RISCVProcessors.td @@ -327,7 +327,7 @@ defvar SiFiveP500TuneFeatures = [TuneNoDefaultUnroll, TuneAUIPCADDIFusion, TunePostRAScheduler]; -def SIFIVE_P550 : RISCVProcessorModel<"sifive-p550", NoSchedModel, +def SIFIVE_P550 : RISCVProcessorModel<"sifive-p550", SiFiveP500Model, [Feature64Bit, FeatureStdExtI, FeatureStdExtZifencei, diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP500.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP500.td new file mode 100644 index 0000000000000..32cfa701c4fdb --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP500.td @@ -0,0 +1,361 @@ +//==- RISCVSchedSiFiveP500.td - SiFiveP500 Scheduling Defs ---*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// + +def SiFiveP500Model : SchedMachineModel { + let IssueWidth = 3; // 3 micro-ops are dispatched per cycle. + let MicroOpBufferSize = 96; // Max micro-ops that can be buffered. + let LoadLatency = 4; // Cycles for loads to access the cache. 
+ let MispredictPenalty = 9; // Extra cycles for a mispredicted branch. + let CompleteModel = false; +} + +// The SiFiveP500 microarchitecture has 7 pipelines: +// Three pipelines for integer operations. +// Two pipelines for FPU operations. +// One pipeline for Load operations. +// One pipeline for Store operations. +let SchedModel = SiFiveP500Model in { + +def SiFiveP500IEXQ0 : ProcResource<1>; +def SiFiveP500IEXQ1 : ProcResource<1>; +def SiFiveP500IEXQ2 : ProcResource<1>; +def SiFiveP500FEXQ0 : ProcResource<1>; +def SiFiveP500FEXQ1 : ProcResource<1>; +def SiFiveP500Load : ProcResource<1>; +def SiFiveP500Store : ProcResource<1>; + +def SiFiveP500IntArith : ProcResGroup<[SiFiveP500IEXQ0, SiFiveP500IEXQ1, SiFiveP500IEXQ2]>; +defvar SiFiveP500Branch = SiFiveP500IEXQ0; +defvar SiFiveP500SYS = SiFiveP500IEXQ1; +defvar SiFiveP500CMOV = SiFiveP500IEXQ1; +defvar SiFiveP500MulI2F = SiFiveP500IEXQ2; +def SiFiveP500Div : ProcResource<1>; + +def SiFiveP500FloatArith : ProcResGroup<[SiFiveP500FEXQ0, SiFiveP500FEXQ1]>; +defvar SiFiveP500F2I = SiFiveP500FEXQ0; +def SiFiveP500FloatDiv : ProcResource<1>; + +let Latency = 1 in { +// Integer arithmetic and logic +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +// Branching +def : WriteRes; +def : WriteRes; +def : WriteRes; +} + +// CMOV +def P500WriteCMOV : SchedWriteRes<[SiFiveP500Branch, SiFiveP500CMOV]> { + let Latency = 2; + let NumMicroOps = 2; +} +def : InstRW<[P500WriteCMOV], (instrs PseudoCCMOVGPRNoX0)>; + +let Latency = 3 in { +// Integer multiplication +def : WriteRes; +def : WriteRes; +// cpop[w] look exactly like multiply. 
+def : WriteRes; +def : WriteRes; +} + +// Integer division +def : WriteRes { + let Latency = 35; + let ReleaseAtCycles = [1, 34]; +} +def : WriteRes { + let Latency = 20; + let ReleaseAtCycles = [1, 19]; +} + +// Integer remainder +def : WriteRes { + let Latency = 35; + let ReleaseAtCycles = [1, 34]; +} +def : WriteRes { + let Latency = 20; + let ReleaseAtCycles = [1, 19]; +} + +let Latency = 1 in { +// Bitmanip +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +def : WriteRes; +def : WriteRes; + +def : WriteRes; + +def : WriteRes; +def : WriteRes; +} + +// Memory +let Latency = 1 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +} +let Latency = 4 in { +def : WriteRes; +def : WriteRes; +} +let Latency = 4 in { +def : WriteRes; +def : WriteRes; +} + +let Latency = 6 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; +} + +// Atomic memory +let Latency = 3 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +} + +// Floating point +let Latency = 4 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; + +def : WriteRes; +def : WriteRes; +def : WriteRes; + +def : WriteRes; +def : WriteRes; +def : WriteRes; +} + +let Latency = 2 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; + +def : WriteRes; +def : WriteRes; +def : WriteRes; +} + +// Half precision. +def : WriteRes { + let Latency = 19; + let ReleaseAtCycles = [1, 18]; +} +def : WriteRes { + let Latency = 18; + let ReleaseAtCycles = [1, 17]; +} + +// Single precision. 
+def : WriteRes { + let Latency = 19; + let ReleaseAtCycles = [1, 18]; +} +def : WriteRes { + let Latency = 18; + let ReleaseAtCycles = [1, 17]; +} + +// Double precision +def : WriteRes { + let Latency = 33; + let ReleaseAtCycles = [1, 32]; +} +def : WriteRes { + let Latency = 33; + let ReleaseAtCycles = [1, 32]; +} + +// Conversions +let Latency = 2 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +} + +// Others +def : WriteRes; +def : WriteRes; + +// FIXME: This could be better modeled by looking at the regclasses of the operands. 
+def : InstRW<[WriteIALU, ReadIALU], (instrs COPY)>; + +//===----------------------------------------------------------------------===// +// Bypass and advance +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +// Bitmanip +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def 
: ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +//===----------------------------------------------------------------------===// +// Unsupported extensions +defm : UnsupportedSchedV; +defm : UnsupportedSchedZabha; +defm : UnsupportedSchedZbc; +defm : UnsupportedSchedZbs; +defm : UnsupportedSchedZbkb; +defm : UnsupportedSchedZbkx; +defm : UnsupportedSchedSFB; +defm : UnsupportedSchedZfa; +defm : UnsupportedSchedZvk; +defm : UnsupportedSchedXsfvcp; +} diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveP500/alu.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveP500/alu.s new file mode 100644 index 0000000000000..d7791b73bf1bf --- /dev/null +++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveP500/alu.s @@ -0,0 +1,200 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=riscv64 -mcpu=sifive-p550 -iterations=1 < %s | FileCheck %s + +add t0, t0, t0 +addw t2, t2, t2 +sub t1, t1, t1 +subw t3, t3, t3 + +addi t0, t0, 100 +addiw t2, t2, 200 + +and t2, t2, t2 +or t3, t3, t3 +xor t4, t4, t4 + +andi t2, t2, 10 +ori t3, t3, 20 +xori t4, t4, 30 + +sll t5, t5, t6 +srl t0, s0, t6 +sra s1, s1, t6 + +slli t5, t5, 1 +srli t0, s0, 2 +srai s1, s1, 3 + +mul s6, s6, s7 +mulw s4, s4, a2 + +div a0, a0, a0 +divw a1, a1, a1 +rem a2, a2, a2 +remw a2, a2, a2 + +rol t5, t5, t6 +ror t6, t6, t5 +rori t5, t5, 5 + +rolw t5, t5, t6 +rorw t6, t6, t5 +roriw t5, t5, 5 + +andn a0, a0, a1 +orn a1, a2, a3 +xnor a2, a3, a4 + +min a3, a4, a5 +max a4, a5, a6 +minu a5, a6, a7 +maxu a6, a7, s0 + +orc.b s0, s1 +rev8 s1, s2 + +cpop t1, t1 +cpopw t2, t2 + +sh1add t0, t1, t2 +sh2add t0, t1, t2 +sh3add t0, t1, t2 + +add.uw s0, s1, s2 +sh1add.uw t0, t1, t2 +sh2add.uw t0, t1, t2 +sh3add.uw t0, t1, t2 + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 48 +# CHECK-NEXT: Total Cycles: 120 +# CHECK-NEXT: Total uOps: 48 + +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 0.40 +# CHECK-NEXT: IPC: 0.40 +# CHECK-NEXT: Block 
RThroughput: 106.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 0.33 add t0, t0, t0 +# CHECK-NEXT: 1 1 0.33 addw t2, t2, t2 +# CHECK-NEXT: 1 1 0.33 sub t1, t1, t1 +# CHECK-NEXT: 1 1 0.33 subw t3, t3, t3 +# CHECK-NEXT: 1 1 0.33 addi t0, t0, 100 +# CHECK-NEXT: 1 1 0.33 addiw t2, t2, 200 +# CHECK-NEXT: 1 1 0.33 and t2, t2, t2 +# CHECK-NEXT: 1 1 0.33 or t3, t3, t3 +# CHECK-NEXT: 1 1 0.33 xor t4, t4, t4 +# CHECK-NEXT: 1 1 0.33 andi t2, t2, 10 +# CHECK-NEXT: 1 1 0.33 ori t3, t3, 20 +# CHECK-NEXT: 1 1 0.33 xori t4, t4, 30 +# CHECK-NEXT: 1 1 0.33 sll t5, t5, t6 +# CHECK-NEXT: 1 1 0.33 srl t0, s0, t6 +# CHECK-NEXT: 1 1 0.33 sra s1, s1, t6 +# CHECK-NEXT: 1 1 0.33 slli t5, t5, 1 +# CHECK-NEXT: 1 1 0.33 srli t0, s0, 2 +# CHECK-NEXT: 1 1 0.33 srai s1, s1, 3 +# CHECK-NEXT: 1 3 1.00 mul s6, s6, s7 +# CHECK-NEXT: 1 3 1.00 mulw s4, s4, a2 +# CHECK-NEXT: 1 35 34.00 div a0, a0, a0 +# CHECK-NEXT: 1 20 19.00 divw a1, a1, a1 +# CHECK-NEXT: 1 35 34.00 rem a2, a2, a2 +# CHECK-NEXT: 1 20 19.00 remw a2, a2, a2 +# CHECK-NEXT: 1 1 0.33 rol t5, t5, t6 +# CHECK-NEXT: 1 1 0.33 ror t6, t6, t5 +# CHECK-NEXT: 1 1 0.33 rori t5, t5, 5 +# CHECK-NEXT: 1 1 0.33 rolw t5, t5, t6 +# CHECK-NEXT: 1 1 0.33 rorw t6, t6, t5 +# CHECK-NEXT: 1 1 0.33 roriw t5, t5, 5 +# CHECK-NEXT: 1 1 0.33 andn a0, a0, a1 +# CHECK-NEXT: 1 1 0.33 orn a1, a2, a3 +# CHECK-NEXT: 1 1 0.33 xnor a2, a3, a4 +# CHECK-NEXT: 1 1 0.33 min a3, a4, a5 +# CHECK-NEXT: 1 1 0.33 max a4, a5, a6 +# CHECK-NEXT: 1 1 0.33 minu a5, a6, a7 +# CHECK-NEXT: 1 1 0.33 maxu a6, a7, s0 +# CHECK-NEXT: 1 1 0.33 orc.b s0, s1 +# CHECK-NEXT: 1 1 0.33 rev8 s1, s2 +# CHECK-NEXT: 1 3 1.00 cpop t1, t1 +# CHECK-NEXT: 1 3 1.00 cpopw t2, t2 +# CHECK-NEXT: 1 1 0.33 sh1add t0, t1, t2 +# CHECK-NEXT: 1 1 0.33 sh2add t0, t1, t2 +# CHECK-NEXT: 1 1 
0.33 sh3add t0, t1, t2 +# CHECK-NEXT: 1 1 0.33 add.uw s0, s1, s2 +# CHECK-NEXT: 1 1 0.33 sh1add.uw t0, t1, t2 +# CHECK-NEXT: 1 1 0.33 sh2add.uw t0, t1, t2 +# CHECK-NEXT: 1 1 0.33 sh3add.uw t0, t1, t2 + +# CHECK: Resources: +# CHECK-NEXT: [0] - SiFiveP500Div +# CHECK-NEXT: [1] - SiFiveP500FEXQ0 +# CHECK-NEXT: [2] - SiFiveP500FEXQ1 +# CHECK-NEXT: [3] - SiFiveP500FloatDiv +# CHECK-NEXT: [4] - SiFiveP500IEXQ0 +# CHECK-NEXT: [5] - SiFiveP500IEXQ1 +# CHECK-NEXT: [6] - SiFiveP500IEXQ2 +# CHECK-NEXT: [7] - SiFiveP500Load +# CHECK-NEXT: [8] - SiFiveP500Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] +# CHECK-NEXT: 106.00 - - - 14.00 15.00 19.00 - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] Instructions: +# CHECK-NEXT: - - - - - - 1.00 - - add t0, t0, t0 +# CHECK-NEXT: - - - - - 1.00 - - - addw t2, t2, t2 +# CHECK-NEXT: - - - - 1.00 - - - - sub t1, t1, t1 +# CHECK-NEXT: - - - - - - 1.00 - - subw t3, t3, t3 +# CHECK-NEXT: - - - - - 1.00 - - - addi t0, t0, 100 +# CHECK-NEXT: - - - - 1.00 - - - - addiw t2, t2, 200 +# CHECK-NEXT: - - - - - - 1.00 - - and t2, t2, t2 +# CHECK-NEXT: - - - - - 1.00 - - - or t3, t3, t3 +# CHECK-NEXT: - - - - 1.00 - - - - xor t4, t4, t4 +# CHECK-NEXT: - - - - - - 1.00 - - andi t2, t2, 10 +# CHECK-NEXT: - - - - - 1.00 - - - ori t3, t3, 20 +# CHECK-NEXT: - - - - 1.00 - - - - xori t4, t4, 30 +# CHECK-NEXT: - - - - - - 1.00 - - sll t5, t5, t6 +# CHECK-NEXT: - - - - - 1.00 - - - srl t0, s0, t6 +# CHECK-NEXT: - - - - 1.00 - - - - sra s1, s1, t6 +# CHECK-NEXT: - - - - - - 1.00 - - slli t5, t5, 1 +# CHECK-NEXT: - - - - - 1.00 - - - srli t0, s0, 2 +# CHECK-NEXT: - - - - 1.00 - - - - srai s1, s1, 3 +# CHECK-NEXT: - - - - - - 1.00 - - mul s6, s6, s7 +# CHECK-NEXT: - - - - - - 1.00 - - mulw s4, s4, a2 +# CHECK-NEXT: 34.00 - - - - - 1.00 - - div a0, a0, a0 +# CHECK-NEXT: 19.00 - - - - - 1.00 - - divw a1, a1, a1 +# CHECK-NEXT: 34.00 - - - - - 1.00 - - rem 
a2, a2, a2 +# CHECK-NEXT: 19.00 - - - - - 1.00 - - remw a2, a2, a2 +# CHECK-NEXT: - - - - - 1.00 - - - rol t5, t5, t6 +# CHECK-NEXT: - - - - 1.00 - - - - ror t6, t6, t5 +# CHECK-NEXT: - - - - - 1.00 - - - rori t5, t5, 5 +# CHECK-NEXT: - - - - 1.00 - - - - rolw t5, t5, t6 +# CHECK-NEXT: - - - - - 1.00 - - - rorw t6, t6, t5 +# CHECK-NEXT: - - - - 1.00 - - - - roriw t5, t5, 5 +# CHECK-NEXT: - - - - - 1.00 - - - andn a0, a0, a1 +# CHECK-NEXT: - - - - 1.00 - - - - orn a1, a2, a3 +# CHECK-NEXT: - - - - - - 1.00 - - xnor a2, a3, a4 +# CHECK-NEXT: - - - - - - 1.00 - - min a3, a4, a5 +# CHECK-NEXT: - - - - - 1.00 - - - max a4, a5, a6 +# CHECK-NEXT: - - - - 1.00 - - - - minu a5, a6, a7 +# CHECK-NEXT: - - - - - - 1.00 - - maxu a6, a7, s0 +# CHECK-NEXT: - - - - - 1.00 - - - orc.b s0, s1 +# CHECK-NEXT: - - - - 1.00 - - - - rev8 s1, s2 +# CHECK-NEXT: - - - - - - 1.00 - - cpop t1, t1 +# CHECK-NEXT: - - - - - - 1.00 - - cpopw t2, t2 +# CHECK-NEXT: - - - - 1.00 - - - - sh1add t0, t1, t2 +# CHECK-NEXT: - - - - - 1.00 - - - sh2add t0, t1, t2 +# CHECK-NEXT: - - - - - - 1.00 - - sh3add t0, t1, t2 +# CHECK-NEXT: - - - - - 1.00 - - - add.uw s0, s1, s2 +# CHECK-NEXT: - - - - - 1.00 - - - sh1add.uw t0, t1, t2 +# CHECK-NEXT: - - - - 1.00 - - - - sh2add.uw t0, t1, t2 +# CHECK-NEXT: - - - - - - 1.00 - - sh3add.uw t0, t1, t2 diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveP500/fp.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveP500/fp.s new file mode 100644 index 0000000000000..8eb64405b7911 --- /dev/null +++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveP500/fp.s @@ -0,0 +1,155 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=riscv64 -mcpu=sifive-p550 -iterations=1 < %s | FileCheck %s + +fmin.s ft0, fa0, fa1 +fmax.s ft1, fa0, fa1 +fmadd.s fs0, fs0, fs8, fs9 +fmsub.s fs1, fs1, fs8, fs9 +fmul.s fs3, fs3, fs4 +fadd.s fs4, fs4, fs5 +fdiv.s fs2, fs3, fs4 +fsqrt.s ft1, fa2 + +fcvt.s.w ft2, a0 +fcvt.s.l ft3, a1 +fcvt.s.wu ft2, a0 +fcvt.s.lu ft3, a1 
+fcvt.w.s a0, ft2 +fcvt.l.s a1, fs2 +fcvt.wu.s a0, ft2 +fcvt.lu.s a1, fs2 + +feq.s a2, fa0, fa1 +fclass.s a3, fa2 + +fmin.d ft2, ft4, ft5 +fmax.d ft3, ft4, ft5 +fmadd.d fs0, fs0, fs8, fs9 +fmsub.d fs1, fs1, fs8, fs9 +fmul.d ft4, ft4, ft5 +fadd.d ft4, ft5, ft6 +fdiv.d fs4, fa3, ft5 +fsqrt.d ft2, fa3 + +fcvt.d.w ft2, a0 +fcvt.d.l ft3, a1 +fcvt.d.wu ft2, a0 +fcvt.d.lu ft3, a1 +fcvt.w.d a0, ft2 +fcvt.l.d a1, fs2 + +feq.d a2, fa0, fa1 +fclass.d a3, fa2 + +fcvt.d.s ft1, ft2 +fcvt.s.d ft3, ft4 + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 36 +# CHECK-NEXT: Total Cycles: 106 +# CHECK-NEXT: Total uOps: 36 + +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 0.34 +# CHECK-NEXT: IPC: 0.34 +# CHECK-NEXT: Block RThroughput: 99.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 2 0.50 fmin.s ft0, fa0, fa1 +# CHECK-NEXT: 1 2 0.50 fmax.s ft1, fa0, fa1 +# CHECK-NEXT: 1 4 0.50 fmadd.s fs0, fs0, fs8, fs9 +# CHECK-NEXT: 1 4 0.50 fmsub.s fs1, fs1, fs8, fs9 +# CHECK-NEXT: 1 4 0.50 fmul.s fs3, fs3, fs4 +# CHECK-NEXT: 1 4 0.50 fadd.s fs4, fs4, fs5 +# CHECK-NEXT: 1 19 18.00 fdiv.s fs2, fs3, fs4 +# CHECK-NEXT: 1 18 17.00 fsqrt.s ft1, fa2 +# CHECK-NEXT: 1 2 1.00 fcvt.s.w ft2, a0 +# CHECK-NEXT: 1 2 1.00 fcvt.s.l ft3, a1 +# CHECK-NEXT: 1 2 1.00 fcvt.s.wu ft2, a0 +# CHECK-NEXT: 1 2 1.00 fcvt.s.lu ft3, a1 +# CHECK-NEXT: 1 2 1.00 fcvt.w.s a0, ft2 +# CHECK-NEXT: 1 2 1.00 fcvt.l.s a1, fs2 +# CHECK-NEXT: 1 2 1.00 fcvt.wu.s a0, ft2 +# CHECK-NEXT: 1 2 1.00 fcvt.lu.s a1, fs2 +# CHECK-NEXT: 1 2 1.00 feq.s a2, fa0, fa1 +# CHECK-NEXT: 1 2 1.00 fclass.s a3, fa2 +# CHECK-NEXT: 1 2 0.50 fmin.d ft2, ft4, ft5 +# CHECK-NEXT: 1 2 0.50 fmax.d ft3, ft4, ft5 +# CHECK-NEXT: 1 4 0.50 fmadd.d fs0, fs0, fs8, fs9 +# CHECK-NEXT: 1 4 0.50 fmsub.d fs1, fs1, fs8, fs9 +# CHECK-NEXT: 1 
4 0.50 fmul.d ft4, ft4, ft5 +# CHECK-NEXT: 1 4 0.50 fadd.d ft4, ft5, ft6 +# CHECK-NEXT: 1 33 32.00 fdiv.d fs4, fa3, ft5 +# CHECK-NEXT: 1 33 32.00 fsqrt.d ft2, fa3 +# CHECK-NEXT: 1 2 1.00 fcvt.d.w ft2, a0 +# CHECK-NEXT: 1 2 1.00 fcvt.d.l ft3, a1 +# CHECK-NEXT: 1 2 1.00 fcvt.d.wu ft2, a0 +# CHECK-NEXT: 1 2 1.00 fcvt.d.lu ft3, a1 +# CHECK-NEXT: 1 2 1.00 fcvt.w.d a0, ft2 +# CHECK-NEXT: 1 2 1.00 fcvt.l.d a1, fs2 +# CHECK-NEXT: 1 2 1.00 feq.d a2, fa0, fa1 +# CHECK-NEXT: 1 2 1.00 fclass.d a3, fa2 +# CHECK-NEXT: 1 2 0.50 fcvt.d.s ft1, ft2 +# CHECK-NEXT: 1 2 0.50 fcvt.s.d ft3, ft4 + +# CHECK: Resources: +# CHECK-NEXT: [0] - SiFiveP500Div +# CHECK-NEXT: [1] - SiFiveP500FEXQ0 +# CHECK-NEXT: [2] - SiFiveP500FEXQ1 +# CHECK-NEXT: [3] - SiFiveP500FloatDiv +# CHECK-NEXT: [4] - SiFiveP500IEXQ0 +# CHECK-NEXT: [5] - SiFiveP500IEXQ1 +# CHECK-NEXT: [6] - SiFiveP500IEXQ2 +# CHECK-NEXT: [7] - SiFiveP500Load +# CHECK-NEXT: [8] - SiFiveP500Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] +# CHECK-NEXT: - 14.00 14.00 99.00 - - 8.00 - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] Instructions: +# CHECK-NEXT: - - 1.00 - - - - - - fmin.s ft0, fa0, fa1 +# CHECK-NEXT: - 1.00 - - - - - - - fmax.s ft1, fa0, fa1 +# CHECK-NEXT: - - 1.00 - - - - - - fmadd.s fs0, fs0, fs8, fs9 +# CHECK-NEXT: - 1.00 - - - - - - - fmsub.s fs1, fs1, fs8, fs9 +# CHECK-NEXT: - - 1.00 - - - - - - fmul.s fs3, fs3, fs4 +# CHECK-NEXT: - 1.00 - - - - - - - fadd.s fs4, fs4, fs5 +# CHECK-NEXT: - - 1.00 18.00 - - - - - fdiv.s fs2, fs3, fs4 +# CHECK-NEXT: - - 1.00 17.00 - - - - - fsqrt.s ft1, fa2 +# CHECK-NEXT: - - - - - - 1.00 - - fcvt.s.w ft2, a0 +# CHECK-NEXT: - - - - - - 1.00 - - fcvt.s.l ft3, a1 +# CHECK-NEXT: - - - - - - 1.00 - - fcvt.s.wu ft2, a0 +# CHECK-NEXT: - - - - - - 1.00 - - fcvt.s.lu ft3, a1 +# CHECK-NEXT: - 1.00 - - - - - - - fcvt.w.s a0, ft2 +# CHECK-NEXT: - 1.00 - - - - - - - fcvt.l.s a1, fs2 +# CHECK-NEXT: - 
1.00 - - - - - - - fcvt.wu.s a0, ft2 +# CHECK-NEXT: - 1.00 - - - - - - - fcvt.lu.s a1, fs2 +# CHECK-NEXT: - 1.00 - - - - - - - feq.s a2, fa0, fa1 +# CHECK-NEXT: - 1.00 - - - - - - - fclass.s a3, fa2 +# CHECK-NEXT: - - 1.00 - - - - - - fmin.d ft2, ft4, ft5 +# CHECK-NEXT: - - 1.00 - - - - - - fmax.d ft3, ft4, ft5 +# CHECK-NEXT: - - 1.00 - - - - - - fmadd.d fs0, fs0, fs8, fs9 +# CHECK-NEXT: - - 1.00 - - - - - - fmsub.d fs1, fs1, fs8, fs9 +# CHECK-NEXT: - 1.00 - - - - - - - fmul.d ft4, ft4, ft5 +# CHECK-NEXT: - - 1.00 - - - - - - fadd.d ft4, ft5, ft6 +# CHECK-NEXT: - - 1.00 32.00 - - - - - fdiv.d fs4, fa3, ft5 +# CHECK-NEXT: - - 1.00 32.00 - - - - - fsqrt.d ft2, fa3 +# CHECK-NEXT: - - - - - - 1.00 - - fcvt.d.w ft2, a0 +# CHECK-NEXT: - - - - - - 1.00 - - fcvt.d.l ft3, a1 +# CHECK-NEXT: - - - - - - 1.00 - - fcvt.d.wu ft2, a0 +# CHECK-NEXT: - - - - - - 1.00 - - fcvt.d.lu ft3, a1 +# CHECK-NEXT: - 1.00 - - - - - - - fcvt.w.d a0, ft2 +# CHECK-NEXT: - 1.00 - - - - - - - fcvt.l.d a1, fs2 +# CHECK-NEXT: - 1.00 - - - - - - - feq.d a2, fa0, fa1 +# CHECK-NEXT: - 1.00 - - - - - - - fclass.d a3, fa2 +# CHECK-NEXT: - - 1.00 - - - - - - fcvt.d.s ft1, ft2 +# CHECK-NEXT: - - 1.00 - - - - - - fcvt.s.d ft3, ft4 diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFiveP500/load.s b/llvm/test/tools/llvm-mca/RISCV/SiFiveP500/load.s new file mode 100644 index 0000000000000..2b7df9215e0cb --- /dev/null +++ b/llvm/test/tools/llvm-mca/RISCV/SiFiveP500/load.s @@ -0,0 +1,54 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=riscv64 -mcpu=sifive-p550 -iterations=1 < %s | FileCheck %s + +lw t0, 0(a0) +ld t0, 0(a0) + +flw ft0, 0(a0) +fld ft0, 0(a0) + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 4 +# CHECK-NEXT: Total Cycles: 12 +# CHECK-NEXT: Total uOps: 4 + +# CHECK: Dispatch Width: 3 +# CHECK-NEXT: uOps Per Cycle: 0.33 +# CHECK-NEXT: IPC: 0.33 +# CHECK-NEXT: Block RThroughput: 4.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# 
CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 4 1.00 * lw t0, 0(a0) +# CHECK-NEXT: 1 4 1.00 * ld t0, 0(a0) +# CHECK-NEXT: 1 6 1.00 * flw ft0, 0(a0) +# CHECK-NEXT: 1 6 1.00 * fld ft0, 0(a0) + +# CHECK: Resources: +# CHECK-NEXT: [0] - SiFiveP500Div +# CHECK-NEXT: [1] - SiFiveP500FEXQ0 +# CHECK-NEXT: [2] - SiFiveP500FEXQ1 +# CHECK-NEXT: [3] - SiFiveP500FloatDiv +# CHECK-NEXT: [4] - SiFiveP500IEXQ0 +# CHECK-NEXT: [5] - SiFiveP500IEXQ1 +# CHECK-NEXT: [6] - SiFiveP500IEXQ2 +# CHECK-NEXT: [7] - SiFiveP500Load +# CHECK-NEXT: [8] - SiFiveP500Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] +# CHECK-NEXT: - - - - - - - 4.00 - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] Instructions: +# CHECK-NEXT: - - - - - - - 1.00 - lw t0, 0(a0) +# CHECK-NEXT: - - - - - - - 1.00 - ld t0, 0(a0) +# CHECK-NEXT: - - - - - - - 1.00 - flw ft0, 0(a0) +# CHECK-NEXT: - - - - - - - 1.00 - fld ft0, 0(a0)