From e4edc40cf603157ba780a68066ba978622b04742 Mon Sep 17 00:00:00 2001 From: Wang Pengcheng Date: Fri, 20 Dec 2024 18:29:31 +0800 Subject: [PATCH 1/7] [RISCV] Add a generic OOO CPU We add a generic out-of-order CPU model here just like what GCC has done. People may use this model to evaluate some optimizations, and more importantly, people can use this model as a template to customize their own CPU model. The design (units, cycles, ...) of this model is random so don't take it seriously. --- llvm/lib/Target/RISCV/RISCV.td | 2 +- llvm/lib/Target/RISCV/RISCVProcessors.td | 2 + llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td | 494 ++++++++++++++ .../tools/llvm-mca/RISCV/GenericOOO/atomic.s | 601 ++++++++++++++++++ .../RISCV/GenericOOO/floating-point.s | 483 ++++++++++++++ .../tools/llvm-mca/RISCV/GenericOOO/integer.s | 510 +++++++++++++++ 6 files changed, 2091 insertions(+), 1 deletion(-) create mode 100644 llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td create mode 100644 llvm/test/tools/llvm-mca/RISCV/GenericOOO/atomic.s create mode 100644 llvm/test/tools/llvm-mca/RISCV/GenericOOO/floating-point.s create mode 100644 llvm/test/tools/llvm-mca/RISCV/GenericOOO/integer.s diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td index 87c07c3cd505f..2c2271e486a84 100644 --- a/llvm/lib/Target/RISCV/RISCV.td +++ b/llvm/lib/Target/RISCV/RISCV.td @@ -45,7 +45,7 @@ include "RISCVMacroFusion.td" //===----------------------------------------------------------------------===// // RISC-V Scheduling Models //===----------------------------------------------------------------------===// - +include "RISCVSchedGenericOOO.td" include "RISCVSchedMIPSP8700.td" include "RISCVSchedRocket.td" include "RISCVSchedSiFive7.td" diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td index b5eea138732a5..c2d98c2180299 100644 --- a/llvm/lib/Target/RISCV/RISCVProcessors.td +++ b/llvm/lib/Target/RISCV/RISCVProcessors.td @@ -103,6 +103,8 @@ def 
GENERIC_RV64 : RISCVProcessorModel<"generic-rv64", // Support generic for compatibility with other targets. The triple will be used // to change to the appropriate rv32/rv64 version. def GENERIC : RISCVTuneProcessorModel<"generic", NoSchedModel>, GenericTuneInfo; +def GENERIC_OOO : RISCVTuneProcessorModel<"generic-ooo", GenericOOOModel>, + GenericTuneInfo; def MIPS_P8700 : RISCVProcessorModel<"mips-p8700", MIPSP8700Model, diff --git a/llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td b/llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td new file mode 100644 index 0000000000000..f7bf824ccebe0 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td @@ -0,0 +1,494 @@ +//===-- RISCVSchedGenericOOO.td - Generic O3 Processor -----*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// We assume that: +// * 6-issue out-of-order CPU with 192 ROB entries. +// * Units: +// * IXU (Integer Execution Unit): 4 units, only one can execute division. +// * FXU (Floating-point Unit): 2 units. +// * LSU (Load/Store Unit): 2 units. +// * VXU (Vector Unit): 1 unit. +// * Latency: +// * Integer instructions: 1 cycle. +// * Multiplication instructions: 4 cycles. +// * Division instructions: 13-21 cycles. +// * Floating-point instructions: 4-6 cycles. +// * Vector instructions: 2-6 cycles. +// * Load/Store: +// * IXU: 4 cycles. +// * FXU: 6 cycles. +// * VXU: 6 cycles. +// * Integer/floating-point/vector div/rem/sqrt/... are non-pipelined. 
+//===----------------------------------------------------------------------===// + +def GenericOOOModel : SchedMachineModel { + int IssueWidth = 6; + int MicroOpBufferSize = 192; + int LoadLatency = 4; + int MispredictPenalty = 8; + let CompleteModel = 0; +} + +let SchedModel = GenericOOOModel in { +//===----------------------------------------------------------------------===// +// Resource groups +//===----------------------------------------------------------------------===// +def GenericOOODIV : ProcResource<1>; +def GenericOOOIXU : ProcResource<3>; +def GenericOOOALU : ProcResGroup<[GenericOOODIV, GenericOOOIXU]>; +def GenericOOOLSU : ProcResource<2>; +def GenericOOOFPU : ProcResource<2>; +// TODO: Add vector scheduling. +// def GenericOOOVXU : ProcResource<1>; + +//===----------------------------------------------------------------------===// +// Branches +//===----------------------------------------------------------------------===// +def : WriteRes; +def : WriteRes; +def : WriteRes; + +//===----------------------------------------------------------------------===// +// Integer arithmetic and logic +//===----------------------------------------------------------------------===// +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +//===----------------------------------------------------------------------===// +// Integer multiplication +//===----------------------------------------------------------------------===// +let Latency = 4 in { + def : WriteRes; + def : WriteRes; +} + +//===----------------------------------------------------------------------===// +// Integer division +//===----------------------------------------------------------------------===// +def : WriteRes { + let Latency = 13; + let ReleaseAtCycles = [13]; +} +def : WriteRes { + let Latency = 21; + let ReleaseAtCycles = [21]; +} +def : WriteRes { + let Latency = 13; + let ReleaseAtCycles = [13]; +} +def : WriteRes { + let Latency = 
21; + let ReleaseAtCycles = [21]; +} + +//===----------------------------------------------------------------------===// +// Integer memory +//===----------------------------------------------------------------------===// +// Load +let Latency = 4 in { + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; +} + +// Store +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +//===----------------------------------------------------------------------===// +// Atomic +//===----------------------------------------------------------------------===// +let Latency = 4 in { + def : WriteRes; + def : WriteRes; +} + +let Latency = 5 in { + def : WriteRes; + def : WriteRes; +} + +def : WriteRes; +def : WriteRes; + +//===----------------------------------------------------------------------===// +// Floating-point +//===----------------------------------------------------------------------===// +// Floating-point load +let Latency = 6 in { + def : WriteRes; + def : WriteRes; +} + +// Floating-point store +def : WriteRes; +def : WriteRes; + +// Arithmetic and logic +let Latency = 4 in { + def : WriteRes; + def : WriteRes; +} + +let Latency = 5 in { + def : WriteRes; + def : WriteRes; +} + +let Latency = 6 in { + def : WriteRes; + def : WriteRes; +} + +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +// Compare +let Latency = 2 in { + def : WriteRes; + def : WriteRes; +} + +// Division +let Latency = 13, ReleaseAtCycles = [13] in { + def : WriteRes; + def : WriteRes; +} + +let Latency = 17, ReleaseAtCycles = [17] in { + def : WriteRes; + def : WriteRes; +} + +// Conversions +let Latency = 4 in { + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; +} + +let Latency = 4 in { + def : WriteRes; + def : WriteRes; +} + +let Latency = 4 in { + def : WriteRes; + def : WriteRes; +} + +let Latency = 4 in { + def : WriteRes; + def : WriteRes; +} + +let Latency = 6 in { + def : WriteRes; + def : WriteRes; + def : 
WriteRes; + def : WriteRes; +} + +// Classify +def : WriteRes; +def : WriteRes; + +//===----------------------------------------------------------------------===// +// Zicsr extension +//===----------------------------------------------------------------------===// +def : WriteRes; + +//===----------------------------------------------------------------------===// +// Zabha extension +//===----------------------------------------------------------------------===// +let Latency = 5 in { + def : WriteRes; + def : WriteRes; +} + +//===----------------------------------------------------------------------===// +// Zba extension +//===----------------------------------------------------------------------===// +def : WriteRes; +def : WriteRes; + +//===----------------------------------------------------------------------===// +// Zbb extension +//===----------------------------------------------------------------------===// +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +//===----------------------------------------------------------------------===// +// Zbc extension +//===----------------------------------------------------------------------===// +def : WriteRes; + +//===----------------------------------------------------------------------===// +// Zbs extension +//===----------------------------------------------------------------------===// +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +//===----------------------------------------------------------------------===// +// Zbkb extension +//===----------------------------------------------------------------------===// +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +//===----------------------------------------------------------------------===// +// Zbkx extension 
+//===----------------------------------------------------------------------===// +def : WriteRes; + +//===----------------------------------------------------------------------===// +// Zfa extension +//===----------------------------------------------------------------------===// +let Latency = 3 in { + def : WriteRes; + def : WriteRes; + def : WriteRes; +} + +let Latency = 5 in { + def : WriteRes; + def : WriteRes; + def : WriteRes; +} + +//===----------------------------------------------------------------------===// +// Zfh extension +//===----------------------------------------------------------------------===// +// Zfhmin +// Load/Store +let Latency = 6 in +def : WriteRes; +def : WriteRes; + +// Conversions +let Latency = 3 in { + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; +} + +let Latency = 4 in { + def : WriteRes; + def : WriteRes; +} + +// Other than Zfhmin +let Latency = 4 in { + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; +} + +// Arithmetic and logic +let Latency = 4 in +def : WriteRes; + +let Latency = 5 in +def : WriteRes; + +let Latency = 6 in +def : WriteRes; + +def : WriteRes; +def : WriteRes; + +// Compare +let Latency = 2 in +def : WriteRes; + +// Division +let Latency = 9, ReleaseAtCycles = [9] in { + def : WriteRes; + def : WriteRes; +} + +// Classify +def : WriteRes; + +//===----------------------------------------------------------------------===// +// Misc +//===----------------------------------------------------------------------===// +let Latency = 0 in +def : WriteRes; + +//===----------------------------------------------------------------------===// +// Bypass and advance +//===----------------------------------------------------------------------===// +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : 
ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +// Zabha +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +// Zba extension +def : ReadAdvance; +def : ReadAdvance; + +// Zbb extension +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +// Zbc extension +def : ReadAdvance; + +// Zbs extension +def : ReadAdvance; +def : ReadAdvance; + +// Zbkb +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +// Zbkx +def : ReadAdvance; + +// Zfa extension +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +// Zfh extension +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : 
ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +//===----------------------------------------------------------------------===// +// Unsupported extensions +//===----------------------------------------------------------------------===// +defm : UnsupportedSchedV; +defm : UnsupportedSchedZvk; +defm : UnsupportedSchedZvkned; +defm : UnsupportedSchedSFB; +defm : UnsupportedSchedXsfvcp; +} diff --git a/llvm/test/tools/llvm-mca/RISCV/GenericOOO/atomic.s b/llvm/test/tools/llvm-mca/RISCV/GenericOOO/atomic.s new file mode 100644 index 0000000000000..e8c19eaa4c618 --- /dev/null +++ b/llvm/test/tools/llvm-mca/RISCV/GenericOOO/atomic.s @@ -0,0 +1,601 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=riscv64 -mattr=+rva23u64,+zabha -mcpu=generic-ooo --all-stats -iterations=1 < %s | FileCheck %s + +# Zalrsc +lr.w t0, (t1) +lr.w.aq t1, (t2) +lr.w.rl t2, (t3) +lr.w.aqrl t3, (t4) +sc.w t6, t5, (t4) +sc.w.aq t5, t4, (t3) +sc.w.rl t4, t3, (t2) +sc.w.aqrl t3, t2, (t1) + +lr.d t0, (t1) +lr.d.aq t1, (t2) +lr.d.rl t2, (t3) +lr.d.aqrl t3, (t4) +sc.d t6, t5, (t4) +sc.d.aq t5, t4, (t3) +sc.d.rl t4, t3, (t2) +sc.d.aqrl t3, t2, (t1) + +# Zaamo +amoswap.w a4, ra, (s0) +amoadd.w a1, a2, (a3) +amoxor.w a2, a3, (a4) +amoand.w a3, a4, (a5) +amoor.w a4, a5, (a6) +amomin.w a5, a6, (a7) +amomax.w s7, s6, (s5) +amominu.w s6, s5, (s4) +amomaxu.w s5, s4, (s3) + +amoswap.w.aq a4, ra, (s0) +amoadd.w.aq a1, a2, (a3) +amoxor.w.aq a2, a3, (a4) +amoand.w.aq a3, a4, (a5) +amoor.w.aq a4, a5, (a6) +amomin.w.aq a5, a6, (a7) +amomax.w.aq s7, s6, (s5) +amominu.w.aq s6, s5, (s4) +amomaxu.w.aq s5, s4, (s3) + +amoswap.w.rl a4, ra, (s0) +amoadd.w.rl a1, a2, (a3) +amoxor.w.rl a2, a3, (a4) +amoand.w.rl a3, a4, (a5) +amoor.w.rl a4, a5, (a6) +amomin.w.rl a5, a6, (a7) +amomax.w.rl s7, s6, (s5) +amominu.w.rl s6, s5, (s4) +amomaxu.w.rl s5, s4, (s3) + 
+amoswap.w.aqrl a4, ra, (s0) +amoadd.w.aqrl a1, a2, (a3) +amoxor.w.aqrl a2, a3, (a4) +amoand.w.aqrl a3, a4, (a5) +amoor.w.aqrl a4, a5, (a6) +amomin.w.aqrl a5, a6, (a7) +amomax.w.aqrl s7, s6, (s5) +amominu.w.aqrl s6, s5, (s4) +amomaxu.w.aqrl s5, s4, (s3) + +amoswap.d a4, ra, (s0) +amoadd.d a1, a2, (a3) +amoxor.d a2, a3, (a4) +amoand.d a3, a4, (a5) +amoor.d a4, a5, (a6) +amomin.d a5, a6, (a7) +amomax.d s7, s6, (s5) +amominu.d s6, s5, (s4) +amomaxu.d s5, s4, (s3) + +amoswap.d.aq a4, ra, (s0) +amoadd.d.aq a1, a2, (a3) +amoxor.d.aq a2, a3, (a4) +amoand.d.aq a3, a4, (a5) +amoor.d.aq a4, a5, (a6) +amomin.d.aq a5, a6, (a7) +amomax.d.aq s7, s6, (s5) +amominu.d.aq s6, s5, (s4) +amomaxu.d.aq s5, s4, (s3) + +amoswap.d.rl a4, ra, (s0) +amoadd.d.rl a1, a2, (a3) +amoxor.d.rl a2, a3, (a4) +amoand.d.rl a3, a4, (a5) +amoor.d.rl a4, a5, (a6) +amomin.d.rl a5, a6, (a7) +amomax.d.rl s7, s6, (s5) +amominu.d.rl s6, s5, (s4) +amomaxu.d.rl s5, s4, (s3) + +amoswap.d.aqrl a4, ra, (s0) +amoadd.d.aqrl a1, a2, (a3) +amoxor.d.aqrl a2, a3, (a4) +amoand.d.aqrl a3, a4, (a5) +amoor.d.aqrl a4, a5, (a6) +amomin.d.aqrl a5, a6, (a7) +amomax.d.aqrl s7, s6, (s5) +amominu.d.aqrl s6, s5, (s4) +amomaxu.d.aqrl s5, s4, (s3) + +# Zabha +amoswap.b a4, ra, (s0) +amoadd.b a1, a2, (a3) +amoxor.b a2, a3, (a4) +amoand.b a3, a4, (a5) +amoor.b a4, a5, (a6) +amomin.b a5, a6, (a7) +amomax.b s7, s6, (s5) +amominu.b s6, s5, (s4) +amomaxu.b s5, s4, (s3) + +amoswap.b.aq a4, ra, (s0) +amoadd.b.aq a1, a2, (a3) +amoxor.b.aq a2, a3, (a4) +amoand.b.aq a3, a4, (a5) +amoor.b.aq a4, a5, (a6) +amomin.b.aq a5, a6, (a7) +amomax.b.aq s7, s6, (s5) +amominu.b.aq s6, s5, (s4) +amomaxu.b.aq s5, s4, (s3) + +amoswap.b.rl a4, ra, (s0) +amoadd.b.rl a1, a2, (a3) +amoxor.b.rl a2, a3, (a4) +amoand.b.rl a3, a4, (a5) +amoor.b.rl a4, a5, (a6) +amomin.b.rl a5, a6, (a7) +amomax.b.rl s7, s6, (s5) +amominu.b.rl s6, s5, (s4) +amomaxu.b.rl s5, s4, (s3) + +amoswap.b.aqrl a4, ra, (s0) +amoadd.b.aqrl a1, a2, (a3) +amoxor.b.aqrl a2, a3, (a4) +amoand.b.aqrl a3, 
a4, (a5) +amoor.b.aqrl a4, a5, (a6) +amomin.b.aqrl a5, a6, (a7) +amomax.b.aqrl s7, s6, (s5) +amominu.b.aqrl s6, s5, (s4) +amomaxu.b.aqrl s5, s4, (s3) + +amoswap.h a4, ra, (s0) +amoadd.h a1, a2, (a3) +amoxor.h a2, a3, (a4) +amoand.h a3, a4, (a5) +amoor.h a4, a5, (a6) +amomin.h a5, a6, (a7) +amomax.h s7, s6, (s5) +amominu.h s6, s5, (s4) +amomaxu.h s5, s4, (s3) + +amoswap.h.aq a4, ra, (s0) +amoadd.h.aq a1, a2, (a3) +amoxor.h.aq a2, a3, (a4) +amoand.h.aq a3, a4, (a5) +amoor.h.aq a4, a5, (a6) +amomin.h.aq a5, a6, (a7) +amomax.h.aq s7, s6, (s5) +amominu.h.aq s6, s5, (s4) +amomaxu.h.aq s5, s4, (s3) + +amoswap.h.rl a4, ra, (s0) +amoadd.h.rl a1, a2, (a3) +amoxor.h.rl a2, a3, (a4) +amoand.h.rl a3, a4, (a5) +amoor.h.rl a4, a5, (a6) +amomin.h.rl a5, a6, (a7) +amomax.h.rl s7, s6, (s5) +amominu.h.rl s6, s5, (s4) +amomaxu.h.rl s5, s4, (s3) + +amoswap.h.aqrl a4, ra, (s0) +amoadd.h.aqrl a1, a2, (a3) +amoxor.h.aqrl a2, a3, (a4) +amoand.h.aqrl a3, a4, (a5) +amoor.h.aqrl a4, a5, (a6) +amomin.h.aqrl a5, a6, (a7) +amomax.h.aqrl s7, s6, (s5) +amominu.h.aqrl s6, s5, (s4) +amomaxu.h.aqrl s5, s4, (s3) + +# Zacas +# amocas.w a1, a3, (a5) +# amocas.w a1, a3, 0(a5) +# amocas.w zero, zero, (a5) +# amocas.w.aq zero, zero, (a5) +# amocas.w.rl zero, zero, (a5) +# amocas.w.aqrl zero, zero, (a5) + +# amocas.d a0, a2, (a1) +# amocas.d a0, a2, 0(a1) +# amocas.d zero, zero, (a1) +# amocas.d.aq zero, zero, (a1) +# amocas.d.rl zero, zero, (a1) +# amocas.d.aqrl zero, zero, (a1) + +# amocas.q a0, a2, (a1) +# amocas.q a0, a2, 0(a1) +# amocas.q zero, zero, (a1) +# amocas.q.aq zero, zero, (a1) +# amocas.q.rl zero, zero, (a1) +# amocas.q.aqrl zero, zero, (a1) + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 160 +# CHECK-NEXT: Total Cycles: 148 +# CHECK-NEXT: Total uOps: 160 + +# CHECK: Dispatch Width: 6 +# CHECK-NEXT: uOps Per Cycle: 1.08 +# CHECK-NEXT: IPC: 1.08 +# CHECK-NEXT: Block RThroughput: 80.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: 
RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 4 0.50 * lr.w t0, (t1) +# CHECK-NEXT: 1 4 0.50 * lr.w.aq t1, (t2) +# CHECK-NEXT: 1 4 0.50 * lr.w.rl t2, (t3) +# CHECK-NEXT: 1 4 0.50 * lr.w.aqrl t3, (t4) +# CHECK-NEXT: 1 1 0.50 * sc.w t6, t5, (t4) +# CHECK-NEXT: 1 1 0.50 * sc.w.aq t5, t4, (t3) +# CHECK-NEXT: 1 1 0.50 * sc.w.rl t4, t3, (t2) +# CHECK-NEXT: 1 1 0.50 * sc.w.aqrl t3, t2, (t1) +# CHECK-NEXT: 1 4 0.50 * lr.d t0, (t1) +# CHECK-NEXT: 1 4 0.50 * lr.d.aq t1, (t2) +# CHECK-NEXT: 1 4 0.50 * lr.d.rl t2, (t3) +# CHECK-NEXT: 1 4 0.50 * lr.d.aqrl t3, (t4) +# CHECK-NEXT: 1 1 0.50 * sc.d t6, t5, (t4) +# CHECK-NEXT: 1 1 0.50 * sc.d.aq t5, t4, (t3) +# CHECK-NEXT: 1 1 0.50 * sc.d.rl t4, t3, (t2) +# CHECK-NEXT: 1 1 0.50 * sc.d.aqrl t3, t2, (t1) +# CHECK-NEXT: 1 5 0.50 * * amoswap.w a4, ra, (s0) +# CHECK-NEXT: 1 5 0.50 * * amoadd.w a1, a2, (a3) +# CHECK-NEXT: 1 5 0.50 * * amoxor.w a2, a3, (a4) +# CHECK-NEXT: 1 5 0.50 * * amoand.w a3, a4, (a5) +# CHECK-NEXT: 1 5 0.50 * * amoor.w a4, a5, (a6) +# CHECK-NEXT: 1 5 0.50 * * amomin.w a5, a6, (a7) +# CHECK-NEXT: 1 5 0.50 * * amomax.w s7, s6, (s5) +# CHECK-NEXT: 1 5 0.50 * * amominu.w s6, s5, (s4) +# CHECK-NEXT: 1 5 0.50 * * amomaxu.w s5, s4, (s3) +# CHECK-NEXT: 1 5 0.50 * * amoswap.w.aq a4, ra, (s0) +# CHECK-NEXT: 1 5 0.50 * * amoadd.w.aq a1, a2, (a3) +# CHECK-NEXT: 1 5 0.50 * * amoxor.w.aq a2, a3, (a4) +# CHECK-NEXT: 1 5 0.50 * * amoand.w.aq a3, a4, (a5) +# CHECK-NEXT: 1 5 0.50 * * amoor.w.aq a4, a5, (a6) +# CHECK-NEXT: 1 5 0.50 * * amomin.w.aq a5, a6, (a7) +# CHECK-NEXT: 1 5 0.50 * * amomax.w.aq s7, s6, (s5) +# CHECK-NEXT: 1 5 0.50 * * amominu.w.aq s6, s5, (s4) +# CHECK-NEXT: 1 5 0.50 * * amomaxu.w.aq s5, s4, (s3) +# CHECK-NEXT: 1 5 0.50 * * amoswap.w.rl a4, ra, (s0) +# CHECK-NEXT: 1 5 0.50 * * amoadd.w.rl a1, a2, (a3) +# CHECK-NEXT: 1 5 0.50 * * amoxor.w.rl a2, a3, (a4) +# CHECK-NEXT: 1 5 0.50 * * 
amoand.w.rl a3, a4, (a5) +# CHECK-NEXT: 1 5 0.50 * * amoor.w.rl a4, a5, (a6) +# CHECK-NEXT: 1 5 0.50 * * amomin.w.rl a5, a6, (a7) +# CHECK-NEXT: 1 5 0.50 * * amomax.w.rl s7, s6, (s5) +# CHECK-NEXT: 1 5 0.50 * * amominu.w.rl s6, s5, (s4) +# CHECK-NEXT: 1 5 0.50 * * amomaxu.w.rl s5, s4, (s3) +# CHECK-NEXT: 1 5 0.50 * * amoswap.w.aqrl a4, ra, (s0) +# CHECK-NEXT: 1 5 0.50 * * amoadd.w.aqrl a1, a2, (a3) +# CHECK-NEXT: 1 5 0.50 * * amoxor.w.aqrl a2, a3, (a4) +# CHECK-NEXT: 1 5 0.50 * * amoand.w.aqrl a3, a4, (a5) +# CHECK-NEXT: 1 5 0.50 * * amoor.w.aqrl a4, a5, (a6) +# CHECK-NEXT: 1 5 0.50 * * amomin.w.aqrl a5, a6, (a7) +# CHECK-NEXT: 1 5 0.50 * * amomax.w.aqrl s7, s6, (s5) +# CHECK-NEXT: 1 5 0.50 * * amominu.w.aqrl s6, s5, (s4) +# CHECK-NEXT: 1 5 0.50 * * amomaxu.w.aqrl s5, s4, (s3) +# CHECK-NEXT: 1 5 0.50 * * amoswap.d a4, ra, (s0) +# CHECK-NEXT: 1 5 0.50 * * amoadd.d a1, a2, (a3) +# CHECK-NEXT: 1 5 0.50 * * amoxor.d a2, a3, (a4) +# CHECK-NEXT: 1 5 0.50 * * amoand.d a3, a4, (a5) +# CHECK-NEXT: 1 5 0.50 * * amoor.d a4, a5, (a6) +# CHECK-NEXT: 1 5 0.50 * * amomin.d a5, a6, (a7) +# CHECK-NEXT: 1 5 0.50 * * amomax.d s7, s6, (s5) +# CHECK-NEXT: 1 5 0.50 * * amominu.d s6, s5, (s4) +# CHECK-NEXT: 1 5 0.50 * * amomaxu.d s5, s4, (s3) +# CHECK-NEXT: 1 5 0.50 * * amoswap.d.aq a4, ra, (s0) +# CHECK-NEXT: 1 5 0.50 * * amoadd.d.aq a1, a2, (a3) +# CHECK-NEXT: 1 5 0.50 * * amoxor.d.aq a2, a3, (a4) +# CHECK-NEXT: 1 5 0.50 * * amoand.d.aq a3, a4, (a5) +# CHECK-NEXT: 1 5 0.50 * * amoor.d.aq a4, a5, (a6) +# CHECK-NEXT: 1 5 0.50 * * amomin.d.aq a5, a6, (a7) +# CHECK-NEXT: 1 5 0.50 * * amomax.d.aq s7, s6, (s5) +# CHECK-NEXT: 1 5 0.50 * * amominu.d.aq s6, s5, (s4) +# CHECK-NEXT: 1 5 0.50 * * amomaxu.d.aq s5, s4, (s3) +# CHECK-NEXT: 1 5 0.50 * * amoswap.d.rl a4, ra, (s0) +# CHECK-NEXT: 1 5 0.50 * * amoadd.d.rl a1, a2, (a3) +# CHECK-NEXT: 1 5 0.50 * * amoxor.d.rl a2, a3, (a4) +# CHECK-NEXT: 1 5 0.50 * * amoand.d.rl a3, a4, (a5) +# CHECK-NEXT: 1 5 0.50 * * amoor.d.rl a4, a5, (a6) +# CHECK-NEXT: 
1 5 0.50 * * amomin.d.rl a5, a6, (a7) +# CHECK-NEXT: 1 5 0.50 * * amomax.d.rl s7, s6, (s5) +# CHECK-NEXT: 1 5 0.50 * * amominu.d.rl s6, s5, (s4) +# CHECK-NEXT: 1 5 0.50 * * amomaxu.d.rl s5, s4, (s3) +# CHECK-NEXT: 1 5 0.50 * * amoswap.d.aqrl a4, ra, (s0) +# CHECK-NEXT: 1 5 0.50 * * amoadd.d.aqrl a1, a2, (a3) +# CHECK-NEXT: 1 5 0.50 * * amoxor.d.aqrl a2, a3, (a4) +# CHECK-NEXT: 1 5 0.50 * * amoand.d.aqrl a3, a4, (a5) +# CHECK-NEXT: 1 5 0.50 * * amoor.d.aqrl a4, a5, (a6) +# CHECK-NEXT: 1 5 0.50 * * amomin.d.aqrl a5, a6, (a7) +# CHECK-NEXT: 1 5 0.50 * * amomax.d.aqrl s7, s6, (s5) +# CHECK-NEXT: 1 5 0.50 * * amominu.d.aqrl s6, s5, (s4) +# CHECK-NEXT: 1 5 0.50 * * amomaxu.d.aqrl s5, s4, (s3) +# CHECK-NEXT: 1 5 0.50 * * amoswap.b a4, ra, (s0) +# CHECK-NEXT: 1 5 0.50 * * amoadd.b a1, a2, (a3) +# CHECK-NEXT: 1 5 0.50 * * amoxor.b a2, a3, (a4) +# CHECK-NEXT: 1 5 0.50 * * amoand.b a3, a4, (a5) +# CHECK-NEXT: 1 5 0.50 * * amoor.b a4, a5, (a6) +# CHECK-NEXT: 1 5 0.50 * * amomin.b a5, a6, (a7) +# CHECK-NEXT: 1 5 0.50 * * amomax.b s7, s6, (s5) +# CHECK-NEXT: 1 5 0.50 * * amominu.b s6, s5, (s4) +# CHECK-NEXT: 1 5 0.50 * * amomaxu.b s5, s4, (s3) +# CHECK-NEXT: 1 5 0.50 * * amoswap.b.aq a4, ra, (s0) +# CHECK-NEXT: 1 5 0.50 * * amoadd.b.aq a1, a2, (a3) +# CHECK-NEXT: 1 5 0.50 * * amoxor.b.aq a2, a3, (a4) +# CHECK-NEXT: 1 5 0.50 * * amoand.b.aq a3, a4, (a5) +# CHECK-NEXT: 1 5 0.50 * * amoor.b.aq a4, a5, (a6) +# CHECK-NEXT: 1 5 0.50 * * amomin.b.aq a5, a6, (a7) +# CHECK-NEXT: 1 5 0.50 * * amomax.b.aq s7, s6, (s5) +# CHECK-NEXT: 1 5 0.50 * * amominu.b.aq s6, s5, (s4) +# CHECK-NEXT: 1 5 0.50 * * amomaxu.b.aq s5, s4, (s3) +# CHECK-NEXT: 1 5 0.50 * * amoswap.b.rl a4, ra, (s0) +# CHECK-NEXT: 1 5 0.50 * * amoadd.b.rl a1, a2, (a3) +# CHECK-NEXT: 1 5 0.50 * * amoxor.b.rl a2, a3, (a4) +# CHECK-NEXT: 1 5 0.50 * * amoand.b.rl a3, a4, (a5) +# CHECK-NEXT: 1 5 0.50 * * amoor.b.rl a4, a5, (a6) +# CHECK-NEXT: 1 5 0.50 * * amomin.b.rl a5, a6, (a7) +# CHECK-NEXT: 1 5 0.50 * * amomax.b.rl s7, s6, (s5) 
+# CHECK-NEXT: 1 5 0.50 * * amominu.b.rl s6, s5, (s4) +# CHECK-NEXT: 1 5 0.50 * * amomaxu.b.rl s5, s4, (s3) +# CHECK-NEXT: 1 5 0.50 * * amoswap.b.aqrl a4, ra, (s0) +# CHECK-NEXT: 1 5 0.50 * * amoadd.b.aqrl a1, a2, (a3) +# CHECK-NEXT: 1 5 0.50 * * amoxor.b.aqrl a2, a3, (a4) +# CHECK-NEXT: 1 5 0.50 * * amoand.b.aqrl a3, a4, (a5) +# CHECK-NEXT: 1 5 0.50 * * amoor.b.aqrl a4, a5, (a6) +# CHECK-NEXT: 1 5 0.50 * * amomin.b.aqrl a5, a6, (a7) +# CHECK-NEXT: 1 5 0.50 * * amomax.b.aqrl s7, s6, (s5) +# CHECK-NEXT: 1 5 0.50 * * amominu.b.aqrl s6, s5, (s4) +# CHECK-NEXT: 1 5 0.50 * * amomaxu.b.aqrl s5, s4, (s3) +# CHECK-NEXT: 1 5 0.50 * * amoswap.h a4, ra, (s0) +# CHECK-NEXT: 1 5 0.50 * * amoadd.h a1, a2, (a3) +# CHECK-NEXT: 1 5 0.50 * * amoxor.h a2, a3, (a4) +# CHECK-NEXT: 1 5 0.50 * * amoand.h a3, a4, (a5) +# CHECK-NEXT: 1 5 0.50 * * amoor.h a4, a5, (a6) +# CHECK-NEXT: 1 5 0.50 * * amomin.h a5, a6, (a7) +# CHECK-NEXT: 1 5 0.50 * * amomax.h s7, s6, (s5) +# CHECK-NEXT: 1 5 0.50 * * amominu.h s6, s5, (s4) +# CHECK-NEXT: 1 5 0.50 * * amomaxu.h s5, s4, (s3) +# CHECK-NEXT: 1 5 0.50 * * amoswap.h.aq a4, ra, (s0) +# CHECK-NEXT: 1 5 0.50 * * amoadd.h.aq a1, a2, (a3) +# CHECK-NEXT: 1 5 0.50 * * amoxor.h.aq a2, a3, (a4) +# CHECK-NEXT: 1 5 0.50 * * amoand.h.aq a3, a4, (a5) +# CHECK-NEXT: 1 5 0.50 * * amoor.h.aq a4, a5, (a6) +# CHECK-NEXT: 1 5 0.50 * * amomin.h.aq a5, a6, (a7) +# CHECK-NEXT: 1 5 0.50 * * amomax.h.aq s7, s6, (s5) +# CHECK-NEXT: 1 5 0.50 * * amominu.h.aq s6, s5, (s4) +# CHECK-NEXT: 1 5 0.50 * * amomaxu.h.aq s5, s4, (s3) +# CHECK-NEXT: 1 5 0.50 * * amoswap.h.rl a4, ra, (s0) +# CHECK-NEXT: 1 5 0.50 * * amoadd.h.rl a1, a2, (a3) +# CHECK-NEXT: 1 5 0.50 * * amoxor.h.rl a2, a3, (a4) +# CHECK-NEXT: 1 5 0.50 * * amoand.h.rl a3, a4, (a5) +# CHECK-NEXT: 1 5 0.50 * * amoor.h.rl a4, a5, (a6) +# CHECK-NEXT: 1 5 0.50 * * amomin.h.rl a5, a6, (a7) +# CHECK-NEXT: 1 5 0.50 * * amomax.h.rl s7, s6, (s5) +# CHECK-NEXT: 1 5 0.50 * * amominu.h.rl s6, s5, (s4) +# CHECK-NEXT: 1 5 0.50 * * 
amomaxu.h.rl s5, s4, (s3) +# CHECK-NEXT: 1 5 0.50 * * amoswap.h.aqrl a4, ra, (s0) +# CHECK-NEXT: 1 5 0.50 * * amoadd.h.aqrl a1, a2, (a3) +# CHECK-NEXT: 1 5 0.50 * * amoxor.h.aqrl a2, a3, (a4) +# CHECK-NEXT: 1 5 0.50 * * amoand.h.aqrl a3, a4, (a5) +# CHECK-NEXT: 1 5 0.50 * * amoor.h.aqrl a4, a5, (a6) +# CHECK-NEXT: 1 5 0.50 * * amomin.h.aqrl a5, a6, (a7) +# CHECK-NEXT: 1 5 0.50 * * amomax.h.aqrl s7, s6, (s5) +# CHECK-NEXT: 1 5 0.50 * * amominu.h.aqrl s6, s5, (s4) +# CHECK-NEXT: 1 5 0.50 * * amomaxu.h.aqrl s5, s4, (s3) + +# CHECK: Dynamic Dispatch Stall Cycles: +# CHECK-NEXT: RAT - Register unavailable: 0 +# CHECK-NEXT: RCU - Retire tokens unavailable: 0 +# CHECK-NEXT: SCHEDQ - Scheduler full: 0 +# CHECK-NEXT: LQ - Load queue full: 0 +# CHECK-NEXT: SQ - Store queue full: 0 +# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 +# CHECK-NEXT: USH - Uncategorised Structural Hazard: 0 + +# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: +# CHECK-NEXT: [# dispatched], [# cycles] +# CHECK-NEXT: 0, 121 (81.8%) +# CHECK-NEXT: 4, 1 (0.7%) +# CHECK-NEXT: 6, 26 (17.6%) + +# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: +# CHECK-NEXT: [# issued], [# cycles] +# CHECK-NEXT: 0, 58 (39.2%) +# CHECK-NEXT: 1, 20 (13.5%) +# CHECK-NEXT: 2, 70 (47.3%) + +# CHECK: Scheduler's queue usage: +# CHECK-NEXT: No scheduler resources used. 
+ +# CHECK: Retire Control Unit - number of cycles where we saw N instructions retired: +# CHECK-NEXT: [# retired], [# cycles] +# CHECK-NEXT: 0, 58 (39.2%) +# CHECK-NEXT: 1, 22 (14.9%) +# CHECK-NEXT: 2, 66 (44.6%) +# CHECK-NEXT: 3, 2 (1.4%) + +# CHECK: Total ROB Entries: 192 +# CHECK-NEXT: Max Used ROB Entries: 136 ( 70.8% ) +# CHECK-NEXT: Average Used ROB Entries per cy: 69 ( 35.9% ) + +# CHECK: Register File statistics: +# CHECK-NEXT: Total number of mappings created: 160 +# CHECK-NEXT: Max number of mappings used: 136 + +# CHECK: Resources: +# CHECK-NEXT: [0] - GenericOOODIV +# CHECK-NEXT: [1.0] - GenericOOOFPU +# CHECK-NEXT: [1.1] - GenericOOOFPU +# CHECK-NEXT: [2.0] - GenericOOOIXU +# CHECK-NEXT: [2.1] - GenericOOOIXU +# CHECK-NEXT: [2.2] - GenericOOOIXU +# CHECK-NEXT: [3.0] - GenericOOOLSU +# CHECK-NEXT: [3.1] - GenericOOOLSU + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1.0] [1.1] [2.0] [2.1] [2.2] [3.0] [3.1] +# CHECK-NEXT: - - - - - - 80.00 80.00 + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1.0] [1.1] [2.0] [2.1] [2.2] [3.0] [3.1] Instructions: +# CHECK-NEXT: - - - - - - - 1.00 lr.w t0, (t1) +# CHECK-NEXT: - - - - - - 1.00 - lr.w.aq t1, (t2) +# CHECK-NEXT: - - - - - - - 1.00 lr.w.rl t2, (t3) +# CHECK-NEXT: - - - - - - 1.00 - lr.w.aqrl t3, (t4) +# CHECK-NEXT: - - - - - - - 1.00 sc.w t6, t5, (t4) +# CHECK-NEXT: - - - - - - - 1.00 sc.w.aq t5, t4, (t3) +# CHECK-NEXT: - - - - - - 1.00 - sc.w.rl t4, t3, (t2) +# CHECK-NEXT: - - - - - - - 1.00 sc.w.aqrl t3, t2, (t1) +# CHECK-NEXT: - - - - - - 1.00 - lr.d t0, (t1) +# CHECK-NEXT: - - - - - - 1.00 - lr.d.aq t1, (t2) +# CHECK-NEXT: - - - - - - - 1.00 lr.d.rl t2, (t3) +# CHECK-NEXT: - - - - - - 1.00 - lr.d.aqrl t3, (t4) +# CHECK-NEXT: - - - - - - - 1.00 sc.d t6, t5, (t4) +# CHECK-NEXT: - - - - - - 1.00 - sc.d.aq t5, t4, (t3) +# CHECK-NEXT: - - - - - - - 1.00 sc.d.rl t4, t3, (t2) +# CHECK-NEXT: - - - - - - 1.00 - sc.d.aqrl t3, t2, (t1) +# CHECK-NEXT: - - - - - - - 1.00 amoswap.w 
a4, ra, (s0) +# CHECK-NEXT: - - - - - - 1.00 - amoadd.w a1, a2, (a3) +# CHECK-NEXT: - - - - - - - 1.00 amoxor.w a2, a3, (a4) +# CHECK-NEXT: - - - - - - 1.00 - amoand.w a3, a4, (a5) +# CHECK-NEXT: - - - - - - - 1.00 amoor.w a4, a5, (a6) +# CHECK-NEXT: - - - - - - 1.00 - amomin.w a5, a6, (a7) +# CHECK-NEXT: - - - - - - - 1.00 amomax.w s7, s6, (s5) +# CHECK-NEXT: - - - - - - 1.00 - amominu.w s6, s5, (s4) +# CHECK-NEXT: - - - - - - - 1.00 amomaxu.w s5, s4, (s3) +# CHECK-NEXT: - - - - - - 1.00 - amoswap.w.aq a4, ra, (s0) +# CHECK-NEXT: - - - - - - - 1.00 amoadd.w.aq a1, a2, (a3) +# CHECK-NEXT: - - - - - - 1.00 - amoxor.w.aq a2, a3, (a4) +# CHECK-NEXT: - - - - - - - 1.00 amoand.w.aq a3, a4, (a5) +# CHECK-NEXT: - - - - - - 1.00 - amoor.w.aq a4, a5, (a6) +# CHECK-NEXT: - - - - - - - 1.00 amomin.w.aq a5, a6, (a7) +# CHECK-NEXT: - - - - - - 1.00 - amomax.w.aq s7, s6, (s5) +# CHECK-NEXT: - - - - - - - 1.00 amominu.w.aq s6, s5, (s4) +# CHECK-NEXT: - - - - - - 1.00 - amomaxu.w.aq s5, s4, (s3) +# CHECK-NEXT: - - - - - - - 1.00 amoswap.w.rl a4, ra, (s0) +# CHECK-NEXT: - - - - - - 1.00 - amoadd.w.rl a1, a2, (a3) +# CHECK-NEXT: - - - - - - - 1.00 amoxor.w.rl a2, a3, (a4) +# CHECK-NEXT: - - - - - - 1.00 - amoand.w.rl a3, a4, (a5) +# CHECK-NEXT: - - - - - - - 1.00 amoor.w.rl a4, a5, (a6) +# CHECK-NEXT: - - - - - - 1.00 - amomin.w.rl a5, a6, (a7) +# CHECK-NEXT: - - - - - - - 1.00 amomax.w.rl s7, s6, (s5) +# CHECK-NEXT: - - - - - - 1.00 - amominu.w.rl s6, s5, (s4) +# CHECK-NEXT: - - - - - - - 1.00 amomaxu.w.rl s5, s4, (s3) +# CHECK-NEXT: - - - - - - 1.00 - amoswap.w.aqrl a4, ra, (s0) +# CHECK-NEXT: - - - - - - - 1.00 amoadd.w.aqrl a1, a2, (a3) +# CHECK-NEXT: - - - - - - 1.00 - amoxor.w.aqrl a2, a3, (a4) +# CHECK-NEXT: - - - - - - - 1.00 amoand.w.aqrl a3, a4, (a5) +# CHECK-NEXT: - - - - - - 1.00 - amoor.w.aqrl a4, a5, (a6) +# CHECK-NEXT: - - - - - - - 1.00 amomin.w.aqrl a5, a6, (a7) +# CHECK-NEXT: - - - - - - 1.00 - amomax.w.aqrl s7, s6, (s5) +# CHECK-NEXT: - - - - - - - 1.00 
amominu.w.aqrl s6, s5, (s4) +# CHECK-NEXT: - - - - - - 1.00 - amomaxu.w.aqrl s5, s4, (s3) +# CHECK-NEXT: - - - - - - - 1.00 amoswap.d a4, ra, (s0) +# CHECK-NEXT: - - - - - - 1.00 - amoadd.d a1, a2, (a3) +# CHECK-NEXT: - - - - - - - 1.00 amoxor.d a2, a3, (a4) +# CHECK-NEXT: - - - - - - 1.00 - amoand.d a3, a4, (a5) +# CHECK-NEXT: - - - - - - - 1.00 amoor.d a4, a5, (a6) +# CHECK-NEXT: - - - - - - 1.00 - amomin.d a5, a6, (a7) +# CHECK-NEXT: - - - - - - - 1.00 amomax.d s7, s6, (s5) +# CHECK-NEXT: - - - - - - 1.00 - amominu.d s6, s5, (s4) +# CHECK-NEXT: - - - - - - - 1.00 amomaxu.d s5, s4, (s3) +# CHECK-NEXT: - - - - - - 1.00 - amoswap.d.aq a4, ra, (s0) +# CHECK-NEXT: - - - - - - - 1.00 amoadd.d.aq a1, a2, (a3) +# CHECK-NEXT: - - - - - - 1.00 - amoxor.d.aq a2, a3, (a4) +# CHECK-NEXT: - - - - - - - 1.00 amoand.d.aq a3, a4, (a5) +# CHECK-NEXT: - - - - - - 1.00 - amoor.d.aq a4, a5, (a6) +# CHECK-NEXT: - - - - - - - 1.00 amomin.d.aq a5, a6, (a7) +# CHECK-NEXT: - - - - - - 1.00 - amomax.d.aq s7, s6, (s5) +# CHECK-NEXT: - - - - - - - 1.00 amominu.d.aq s6, s5, (s4) +# CHECK-NEXT: - - - - - - 1.00 - amomaxu.d.aq s5, s4, (s3) +# CHECK-NEXT: - - - - - - - 1.00 amoswap.d.rl a4, ra, (s0) +# CHECK-NEXT: - - - - - - 1.00 - amoadd.d.rl a1, a2, (a3) +# CHECK-NEXT: - - - - - - - 1.00 amoxor.d.rl a2, a3, (a4) +# CHECK-NEXT: - - - - - - 1.00 - amoand.d.rl a3, a4, (a5) +# CHECK-NEXT: - - - - - - - 1.00 amoor.d.rl a4, a5, (a6) +# CHECK-NEXT: - - - - - - 1.00 - amomin.d.rl a5, a6, (a7) +# CHECK-NEXT: - - - - - - - 1.00 amomax.d.rl s7, s6, (s5) +# CHECK-NEXT: - - - - - - 1.00 - amominu.d.rl s6, s5, (s4) +# CHECK-NEXT: - - - - - - - 1.00 amomaxu.d.rl s5, s4, (s3) +# CHECK-NEXT: - - - - - - 1.00 - amoswap.d.aqrl a4, ra, (s0) +# CHECK-NEXT: - - - - - - - 1.00 amoadd.d.aqrl a1, a2, (a3) +# CHECK-NEXT: - - - - - - 1.00 - amoxor.d.aqrl a2, a3, (a4) +# CHECK-NEXT: - - - - - - - 1.00 amoand.d.aqrl a3, a4, (a5) +# CHECK-NEXT: - - - - - - 1.00 - amoor.d.aqrl a4, a5, (a6) +# CHECK-NEXT: - - - - - - - 
1.00 amomin.d.aqrl a5, a6, (a7) +# CHECK-NEXT: - - - - - - 1.00 - amomax.d.aqrl s7, s6, (s5) +# CHECK-NEXT: - - - - - - - 1.00 amominu.d.aqrl s6, s5, (s4) +# CHECK-NEXT: - - - - - - 1.00 - amomaxu.d.aqrl s5, s4, (s3) +# CHECK-NEXT: - - - - - - - 1.00 amoswap.b a4, ra, (s0) +# CHECK-NEXT: - - - - - - 1.00 - amoadd.b a1, a2, (a3) +# CHECK-NEXT: - - - - - - - 1.00 amoxor.b a2, a3, (a4) +# CHECK-NEXT: - - - - - - 1.00 - amoand.b a3, a4, (a5) +# CHECK-NEXT: - - - - - - - 1.00 amoor.b a4, a5, (a6) +# CHECK-NEXT: - - - - - - 1.00 - amomin.b a5, a6, (a7) +# CHECK-NEXT: - - - - - - - 1.00 amomax.b s7, s6, (s5) +# CHECK-NEXT: - - - - - - 1.00 - amominu.b s6, s5, (s4) +# CHECK-NEXT: - - - - - - - 1.00 amomaxu.b s5, s4, (s3) +# CHECK-NEXT: - - - - - - 1.00 - amoswap.b.aq a4, ra, (s0) +# CHECK-NEXT: - - - - - - - 1.00 amoadd.b.aq a1, a2, (a3) +# CHECK-NEXT: - - - - - - 1.00 - amoxor.b.aq a2, a3, (a4) +# CHECK-NEXT: - - - - - - - 1.00 amoand.b.aq a3, a4, (a5) +# CHECK-NEXT: - - - - - - 1.00 - amoor.b.aq a4, a5, (a6) +# CHECK-NEXT: - - - - - - - 1.00 amomin.b.aq a5, a6, (a7) +# CHECK-NEXT: - - - - - - 1.00 - amomax.b.aq s7, s6, (s5) +# CHECK-NEXT: - - - - - - - 1.00 amominu.b.aq s6, s5, (s4) +# CHECK-NEXT: - - - - - - 1.00 - amomaxu.b.aq s5, s4, (s3) +# CHECK-NEXT: - - - - - - - 1.00 amoswap.b.rl a4, ra, (s0) +# CHECK-NEXT: - - - - - - 1.00 - amoadd.b.rl a1, a2, (a3) +# CHECK-NEXT: - - - - - - - 1.00 amoxor.b.rl a2, a3, (a4) +# CHECK-NEXT: - - - - - - 1.00 - amoand.b.rl a3, a4, (a5) +# CHECK-NEXT: - - - - - - - 1.00 amoor.b.rl a4, a5, (a6) +# CHECK-NEXT: - - - - - - 1.00 - amomin.b.rl a5, a6, (a7) +# CHECK-NEXT: - - - - - - - 1.00 amomax.b.rl s7, s6, (s5) +# CHECK-NEXT: - - - - - - 1.00 - amominu.b.rl s6, s5, (s4) +# CHECK-NEXT: - - - - - - - 1.00 amomaxu.b.rl s5, s4, (s3) +# CHECK-NEXT: - - - - - - 1.00 - amoswap.b.aqrl a4, ra, (s0) +# CHECK-NEXT: - - - - - - - 1.00 amoadd.b.aqrl a1, a2, (a3) +# CHECK-NEXT: - - - - - - 1.00 - amoxor.b.aqrl a2, a3, (a4) +# CHECK-NEXT: - - - - - - 
- 1.00 amoand.b.aqrl a3, a4, (a5) +# CHECK-NEXT: - - - - - - 1.00 - amoor.b.aqrl a4, a5, (a6) +# CHECK-NEXT: - - - - - - - 1.00 amomin.b.aqrl a5, a6, (a7) +# CHECK-NEXT: - - - - - - 1.00 - amomax.b.aqrl s7, s6, (s5) +# CHECK-NEXT: - - - - - - - 1.00 amominu.b.aqrl s6, s5, (s4) +# CHECK-NEXT: - - - - - - 1.00 - amomaxu.b.aqrl s5, s4, (s3) +# CHECK-NEXT: - - - - - - - 1.00 amoswap.h a4, ra, (s0) +# CHECK-NEXT: - - - - - - 1.00 - amoadd.h a1, a2, (a3) +# CHECK-NEXT: - - - - - - - 1.00 amoxor.h a2, a3, (a4) +# CHECK-NEXT: - - - - - - 1.00 - amoand.h a3, a4, (a5) +# CHECK-NEXT: - - - - - - - 1.00 amoor.h a4, a5, (a6) +# CHECK-NEXT: - - - - - - 1.00 - amomin.h a5, a6, (a7) +# CHECK-NEXT: - - - - - - - 1.00 amomax.h s7, s6, (s5) +# CHECK-NEXT: - - - - - - 1.00 - amominu.h s6, s5, (s4) +# CHECK-NEXT: - - - - - - - 1.00 amomaxu.h s5, s4, (s3) +# CHECK-NEXT: - - - - - - 1.00 - amoswap.h.aq a4, ra, (s0) +# CHECK-NEXT: - - - - - - - 1.00 amoadd.h.aq a1, a2, (a3) +# CHECK-NEXT: - - - - - - 1.00 - amoxor.h.aq a2, a3, (a4) +# CHECK-NEXT: - - - - - - - 1.00 amoand.h.aq a3, a4, (a5) +# CHECK-NEXT: - - - - - - 1.00 - amoor.h.aq a4, a5, (a6) +# CHECK-NEXT: - - - - - - - 1.00 amomin.h.aq a5, a6, (a7) +# CHECK-NEXT: - - - - - - 1.00 - amomax.h.aq s7, s6, (s5) +# CHECK-NEXT: - - - - - - - 1.00 amominu.h.aq s6, s5, (s4) +# CHECK-NEXT: - - - - - - 1.00 - amomaxu.h.aq s5, s4, (s3) +# CHECK-NEXT: - - - - - - - 1.00 amoswap.h.rl a4, ra, (s0) +# CHECK-NEXT: - - - - - - 1.00 - amoadd.h.rl a1, a2, (a3) +# CHECK-NEXT: - - - - - - - 1.00 amoxor.h.rl a2, a3, (a4) +# CHECK-NEXT: - - - - - - 1.00 - amoand.h.rl a3, a4, (a5) +# CHECK-NEXT: - - - - - - - 1.00 amoor.h.rl a4, a5, (a6) +# CHECK-NEXT: - - - - - - 1.00 - amomin.h.rl a5, a6, (a7) +# CHECK-NEXT: - - - - - - - 1.00 amomax.h.rl s7, s6, (s5) +# CHECK-NEXT: - - - - - - 1.00 - amominu.h.rl s6, s5, (s4) +# CHECK-NEXT: - - - - - - - 1.00 amomaxu.h.rl s5, s4, (s3) +# CHECK-NEXT: - - - - - - 1.00 - amoswap.h.aqrl a4, ra, (s0) +# CHECK-NEXT: - - - - - 
- - 1.00 amoadd.h.aqrl a1, a2, (a3) +# CHECK-NEXT: - - - - - - 1.00 - amoxor.h.aqrl a2, a3, (a4) +# CHECK-NEXT: - - - - - - - 1.00 amoand.h.aqrl a3, a4, (a5) +# CHECK-NEXT: - - - - - - 1.00 - amoor.h.aqrl a4, a5, (a6) +# CHECK-NEXT: - - - - - - - 1.00 amomin.h.aqrl a5, a6, (a7) +# CHECK-NEXT: - - - - - - 1.00 - amomax.h.aqrl s7, s6, (s5) +# CHECK-NEXT: - - - - - - - 1.00 amominu.h.aqrl s6, s5, (s4) +# CHECK-NEXT: - - - - - - 1.00 - amomaxu.h.aqrl s5, s4, (s3) diff --git a/llvm/test/tools/llvm-mca/RISCV/GenericOOO/floating-point.s b/llvm/test/tools/llvm-mca/RISCV/GenericOOO/floating-point.s new file mode 100644 index 0000000000000..ac0b12ed7d2d5 --- /dev/null +++ b/llvm/test/tools/llvm-mca/RISCV/GenericOOO/floating-point.s @@ -0,0 +1,483 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=riscv64 -mattr=+rva23u64,+zfh -mcpu=generic-ooo --all-stats -iterations=1 < %s | FileCheck %s + +# Floating-Point Load and Store Instructions +## Half-Precision +flh ft0, 0(a0) +fsh ft0, 0(a0) + +## Single-Precision +flw ft0, 0(a0) +fsw ft0, 0(a0) + +## Double-Precision +fld ft0, 0(a0) +fsd ft0, 0(a0) + +# Floating-Point Computational Instructions +## Half-Precision +fadd.h f26, f27, f28 +fsub.h f29, f30, f31 +fmul.h ft0, ft1, ft2 +fdiv.h ft3, ft4, ft5 +fsqrt.h ft6, ft7 +fmin.h fa5, fa6, fa7 +fmax.h fs2, fs3, fs4 +fmadd.h f10, f11, f12, f31 +fmsub.h f14, f15, f16, f17 +fnmsub.h f18, f19, f20, f21 +fnmadd.h f22, f23, f24, f25 + +## Single-Precision +fadd.s f26, f27, f28 +fsub.s f29, f30, f31 +fmul.s ft0, ft1, ft2 +fdiv.s ft3, ft4, ft5 +fsqrt.s ft6, ft7 +fmin.s fa5, fa6, fa7 +fmax.s fs2, fs3, fs4 +fmadd.s f10, f11, f12, f31 +fmsub.s f14, f15, f16, f17 +fnmsub.s f18, f19, f20, f21 +fnmadd.s f22, f23, f24, f25 + +## Double-Precision +fadd.d f26, f27, f28 +fsub.d f29, f30, f31 +fmul.d ft0, ft1, ft2 +fdiv.d ft3, ft4, ft5 +fsqrt.d ft6, ft7 +fmin.d fa5, fa6, fa7 +fmax.d fs2, fs3, fs4 +fmadd.d f10, f11, f12, f31 +fmsub.d f14, f15, f16, 
f17 +fnmsub.d f18, f19, f20, f21 +fnmadd.d f22, f23, f24, f25 + +# Floating-Point Conversion and Move Instructions +## Half-Precision +fmv.x.h a2, fs7 +fmv.h.x ft1, a6 + +fcvt.s.h fa0, ft0 +fcvt.s.h fa0, ft0, rup + +fcvt.h.s ft2, fa2 +fcvt.d.h fa0, ft0 + +fcvt.d.h fa0, ft0, rup +fcvt.h.d ft2, fa2 + +## Single-Precision +fcvt.w.s a0, fs5 +fcvt.wu.s a1, fs6 +fcvt.s.w ft11, a4 +fcvt.s.wu ft0, a5 + +fcvt.l.s a0, ft0 +fcvt.lu.s a1, ft1 +fcvt.s.l ft2, a2 +fcvt.s.lu ft3, a3 + +fmv.x.w a2, fs7 +fmv.w.x ft1, a6 + +fsgnj.s fs1, fa0, fa1 +fsgnjn.s fa1, fa3, fa4 + +## Double-Precision +fcvt.wu.d a4, ft11 +fcvt.w.d a4, ft11 +fcvt.d.w ft0, a5 +fcvt.d.wu ft1, a6 + +fcvt.s.d fs5, fs6 +fcvt.d.s fs7, fs8 + +fcvt.l.d a0, ft0 +fcvt.lu.d a1, ft1 +fcvt.d.l ft3, a3 +fcvt.d.lu ft4, a4 + +fmv.x.d a2, ft2 +fmv.d.x ft5, a5 + +fsgnj.d fs1, fa0, fa1 +fsgnjn.d fa1, fa3, fa4 + +# Floating-Point Compare Instructions +## Half-Precision +feq.h a1, fs8, fs9 +flt.h a2, fs10, fs11 +fle.h a3, ft8, ft9 + +## Single-Precision +feq.s a1, fs8, fs9 +flt.s a2, fs10, fs11 +fle.s a3, ft8, ft9 + +## Double-Precision +feq.d a1, fs8, fs9 +flt.d a2, fs10, fs11 +fle.d a3, ft8, ft9 + +# Floating-Point Classify Instruction +## Half-Precision +fclass.s a3, ft10 +## Single-Precision +fclass.s a3, ft10 +## Double-Precision +fclass.d a3, ft10 + +# Zfa +## Load-Immediate Instructions +fli.h ft1, -1.0 +fli.s ft1, -1.0 +fli.d ft1, -1.0 + +## Minimum and Maximum Instructions +fminm.h fa0, fa1, fa2 +fmaxm.h fs3, fs4, fs5 + +fminm.s fa0, fa1, fa2 +fmaxm.s fs3, fs4, fs5 + +fminm.d fa0, fa1, fa2 +fmaxm.d fs3, fs4, fs5 + +## Round-to-Integer Instructions +fround.h fs1, fs2 +froundnx.h fs1, fs2 + +fround.s fs1, fs2 +froundnx.s fs1, fs2 + +fround.d fs1, fs2 +froundnx.d fs1, fs2 + +## Modular Convert-to-Integer Instruction +fcvtmod.w.d a1, ft1, rtz + +## Comparison Instructions +fltq.h a1, fs1, fs2 +fleq.h a1, ft1, ft2 +fgtq.h a1, fs1, fs2 +fgeq.h a1, ft1, ft2 + +fltq.s a1, fs1, fs2 +fleq.s a1, ft1, ft2 +fgtq.s a1, fs1, fs2 +fgeq.s 
a1, ft1, ft2 + +fltq.d a1, fs1, fs2 +fleq.d a1, ft1, ft2 +fgtq.d a1, fs1, fs2 +fgeq.d a1, ft1, ft2 + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 113 +# CHECK-NEXT: Total Cycles: 95 +# CHECK-NEXT: Total uOps: 113 + +# CHECK: Dispatch Width: 6 +# CHECK-NEXT: uOps Per Cycle: 1.19 +# CHECK-NEXT: IPC: 1.19 +# CHECK-NEXT: Block RThroughput: 89.5 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 6 0.50 * flh ft0, 0(a0) +# CHECK-NEXT: 1 1 0.50 * fsh ft0, 0(a0) +# CHECK-NEXT: 1 6 0.50 * flw ft0, 0(a0) +# CHECK-NEXT: 1 1 0.50 * fsw ft0, 0(a0) +# CHECK-NEXT: 1 6 0.50 * fld ft0, 0(a0) +# CHECK-NEXT: 1 1 0.50 * fsd ft0, 0(a0) +# CHECK-NEXT: 1 4 0.50 fadd.h fs10, fs11, ft8 +# CHECK-NEXT: 1 4 0.50 fsub.h ft9, ft10, ft11 +# CHECK-NEXT: 1 5 0.50 fmul.h ft0, ft1, ft2 +# CHECK-NEXT: 1 9 4.50 fdiv.h ft3, ft4, ft5 +# CHECK-NEXT: 1 9 4.50 fsqrt.h ft6, ft7 +# CHECK-NEXT: 1 1 0.50 fmin.h fa5, fa6, fa7 +# CHECK-NEXT: 1 1 0.50 fmax.h fs2, fs3, fs4 +# CHECK-NEXT: 1 6 0.50 fmadd.h fa0, fa1, fa2, ft11 +# CHECK-NEXT: 1 6 0.50 fmsub.h fa4, fa5, fa6, fa7 +# CHECK-NEXT: 1 6 0.50 fnmsub.h fs2, fs3, fs4, fs5 +# CHECK-NEXT: 1 6 0.50 fnmadd.h fs6, fs7, fs8, fs9 +# CHECK-NEXT: 1 4 0.50 fadd.s fs10, fs11, ft8 +# CHECK-NEXT: 1 4 0.50 fsub.s ft9, ft10, ft11 +# CHECK-NEXT: 1 5 0.50 fmul.s ft0, ft1, ft2 +# CHECK-NEXT: 1 13 6.50 fdiv.s ft3, ft4, ft5 +# CHECK-NEXT: 1 13 6.50 fsqrt.s ft6, ft7 +# CHECK-NEXT: 1 1 0.50 fmin.s fa5, fa6, fa7 +# CHECK-NEXT: 1 1 0.50 fmax.s fs2, fs3, fs4 +# CHECK-NEXT: 1 6 0.50 fmadd.s fa0, fa1, fa2, ft11 +# CHECK-NEXT: 1 6 0.50 fmsub.s fa4, fa5, fa6, fa7 +# CHECK-NEXT: 1 6 0.50 fnmsub.s fs2, fs3, fs4, fs5 +# CHECK-NEXT: 1 6 0.50 fnmadd.s fs6, fs7, fs8, fs9 +# CHECK-NEXT: 1 4 0.50 fadd.d fs10, fs11, ft8 +# CHECK-NEXT: 1 4 0.50 fsub.d ft9, ft10, ft11 
+# CHECK-NEXT: 1 5 0.50 fmul.d ft0, ft1, ft2 +# CHECK-NEXT: 1 17 8.50 fdiv.d ft3, ft4, ft5 +# CHECK-NEXT: 1 17 8.50 fsqrt.d ft6, ft7 +# CHECK-NEXT: 1 1 0.50 fmin.d fa5, fa6, fa7 +# CHECK-NEXT: 1 1 0.50 fmax.d fs2, fs3, fs4 +# CHECK-NEXT: 1 6 0.50 fmadd.d fa0, fa1, fa2, ft11 +# CHECK-NEXT: 1 6 0.50 fmsub.d fa4, fa5, fa6, fa7 +# CHECK-NEXT: 1 6 0.50 fnmsub.d fs2, fs3, fs4, fs5 +# CHECK-NEXT: 1 6 0.50 fnmadd.d fs6, fs7, fs8, fs9 +# CHECK-NEXT: 1 4 0.50 fmv.x.h a2, fs7 +# CHECK-NEXT: 1 4 0.50 fmv.h.x ft1, a6 +# CHECK-NEXT: 1 3 0.50 fcvt.s.h fa0, ft0 +# CHECK-NEXT: 1 3 0.50 fcvt.s.h fa0, ft0, rup +# CHECK-NEXT: 1 3 0.50 fcvt.h.s ft2, fa2 +# CHECK-NEXT: 1 3 0.50 fcvt.d.h fa0, ft0 +# CHECK-NEXT: 1 3 0.50 fcvt.d.h fa0, ft0, rup +# CHECK-NEXT: 1 3 0.50 fcvt.h.d ft2, fa2 +# CHECK-NEXT: 1 4 0.50 fcvt.w.s a0, fs5 +# CHECK-NEXT: 1 4 0.50 fcvt.wu.s a1, fs6 +# CHECK-NEXT: 1 4 0.50 fcvt.s.w ft11, a4 +# CHECK-NEXT: 1 4 0.50 fcvt.s.wu ft0, a5 +# CHECK-NEXT: 1 4 0.50 fcvt.l.s a0, ft0 +# CHECK-NEXT: 1 4 0.50 fcvt.lu.s a1, ft1 +# CHECK-NEXT: 1 4 0.50 fcvt.s.l ft2, a2 +# CHECK-NEXT: 1 4 0.50 fcvt.s.lu ft3, a3 +# CHECK-NEXT: 1 6 0.50 fmv.x.w a2, fs7 +# CHECK-NEXT: 1 6 0.50 fmv.w.x ft1, a6 +# CHECK-NEXT: 1 1 0.50 fsgnj.s fs1, fa0, fa1 +# CHECK-NEXT: 1 1 0.50 fsgnjn.s fa1, fa3, fa4 +# CHECK-NEXT: 1 4 0.50 fcvt.wu.d a4, ft11 +# CHECK-NEXT: 1 4 0.50 fcvt.w.d a4, ft11 +# CHECK-NEXT: 1 4 0.50 fcvt.d.w ft0, a5 +# CHECK-NEXT: 1 4 0.50 fcvt.d.wu ft1, a6 +# CHECK-NEXT: 1 4 0.50 fcvt.s.d fs5, fs6 +# CHECK-NEXT: 1 4 0.50 fcvt.d.s fs7, fs8 +# CHECK-NEXT: 1 4 0.50 fcvt.l.d a0, ft0 +# CHECK-NEXT: 1 4 0.50 fcvt.lu.d a1, ft1 +# CHECK-NEXT: 1 4 0.50 fcvt.d.l ft3, a3 +# CHECK-NEXT: 1 4 0.50 fcvt.d.lu ft4, a4 +# CHECK-NEXT: 1 6 0.50 fmv.x.d a2, ft2 +# CHECK-NEXT: 1 6 0.50 fmv.d.x ft5, a5 +# CHECK-NEXT: 1 1 0.50 fsgnj.d fs1, fa0, fa1 +# CHECK-NEXT: 1 1 0.50 fsgnjn.d fa1, fa3, fa4 +# CHECK-NEXT: 1 2 0.50 feq.h a1, fs8, fs9 +# CHECK-NEXT: 1 2 0.50 flt.h a2, fs10, fs11 +# CHECK-NEXT: 1 2 0.50 fle.h a3, ft8, ft9 
+# CHECK-NEXT: 1 2 0.50 feq.s a1, fs8, fs9 +# CHECK-NEXT: 1 2 0.50 flt.s a2, fs10, fs11 +# CHECK-NEXT: 1 2 0.50 fle.s a3, ft8, ft9 +# CHECK-NEXT: 1 2 0.50 feq.d a1, fs8, fs9 +# CHECK-NEXT: 1 2 0.50 flt.d a2, fs10, fs11 +# CHECK-NEXT: 1 2 0.50 fle.d a3, ft8, ft9 +# CHECK-NEXT: 1 1 0.50 fclass.s a3, ft10 +# CHECK-NEXT: 1 1 0.50 fclass.s a3, ft10 +# CHECK-NEXT: 1 1 0.50 fclass.d a3, ft10 +# CHECK-NEXT: 1 5 0.50 fli.h ft1, -1.0 +# CHECK-NEXT: 1 5 0.50 fli.s ft1, -1.0 +# CHECK-NEXT: 1 5 0.50 fli.d ft1, -1.0 +# CHECK-NEXT: 1 1 0.50 fminm.h fa0, fa1, fa2 +# CHECK-NEXT: 1 1 0.50 fmaxm.h fs3, fs4, fs5 +# CHECK-NEXT: 1 1 0.50 fminm.s fa0, fa1, fa2 +# CHECK-NEXT: 1 1 0.50 fmaxm.s fs3, fs4, fs5 +# CHECK-NEXT: 1 1 0.50 fminm.d fa0, fa1, fa2 +# CHECK-NEXT: 1 1 0.50 fmaxm.d fs3, fs4, fs5 +# CHECK-NEXT: 1 3 0.50 fround.h fs1, fs2 +# CHECK-NEXT: 1 3 0.50 froundnx.h fs1, fs2 +# CHECK-NEXT: 1 3 0.50 fround.s fs1, fs2 +# CHECK-NEXT: 1 3 0.50 froundnx.s fs1, fs2 +# CHECK-NEXT: 1 3 0.50 fround.d fs1, fs2 +# CHECK-NEXT: 1 3 0.50 froundnx.d fs1, fs2 +# CHECK-NEXT: 1 4 0.50 fcvtmod.w.d a1, ft1, rtz +# CHECK-NEXT: 1 2 0.50 fltq.h a1, fs1, fs2 +# CHECK-NEXT: 1 2 0.50 fleq.h a1, ft1, ft2 +# CHECK-NEXT: 1 2 0.50 fltq.h a1, fs2, fs1 +# CHECK-NEXT: 1 2 0.50 fleq.h a1, ft2, ft1 +# CHECK-NEXT: 1 2 0.50 fltq.s a1, fs1, fs2 +# CHECK-NEXT: 1 2 0.50 fleq.s a1, ft1, ft2 +# CHECK-NEXT: 1 2 0.50 fltq.s a1, fs2, fs1 +# CHECK-NEXT: 1 2 0.50 fleq.s a1, ft2, ft1 +# CHECK-NEXT: 1 2 0.50 fltq.d a1, fs1, fs2 +# CHECK-NEXT: 1 2 0.50 fleq.d a1, ft1, ft2 +# CHECK-NEXT: 1 2 0.50 fltq.d a1, fs2, fs1 +# CHECK-NEXT: 1 2 0.50 fleq.d a1, ft2, ft1 + +# CHECK: Dynamic Dispatch Stall Cycles: +# CHECK-NEXT: RAT - Register unavailable: 0 +# CHECK-NEXT: RCU - Retire tokens unavailable: 0 +# CHECK-NEXT: SCHEDQ - Scheduler full: 0 +# CHECK-NEXT: LQ - Load queue full: 0 +# CHECK-NEXT: SQ - Store queue full: 0 +# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 +# CHECK-NEXT: USH - Uncategorised Structural Hazard: 
0 + +# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: +# CHECK-NEXT: [# dispatched], [# cycles] +# CHECK-NEXT: 0, 76 (80.0%) +# CHECK-NEXT: 5, 1 (1.1%) +# CHECK-NEXT: 6, 18 (18.9%) + +# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: +# CHECK-NEXT: [# issued], [# cycles] +# CHECK-NEXT: 0, 37 (38.9%) +# CHECK-NEXT: 1, 4 (4.2%) +# CHECK-NEXT: 2, 53 (55.8%) +# CHECK-NEXT: 3, 1 (1.1%) + +# CHECK: Scheduler's queue usage: +# CHECK-NEXT: No scheduler resources used. + +# CHECK: Retire Control Unit - number of cycles where we saw N instructions retired: +# CHECK-NEXT: [# retired], [# cycles] +# CHECK-NEXT: 0, 56 (58.9%) +# CHECK-NEXT: 1, 12 (12.6%) +# CHECK-NEXT: 2, 13 (13.7%) +# CHECK-NEXT: 3, 3 (3.2%) +# CHECK-NEXT: 4, 6 (6.3%) +# CHECK-NEXT: 6, 1 (1.1%) +# CHECK-NEXT: 8, 1 (1.1%) +# CHECK-NEXT: 9, 2 (2.1%) +# CHECK-NEXT: 10, 1 (1.1%) + +# CHECK: Total ROB Entries: 192 +# CHECK-NEXT: Max Used ROB Entries: 100 ( 52.1% ) +# CHECK-NEXT: Average Used ROB Entries per cy: 61 ( 31.8% ) + +# CHECK: Register File statistics: +# CHECK-NEXT: Total number of mappings created: 110 +# CHECK-NEXT: Max number of mappings used: 100 + +# CHECK: Resources: +# CHECK-NEXT: [0] - GenericOOODIV +# CHECK-NEXT: [1.0] - GenericOOOFPU +# CHECK-NEXT: [1.1] - GenericOOOFPU +# CHECK-NEXT: [2.0] - GenericOOOIXU +# CHECK-NEXT: [2.1] - GenericOOOIXU +# CHECK-NEXT: [2.2] - GenericOOOIXU +# CHECK-NEXT: [3.0] - GenericOOOLSU +# CHECK-NEXT: [3.1] - GenericOOOLSU + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1.0] [1.1] [2.0] [2.1] [2.2] [3.0] [3.1] +# CHECK-NEXT: - 89.00 90.00 - - - 3.00 3.00 + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1.0] [1.1] [2.0] [2.1] [2.2] [3.0] [3.1] Instructions: +# CHECK-NEXT: - - - - - - - 1.00 flh ft0, 0(a0) +# CHECK-NEXT: - - - - - - 1.00 - fsh ft0, 0(a0) +# CHECK-NEXT: - - - - - - 1.00 - flw ft0, 0(a0) +# CHECK-NEXT: - - - - - - - 1.00 fsw ft0, 0(a0) +# CHECK-NEXT: - - - - - 
- - 1.00 fld ft0, 0(a0) +# CHECK-NEXT: - - - - - - 1.00 - fsd ft0, 0(a0) +# CHECK-NEXT: - - 1.00 - - - - - fadd.h fs10, fs11, ft8 +# CHECK-NEXT: - 1.00 - - - - - - fsub.h ft9, ft10, ft11 +# CHECK-NEXT: - - 1.00 - - - - - fmul.h ft0, ft1, ft2 +# CHECK-NEXT: - 9.00 - - - - - - fdiv.h ft3, ft4, ft5 +# CHECK-NEXT: - - 9.00 - - - - - fsqrt.h ft6, ft7 +# CHECK-NEXT: - 1.00 - - - - - - fmin.h fa5, fa6, fa7 +# CHECK-NEXT: - - 1.00 - - - - - fmax.h fs2, fs3, fs4 +# CHECK-NEXT: - 1.00 - - - - - - fmadd.h fa0, fa1, fa2, ft11 +# CHECK-NEXT: - - 1.00 - - - - - fmsub.h fa4, fa5, fa6, fa7 +# CHECK-NEXT: - 1.00 - - - - - - fnmsub.h fs2, fs3, fs4, fs5 +# CHECK-NEXT: - - 1.00 - - - - - fnmadd.h fs6, fs7, fs8, fs9 +# CHECK-NEXT: - 1.00 - - - - - - fadd.s fs10, fs11, ft8 +# CHECK-NEXT: - - 1.00 - - - - - fsub.s ft9, ft10, ft11 +# CHECK-NEXT: - 1.00 - - - - - - fmul.s ft0, ft1, ft2 +# CHECK-NEXT: - - 13.00 - - - - - fdiv.s ft3, ft4, ft5 +# CHECK-NEXT: - 13.00 - - - - - - fsqrt.s ft6, ft7 +# CHECK-NEXT: - - 1.00 - - - - - fmin.s fa5, fa6, fa7 +# CHECK-NEXT: - 1.00 - - - - - - fmax.s fs2, fs3, fs4 +# CHECK-NEXT: - - 1.00 - - - - - fmadd.s fa0, fa1, fa2, ft11 +# CHECK-NEXT: - 1.00 - - - - - - fmsub.s fa4, fa5, fa6, fa7 +# CHECK-NEXT: - - 1.00 - - - - - fnmsub.s fs2, fs3, fs4, fs5 +# CHECK-NEXT: - 1.00 - - - - - - fnmadd.s fs6, fs7, fs8, fs9 +# CHECK-NEXT: - - 1.00 - - - - - fadd.d fs10, fs11, ft8 +# CHECK-NEXT: - 1.00 - - - - - - fsub.d ft9, ft10, ft11 +# CHECK-NEXT: - - 1.00 - - - - - fmul.d ft0, ft1, ft2 +# CHECK-NEXT: - - 17.00 - - - - - fdiv.d ft3, ft4, ft5 +# CHECK-NEXT: - 17.00 - - - - - - fsqrt.d ft6, ft7 +# CHECK-NEXT: - - 1.00 - - - - - fmin.d fa5, fa6, fa7 +# CHECK-NEXT: - 1.00 - - - - - - fmax.d fs2, fs3, fs4 +# CHECK-NEXT: - 1.00 - - - - - - fmadd.d fa0, fa1, fa2, ft11 +# CHECK-NEXT: - - 1.00 - - - - - fmsub.d fa4, fa5, fa6, fa7 +# CHECK-NEXT: - 1.00 - - - - - - fnmsub.d fs2, fs3, fs4, fs5 +# CHECK-NEXT: - - 1.00 - - - - - fnmadd.d fs6, fs7, fs8, fs9 +# CHECK-NEXT: - 1.00 - - 
- - - - fmv.x.h a2, fs7 +# CHECK-NEXT: - - 1.00 - - - - - fmv.h.x ft1, a6 +# CHECK-NEXT: - 1.00 - - - - - - fcvt.s.h fa0, ft0 +# CHECK-NEXT: - - 1.00 - - - - - fcvt.s.h fa0, ft0, rup +# CHECK-NEXT: - 1.00 - - - - - - fcvt.h.s ft2, fa2 +# CHECK-NEXT: - 1.00 - - - - - - fcvt.d.h fa0, ft0 +# CHECK-NEXT: - - 1.00 - - - - - fcvt.d.h fa0, ft0, rup +# CHECK-NEXT: - - 1.00 - - - - - fcvt.h.d ft2, fa2 +# CHECK-NEXT: - - 1.00 - - - - - fcvt.w.s a0, fs5 +# CHECK-NEXT: - - 1.00 - - - - - fcvt.wu.s a1, fs6 +# CHECK-NEXT: - 1.00 - - - - - - fcvt.s.w ft11, a4 +# CHECK-NEXT: - 1.00 - - - - - - fcvt.s.wu ft0, a5 +# CHECK-NEXT: - - 1.00 - - - - - fcvt.l.s a0, ft0 +# CHECK-NEXT: - - 1.00 - - - - - fcvt.lu.s a1, ft1 +# CHECK-NEXT: - 1.00 - - - - - - fcvt.s.l ft2, a2 +# CHECK-NEXT: - 1.00 - - - - - - fcvt.s.lu ft3, a3 +# CHECK-NEXT: - - 1.00 - - - - - fmv.x.w a2, fs7 +# CHECK-NEXT: - 1.00 - - - - - - fmv.w.x ft1, a6 +# CHECK-NEXT: - - 1.00 - - - - - fsgnj.s fs1, fa0, fa1 +# CHECK-NEXT: - 1.00 - - - - - - fsgnjn.s fa1, fa3, fa4 +# CHECK-NEXT: - 1.00 - - - - - - fcvt.wu.d a4, ft11 +# CHECK-NEXT: - - 1.00 - - - - - fcvt.w.d a4, ft11 +# CHECK-NEXT: - 1.00 - - - - - - fcvt.d.w ft0, a5 +# CHECK-NEXT: - 1.00 - - - - - - fcvt.d.wu ft1, a6 +# CHECK-NEXT: - - 1.00 - - - - - fcvt.s.d fs5, fs6 +# CHECK-NEXT: - - 1.00 - - - - - fcvt.d.s fs7, fs8 +# CHECK-NEXT: - - 1.00 - - - - - fcvt.l.d a0, ft0 +# CHECK-NEXT: - - 1.00 - - - - - fcvt.lu.d a1, ft1 +# CHECK-NEXT: - 1.00 - - - - - - fcvt.d.l ft3, a3 +# CHECK-NEXT: - 1.00 - - - - - - fcvt.d.lu ft4, a4 +# CHECK-NEXT: - - 1.00 - - - - - fmv.x.d a2, ft2 +# CHECK-NEXT: - 1.00 - - - - - - fmv.d.x ft5, a5 +# CHECK-NEXT: - - 1.00 - - - - - fsgnj.d fs1, fa0, fa1 +# CHECK-NEXT: - 1.00 - - - - - - fsgnjn.d fa1, fa3, fa4 +# CHECK-NEXT: - 1.00 - - - - - - feq.h a1, fs8, fs9 +# CHECK-NEXT: - - 1.00 - - - - - flt.h a2, fs10, fs11 +# CHECK-NEXT: - 1.00 - - - - - - fle.h a3, ft8, ft9 +# CHECK-NEXT: - - 1.00 - - - - - feq.s a1, fs8, fs9 +# CHECK-NEXT: - 1.00 - - - - - 
- flt.s a2, fs10, fs11 +# CHECK-NEXT: - - 1.00 - - - - - fle.s a3, ft8, ft9 +# CHECK-NEXT: - 1.00 - - - - - - feq.d a1, fs8, fs9 +# CHECK-NEXT: - - 1.00 - - - - - flt.d a2, fs10, fs11 +# CHECK-NEXT: - - 1.00 - - - - - fle.d a3, ft8, ft9 +# CHECK-NEXT: - 1.00 - - - - - - fclass.s a3, ft10 +# CHECK-NEXT: - - 1.00 - - - - - fclass.s a3, ft10 +# CHECK-NEXT: - 1.00 - - - - - - fclass.d a3, ft10 +# CHECK-NEXT: - - 1.00 - - - - - fli.h ft1, -1.0 +# CHECK-NEXT: - 1.00 - - - - - - fli.s ft1, -1.0 +# CHECK-NEXT: - 1.00 - - - - - - fli.d ft1, -1.0 +# CHECK-NEXT: - - 1.00 - - - - - fminm.h fa0, fa1, fa2 +# CHECK-NEXT: - 1.00 - - - - - - fmaxm.h fs3, fs4, fs5 +# CHECK-NEXT: - - 1.00 - - - - - fminm.s fa0, fa1, fa2 +# CHECK-NEXT: - 1.00 - - - - - - fmaxm.s fs3, fs4, fs5 +# CHECK-NEXT: - - 1.00 - - - - - fminm.d fa0, fa1, fa2 +# CHECK-NEXT: - 1.00 - - - - - - fmaxm.d fs3, fs4, fs5 +# CHECK-NEXT: - 1.00 - - - - - - fround.h fs1, fs2 +# CHECK-NEXT: - - 1.00 - - - - - froundnx.h fs1, fs2 +# CHECK-NEXT: - 1.00 - - - - - - fround.s fs1, fs2 +# CHECK-NEXT: - - 1.00 - - - - - froundnx.s fs1, fs2 +# CHECK-NEXT: - 1.00 - - - - - - fround.d fs1, fs2 +# CHECK-NEXT: - - 1.00 - - - - - froundnx.d fs1, fs2 +# CHECK-NEXT: - - 1.00 - - - - - fcvtmod.w.d a1, ft1, rtz +# CHECK-NEXT: - 1.00 - - - - - - fltq.h a1, fs1, fs2 +# CHECK-NEXT: - - 1.00 - - - - - fleq.h a1, ft1, ft2 +# CHECK-NEXT: - 1.00 - - - - - - fltq.h a1, fs2, fs1 +# CHECK-NEXT: - - 1.00 - - - - - fleq.h a1, ft2, ft1 +# CHECK-NEXT: - 1.00 - - - - - - fltq.s a1, fs1, fs2 +# CHECK-NEXT: - - 1.00 - - - - - fleq.s a1, ft1, ft2 +# CHECK-NEXT: - 1.00 - - - - - - fltq.s a1, fs2, fs1 +# CHECK-NEXT: - - 1.00 - - - - - fleq.s a1, ft2, ft1 +# CHECK-NEXT: - 1.00 - - - - - - fltq.d a1, fs1, fs2 +# CHECK-NEXT: - - 1.00 - - - - - fleq.d a1, ft1, ft2 +# CHECK-NEXT: - 1.00 - - - - - - fltq.d a1, fs2, fs1 +# CHECK-NEXT: - - 1.00 - - - - - fleq.d a1, ft2, ft1 diff --git a/llvm/test/tools/llvm-mca/RISCV/GenericOOO/integer.s 
b/llvm/test/tools/llvm-mca/RISCV/GenericOOO/integer.s new file mode 100644 index 0000000000000..f58a859885f58 --- /dev/null +++ b/llvm/test/tools/llvm-mca/RISCV/GenericOOO/integer.s @@ -0,0 +1,510 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=riscv64 -mattr=+rva23u64,+zbc,+zbkb,+zbkx -mcpu=generic-ooo --all-stats -iterations=1 < %s | FileCheck %s + +# Integer Register-Immediate Instructions +addi a0, a0, 1 +addiw a0, a0, 1 +slti a0, a0, 1 +sltiu a0, a0, 1 + +andi a0, a0, 1 +ori a0, a0, 1 +xori a0, a0, 1 + +slli a0, a0, 1 +srli a0, a0, 1 +srai a0, a0, 1 +slliw a0, a0, 1 +srliw a0, a0, 1 +sraiw a0, a0, 1 + +lui a0, 1 +auipc a1, 1 + +# Integer Register-Register Operations + +add a0, a0, a1 +addw a0, a0, a0 +slt a0, a0, a0 +sltu a0, a0, a0 + +and a0, a0, a0 +or a0, a0, a0 +xor a0, a0, a0 + +sll a0, a0, a0 +srl a0, a0, a0 +sra a0, a0, a0 +sllw a0, a0, a0 +srlw a0, a0, a0 +sraw a0, a0, a0 + +sub a0, a0, a0 +subw a0, a0, a0 + +# Control Transfer Instructions + +## Unconditional Jumps and Conditional Branches +jal a0, 1f +1: +jalr a0 +beq a0, a0, 1f +1: +bne a0, a0, 1f +1: +blt a0, a0, 1f +1: +bltu a0, a0, 1f +1: +bge a0, a0, 1f +1: +bgeu a0, a0, 1f +1: +add a0, a0, a0 + +# Load and Store Instructions +lb t0, 0(a0) +lbu t0, 0(a0) +lh t0, 0(a0) +lhu t0, 0(a0) +lw t0, 0(a0) +lwu t0, 0(a0) +ld t0, 0(a0) + +sb t0, 0(a0) +sh t0, 0(a0) +sw t0, 0(a0) +sd t0, 0(a0) + +# Multiply/Division +mul a0, a0, a0 +mulh a0, a0, a0 +mulhu a0, a0, a0 +mulhsu a0, a0, a0 +mulw a0, a0, a0 +div a0, a1, a2 +divu a0, a1, a2 +rem a0, a1, a2 +remu a0, a1, a2 +divw a0, a1, a2 +divuw a0, a1, a2 +remw a0, a1, a2 +remuw a0, a1, a2 + +# Zicsr +csrrw t0, 0xfff, t1 +csrrs s3, 0x001, s5 +csrrc sp, 0x000, ra +csrrwi a5, 0x000, 0 +csrrsi t2, 0xfff, 31 +csrrci t1, 0x140, 5 + +# Zicond +czero.eqz a0, a1, a2 +czero.nez a0, a1, a2 + +# Zicond +czero.eqz a0, a1, a2 +czero.nez a0, a1, a2 + +# Zicbom/Zicbop/Zicboz +# cbo.clean 0(a0) +# cbo.flush 0(a0) +# cbo.inval 0(a0) + +#
cbo.zero 0(a0) + +# prefetch.i 0(a0) +# prefetch.r 0(a0) +# prefetch.w 0(a0) + +# Zba +add.uw a0, a0, a0 +slli.uw a0, a0, 1 +sh1add.uw a0, a0, a0 +sh2add.uw a0, a0, a0 +sh3add.uw a0, a0, a0 +sh1add a0, a0, a0 +sh2add a0, a0, a0 +sh3add a0, a0, a0 + +# Zbb +andn a0, a0, a0 +orn a0, a0, a0 +xnor a0, a0, a0 + +clz a0, a0 +clzw a0, a0 +ctz a0, a0 +ctzw a0, a0 + +cpop a0, a0 +cpopw a0, a0 + +min a0, a0, a0 +minu a0, a0, a0 +max a0, a0, a0 +maxu a0, a0, a0 + +sext.b a0, a0 +sext.h a0, a0 +zext.h a0, a0 + +rol a0, a0, a0 +rolw a0, a0, a0 +ror a0, a0, a0 +rorw a0, a0, a0 +rori a0, a0, 1 +roriw a0, a0, 1 + +orc.b a0, a0 + +rev8 a0, a0 + +# Zbc +clmul a0, a1, a2 +clmulh a0, a1, a2 +clmulr a0, a1, a2 + +# Zbs +bclr a0, a1, a2 +bclri a0, a1, 1 +bext a0, a1, a2 +bexti a0, a1, 1 +binv a0, a1, a2 +binvi a0, a1, 1 +bset a0, a1, a2 +bseti a0, a1, 1 + +# Zbkb +pack a0, a1, a2 +packh a0, a1, a2 +brev8 a0, a1 + +# Zbkx +xperm8 a0, a1, a2 +xperm4 a0, a1, a2 + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 121 +# CHECK-NEXT: Total Cycles: 155 +# CHECK-NEXT: Total uOps: 121 + +# CHECK: Dispatch Width: 6 +# CHECK-NEXT: uOps Per Cycle: 0.78 +# CHECK-NEXT: IPC: 0.78 +# CHECK-NEXT: Block RThroughput: 136.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 0.25 addi a0, a0, 1 +# CHECK-NEXT: 1 1 0.25 addiw a0, a0, 1 +# CHECK-NEXT: 1 1 0.25 slti a0, a0, 1 +# CHECK-NEXT: 1 1 0.25 seqz a0, a0 +# CHECK-NEXT: 1 1 0.25 andi a0, a0, 1 +# CHECK-NEXT: 1 1 0.25 ori a0, a0, 1 +# CHECK-NEXT: 1 1 0.25 xori a0, a0, 1 +# CHECK-NEXT: 1 1 0.25 slli a0, a0, 1 +# CHECK-NEXT: 1 1 0.25 srli a0, a0, 1 +# CHECK-NEXT: 1 1 0.25 srai a0, a0, 1 +# CHECK-NEXT: 1 1 0.25 slliw a0, a0, 1 +# CHECK-NEXT: 1 1 0.25 srliw a0, a0, 1 +# CHECK-NEXT: 1 1 0.25 sraiw a0, a0, 1 +# CHECK-NEXT: 1 1 0.25 
lui a0, 1 +# CHECK-NEXT: 1 1 0.25 auipc a1, 1 +# CHECK-NEXT: 1 1 0.25 add a0, a0, a1 +# CHECK-NEXT: 1 1 0.25 addw a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 slt a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 sltu a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 and a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 or a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 xor a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 sll a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 srl a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 sra a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 sllw a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 srlw a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 sraw a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 sub a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 subw a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 jal a0, .Ltmp0 +# CHECK-NEXT: 1 1 0.25 jalr a0 +# CHECK-NEXT: 1 1 0.25 beq a0, a0, .Ltmp1 +# CHECK-NEXT: 1 1 0.25 bne a0, a0, .Ltmp2 +# CHECK-NEXT: 1 1 0.25 blt a0, a0, .Ltmp3 +# CHECK-NEXT: 1 1 0.25 bltu a0, a0, .Ltmp4 +# CHECK-NEXT: 1 1 0.25 bge a0, a0, .Ltmp5 +# CHECK-NEXT: 1 1 0.25 bgeu a0, a0, .Ltmp6 +# CHECK-NEXT: 1 1 0.25 add a0, a0, a0 +# CHECK-NEXT: 1 4 0.50 * lb t0, 0(a0) +# CHECK-NEXT: 1 4 0.50 * lbu t0, 0(a0) +# CHECK-NEXT: 1 4 0.50 * lh t0, 0(a0) +# CHECK-NEXT: 1 4 0.50 * lhu t0, 0(a0) +# CHECK-NEXT: 1 4 0.50 * lw t0, 0(a0) +# CHECK-NEXT: 1 4 0.50 * lwu t0, 0(a0) +# CHECK-NEXT: 1 4 0.50 * ld t0, 0(a0) +# CHECK-NEXT: 1 1 0.50 * sb t0, 0(a0) +# CHECK-NEXT: 1 1 0.50 * sh t0, 0(a0) +# CHECK-NEXT: 1 1 0.50 * sw t0, 0(a0) +# CHECK-NEXT: 1 1 0.50 * sd t0, 0(a0) +# CHECK-NEXT: 1 4 0.25 mul a0, a0, a0 +# CHECK-NEXT: 1 4 0.25 mulh a0, a0, a0 +# CHECK-NEXT: 1 4 0.25 mulhu a0, a0, a0 +# CHECK-NEXT: 1 4 0.25 mulhsu a0, a0, a0 +# CHECK-NEXT: 1 4 0.25 mulw a0, a0, a0 +# CHECK-NEXT: 1 21 21.00 div a0, a1, a2 +# CHECK-NEXT: 1 21 21.00 divu a0, a1, a2 +# CHECK-NEXT: 1 21 21.00 rem a0, a1, a2 +# CHECK-NEXT: 1 21 21.00 remu a0, a1, a2 +# CHECK-NEXT: 1 13 13.00 divw a0, a1, a2 +# CHECK-NEXT: 1 13 13.00 divuw a0, a1, a2 +# CHECK-NEXT: 1 13 13.00 remw a0, a1, a2 +# CHECK-NEXT: 1 13 13.00 remuw a0, a1, a2 +# CHECK-NEXT: 1 1 0.25 
U csrrw t0, 4095, t1 +# CHECK-NEXT: 1 1 0.25 U csrrs s3, fflags, s5 +# CHECK-NEXT: 1 1 0.25 U csrrc sp, 0, ra +# CHECK-NEXT: 1 1 0.25 U csrrwi a5, 0, 0 +# CHECK-NEXT: 1 1 0.25 U csrrsi t2, 4095, 31 +# CHECK-NEXT: 1 1 0.25 U csrrci t1, sscratch, 5 +# CHECK-NEXT: 1 1 0.25 czero.eqz a0, a1, a2 +# CHECK-NEXT: 1 1 0.25 czero.nez a0, a1, a2 +# CHECK-NEXT: 1 1 0.25 czero.eqz a0, a1, a2 +# CHECK-NEXT: 1 1 0.25 czero.nez a0, a1, a2 +# CHECK-NEXT: 1 1 0.25 add.uw a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 slli.uw a0, a0, 1 +# CHECK-NEXT: 1 1 0.25 sh1add.uw a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 sh2add.uw a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 sh3add.uw a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 sh1add a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 sh2add a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 sh3add a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 andn a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 orn a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 xnor a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 clz a0, a0 +# CHECK-NEXT: 1 1 0.25 clzw a0, a0 +# CHECK-NEXT: 1 1 0.25 ctz a0, a0 +# CHECK-NEXT: 1 1 0.25 ctzw a0, a0 +# CHECK-NEXT: 1 1 0.25 cpop a0, a0 +# CHECK-NEXT: 1 1 0.25 cpopw a0, a0 +# CHECK-NEXT: 1 1 0.25 min a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 minu a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 max a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 maxu a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 sext.b a0, a0 +# CHECK-NEXT: 1 1 0.25 sext.h a0, a0 +# CHECK-NEXT: 1 1 0.25 zext.h a0, a0 +# CHECK-NEXT: 1 1 0.25 rol a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 rolw a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 ror a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 rorw a0, a0, a0 +# CHECK-NEXT: 1 1 0.25 rori a0, a0, 1 +# CHECK-NEXT: 1 1 0.25 roriw a0, a0, 1 +# CHECK-NEXT: 1 1 0.25 orc.b a0, a0 +# CHECK-NEXT: 1 1 0.25 rev8 a0, a0 +# CHECK-NEXT: 1 1 0.25 clmul a0, a1, a2 +# CHECK-NEXT: 1 1 0.25 clmulh a0, a1, a2 +# CHECK-NEXT: 1 1 0.25 clmulr a0, a1, a2 +# CHECK-NEXT: 1 1 0.25 bclr a0, a1, a2 +# CHECK-NEXT: 1 1 0.25 bclri a0, a1, 1 +# CHECK-NEXT: 1 1 0.25 bext a0, a1, a2 +# CHECK-NEXT: 1 1 0.25 bexti a0, a1, 1 +# CHECK-NEXT: 1 1 0.25 
binv a0, a1, a2 +# CHECK-NEXT: 1 1 0.25 binvi a0, a1, 1 +# CHECK-NEXT: 1 1 0.25 bset a0, a1, a2 +# CHECK-NEXT: 1 1 0.25 bseti a0, a1, 1 +# CHECK-NEXT: 1 1 0.25 pack a0, a1, a2 +# CHECK-NEXT: 1 1 0.25 packh a0, a1, a2 +# CHECK-NEXT: 1 1 0.25 brev8 a0, a1 +# CHECK-NEXT: 1 1 0.25 xperm8 a0, a1, a2 +# CHECK-NEXT: 1 1 0.25 xperm4 a0, a1, a2 + +# CHECK: Dynamic Dispatch Stall Cycles: +# CHECK-NEXT: RAT - Register unavailable: 0 +# CHECK-NEXT: RCU - Retire tokens unavailable: 0 +# CHECK-NEXT: SCHEDQ - Scheduler full: 0 +# CHECK-NEXT: LQ - Load queue full: 0 +# CHECK-NEXT: SQ - Store queue full: 0 +# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 +# CHECK-NEXT: USH - Uncategorised Structural Hazard: 0 + +# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: +# CHECK-NEXT: [# dispatched], [# cycles] +# CHECK-NEXT: 0, 134 (86.5%) +# CHECK-NEXT: 1, 1 (0.6%) +# CHECK-NEXT: 6, 20 (12.9%) + +# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: +# CHECK-NEXT: [# issued], [# cycles] +# CHECK-NEXT: 0, 102 (65.8%) +# CHECK-NEXT: 1, 27 (17.4%) +# CHECK-NEXT: 2, 4 (2.6%) +# CHECK-NEXT: 3, 13 (8.4%) +# CHECK-NEXT: 4, 3 (1.9%) +# CHECK-NEXT: 5, 1 (0.6%) +# CHECK-NEXT: 6, 5 (3.2%) + +# CHECK: Scheduler's queue usage: +# CHECK-NEXT: No scheduler resources used. 
+ +# CHECK: Retire Control Unit - number of cycles where we saw N instructions retired: +# CHECK-NEXT: [# retired], [# cycles] +# CHECK-NEXT: 0, 132 (85.2%) +# CHECK-NEXT: 1, 19 (12.3%) +# CHECK-NEXT: 2, 1 (0.6%) +# CHECK-NEXT: 13, 1 (0.6%) +# CHECK-NEXT: 28, 1 (0.6%) +# CHECK-NEXT: 59, 1 (0.6%) + +# CHECK: Total ROB Entries: 192 +# CHECK-NEXT: Max Used ROB Entries: 91 ( 47.4% ) +# CHECK-NEXT: Average Used ROB Entries per cy: 75 ( 39.1% ) + +# CHECK: Register File statistics: +# CHECK-NEXT: Total number of mappings created: 111 +# CHECK-NEXT: Max number of mappings used: 81 + +# CHECK: Resources: +# CHECK-NEXT: [0] - GenericOOODIV +# CHECK-NEXT: [1.0] - GenericOOOFPU +# CHECK-NEXT: [1.1] - GenericOOOFPU +# CHECK-NEXT: [2.0] - GenericOOOIXU +# CHECK-NEXT: [2.1] - GenericOOOIXU +# CHECK-NEXT: [2.2] - GenericOOOIXU +# CHECK-NEXT: [3.0] - GenericOOOLSU +# CHECK-NEXT: [3.1] - GenericOOOLSU + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1.0] [1.1] [2.0] [2.1] [2.2] [3.0] [3.1] +# CHECK-NEXT: 147.00 - - 30.00 30.00 31.00 5.00 6.00 + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1.0] [1.1] [2.0] [2.1] [2.2] [3.0] [3.1] Instructions: +# CHECK-NEXT: - - - - - 1.00 - - addi a0, a0, 1 +# CHECK-NEXT: - - - - 1.00 - - - addiw a0, a0, 1 +# CHECK-NEXT: - - - 1.00 - - - - slti a0, a0, 1 +# CHECK-NEXT: 1.00 - - - - - - - seqz a0, a0 +# CHECK-NEXT: - - - - - 1.00 - - andi a0, a0, 1 +# CHECK-NEXT: - - - 1.00 - - - - ori a0, a0, 1 +# CHECK-NEXT: 1.00 - - - - - - - xori a0, a0, 1 +# CHECK-NEXT: 1.00 - - - - - - - slli a0, a0, 1 +# CHECK-NEXT: 1.00 - - - - - - - srli a0, a0, 1 +# CHECK-NEXT: 1.00 - - - - - - - srai a0, a0, 1 +# CHECK-NEXT: 1.00 - - - - - - - slliw a0, a0, 1 +# CHECK-NEXT: 1.00 - - - - - - - srliw a0, a0, 1 +# CHECK-NEXT: 1.00 - - - - - - - sraiw a0, a0, 1 +# CHECK-NEXT: - - - - - 1.00 - - lui a0, 1 +# CHECK-NEXT: - - - - 1.00 - - - auipc a1, 1 +# CHECK-NEXT: - - - 1.00 - - - - add a0, a0, a1 +# CHECK-NEXT: - - - - 1.00 - - - addw a0, 
a0, a0 +# CHECK-NEXT: - - - - - 1.00 - - slt a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - sltu a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - and a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - or a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - xor a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - sll a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - srl a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - sra a0, a0, a0 +# CHECK-NEXT: 1.00 - - - - - - - sllw a0, a0, a0 +# CHECK-NEXT: 1.00 - - - - - - - srlw a0, a0, a0 +# CHECK-NEXT: 1.00 - - - - - - - sraw a0, a0, a0 +# CHECK-NEXT: - - - - - 1.00 - - sub a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - subw a0, a0, a0 +# CHECK-NEXT: - - - - 1.00 - - - jal a0, .Ltmp0 +# CHECK-NEXT: - - - - - 1.00 - - jalr a0 +# CHECK-NEXT: - - - - 1.00 - - - beq a0, a0, .Ltmp1 +# CHECK-NEXT: - - - - 1.00 - - - bne a0, a0, .Ltmp2 +# CHECK-NEXT: - - - - - 1.00 - - blt a0, a0, .Ltmp3 +# CHECK-NEXT: - - - - 1.00 - - - bltu a0, a0, .Ltmp4 +# CHECK-NEXT: - - - - - 1.00 - - bge a0, a0, .Ltmp5 +# CHECK-NEXT: - - - - 1.00 - - - bgeu a0, a0, .Ltmp6 +# CHECK-NEXT: - - - - - 1.00 - - add a0, a0, a0 +# CHECK-NEXT: - - - - - - - 1.00 lb t0, 0(a0) +# CHECK-NEXT: - - - - - - 1.00 - lbu t0, 0(a0) +# CHECK-NEXT: - - - - - - - 1.00 lh t0, 0(a0) +# CHECK-NEXT: - - - - - - - 1.00 lhu t0, 0(a0) +# CHECK-NEXT: - - - - - - 1.00 - lw t0, 0(a0) +# CHECK-NEXT: - - - - - - - 1.00 lwu t0, 0(a0) +# CHECK-NEXT: - - - - - - 1.00 - ld t0, 0(a0) +# CHECK-NEXT: - - - - - - 1.00 - sb t0, 0(a0) +# CHECK-NEXT: - - - - - - - 1.00 sh t0, 0(a0) +# CHECK-NEXT: - - - - - - 1.00 - sw t0, 0(a0) +# CHECK-NEXT: - - - - - - - 1.00 sd t0, 0(a0) +# CHECK-NEXT: - - - - - 1.00 - - mul a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - mulh a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - mulhu a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - mulhsu a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - mulw a0, a0, a0 +# CHECK-NEXT: 21.00 - - - - - - - div a0, a1, a2 +# CHECK-NEXT: 21.00 - - - - - - - divu a0, a1, a2 +# CHECK-NEXT: 
21.00 - - - - - - - rem a0, a1, a2 +# CHECK-NEXT: 21.00 - - - - - - - remu a0, a1, a2 +# CHECK-NEXT: 13.00 - - - - - - - divw a0, a1, a2 +# CHECK-NEXT: 13.00 - - - - - - - divuw a0, a1, a2 +# CHECK-NEXT: 13.00 - - - - - - - remw a0, a1, a2 +# CHECK-NEXT: 13.00 - - - - - - - remuw a0, a1, a2 +# CHECK-NEXT: - - - - 1.00 - - - csrrw t0, 4095, t1 +# CHECK-NEXT: - - - - - 1.00 - - csrrs s3, fflags, s5 +# CHECK-NEXT: - - - - 1.00 - - - csrrc sp, 0, ra +# CHECK-NEXT: - - - - - 1.00 - - csrrwi a5, 0, 0 +# CHECK-NEXT: - - - - 1.00 - - - csrrsi t2, 4095, 31 +# CHECK-NEXT: - - - 1.00 - - - - csrrci t1, sscratch, 5 +# CHECK-NEXT: - - - - - 1.00 - - czero.eqz a0, a1, a2 +# CHECK-NEXT: - - - - 1.00 - - - czero.nez a0, a1, a2 +# CHECK-NEXT: - - - - 1.00 - - - czero.eqz a0, a1, a2 +# CHECK-NEXT: - - - - - 1.00 - - czero.nez a0, a1, a2 +# CHECK-NEXT: - - - 1.00 - - - - add.uw a0, a0, a0 +# CHECK-NEXT: - - - - 1.00 - - - slli.uw a0, a0, 1 +# CHECK-NEXT: - - - - - 1.00 - - sh1add.uw a0, a0, a0 +# CHECK-NEXT: - - - - - 1.00 - - sh2add.uw a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - sh3add.uw a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - sh1add a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - sh2add a0, a0, a0 +# CHECK-NEXT: - - - - - 1.00 - - sh3add a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - andn a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - orn a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - xnor a0, a0, a0 +# CHECK-NEXT: - - - - - 1.00 - - clz a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - clzw a0, a0 +# CHECK-NEXT: - - - - - 1.00 - - ctz a0, a0 +# CHECK-NEXT: - - - - 1.00 - - - ctzw a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - cpop a0, a0 +# CHECK-NEXT: - - - - - 1.00 - - cpopw a0, a0 +# CHECK-NEXT: - - - - 1.00 - - - min a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - minu a0, a0, a0 +# CHECK-NEXT: - - - - - 1.00 - - max a0, a0, a0 +# CHECK-NEXT: - - - - 1.00 - - - maxu a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - sext.b a0, a0 +# CHECK-NEXT: - - - - - 1.00 - - sext.h a0, a0 +# CHECK-NEXT: - - - - 
1.00 - - - zext.h a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - rol a0, a0, a0 +# CHECK-NEXT: - - - - - 1.00 - - rolw a0, a0, a0 +# CHECK-NEXT: - - - - 1.00 - - - ror a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - rorw a0, a0, a0 +# CHECK-NEXT: - - - - - 1.00 - - rori a0, a0, 1 +# CHECK-NEXT: - - - - 1.00 - - - roriw a0, a0, 1 +# CHECK-NEXT: - - - 1.00 - - - - orc.b a0, a0 +# CHECK-NEXT: - - - - - 1.00 - - rev8 a0, a0 +# CHECK-NEXT: - - - - 1.00 - - - clmul a0, a1, a2 +# CHECK-NEXT: - - - - 1.00 - - - clmulh a0, a1, a2 +# CHECK-NEXT: - - - - - 1.00 - - clmulr a0, a1, a2 +# CHECK-NEXT: - - - - 1.00 - - - bclr a0, a1, a2 +# CHECK-NEXT: - - - - - 1.00 - - bclri a0, a1, 1 +# CHECK-NEXT: - - - - 1.00 - - - bext a0, a1, a2 +# CHECK-NEXT: - - - - - 1.00 - - bexti a0, a1, 1 +# CHECK-NEXT: - - - - 1.00 - - - binv a0, a1, a2 +# CHECK-NEXT: - - - - 1.00 - - - binvi a0, a1, 1 +# CHECK-NEXT: - - - - - 1.00 - - bset a0, a1, a2 +# CHECK-NEXT: - - - - 1.00 - - - bseti a0, a1, 1 +# CHECK-NEXT: - - - - - 1.00 - - pack a0, a1, a2 +# CHECK-NEXT: - - - - 1.00 - - - packh a0, a1, a2 +# CHECK-NEXT: - - - - - 1.00 - - brev8 a0, a1 +# CHECK-NEXT: - - - - 1.00 - - - xperm8 a0, a1, a2 +# CHECK-NEXT: - - - - 1.00 - - - xperm4 a0, a1, a2 From 45cbe2de5adb0da5e7494395fcbfba3f99ea571a Mon Sep 17 00:00:00 2001 From: Wang Pengcheng Date: Tue, 31 Dec 2024 18:51:00 +0800 Subject: [PATCH 2/7] Update target-invalid-cpu-note/riscv.c --- clang/test/Misc/target-invalid-cpu-note/riscv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/clang/test/Misc/target-invalid-cpu-note/riscv.c b/clang/test/Misc/target-invalid-cpu-note/riscv.c index fb54dcb5b3a93..6e4323958957a 100644 --- a/clang/test/Misc/target-invalid-cpu-note/riscv.c +++ b/clang/test/Misc/target-invalid-cpu-note/riscv.c @@ -66,6 +66,7 @@ // TUNE-RISCV32-SAME: {{^}}, syntacore-scr4-rv32 // TUNE-RISCV32-SAME: {{^}}, syntacore-scr5-rv32 // TUNE-RISCV32-SAME: {{^}}, generic +// TUNE-RISCV32-SAME: {{^}}, generic-ooo // TUNE-RISCV32-SAME: {{^}}, rocket // 
TUNE-RISCV32-SAME: {{^}}, sifive-7-series // TUNE-RISCV32-SAME: {{$}} @@ -96,6 +97,7 @@ // TUNE-RISCV64-SAME: {{^}}, veyron-v1 // TUNE-RISCV64-SAME: {{^}}, xiangshan-nanhu // TUNE-RISCV64-SAME: {{^}}, generic +// TUNE-RISCV64-SAME: {{^}}, generic-ooo // TUNE-RISCV64-SAME: {{^}}, rocket // TUNE-RISCV64-SAME: {{^}}, sifive-7-series // TUNE-RISCV64-SAME: {{$}} From 9a2658da92535701a6fb662726285c5881cf0c0a Mon Sep 17 00:00:00 2001 From: Wang Pengcheng Date: Fri, 17 Jan 2025 18:05:31 +0800 Subject: [PATCH 3/7] Adjust latency and remove --all-stats --- llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td | 38 ++-- .../tools/llvm-mca/RISCV/GenericOOO/atomic.s | 41 +---- .../RISCV/GenericOOO/floating-point.s | 167 +++++++----------- .../tools/llvm-mca/RISCV/GenericOOO/integer.s | 47 +---- 4 files changed, 82 insertions(+), 211 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td b/llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td index f7bf824ccebe0..06e36a062a46e 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td +++ b/llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td @@ -10,7 +10,7 @@ // We assume that: // * 6-issue out-of-order CPU with 192 ROB entries. // * Units: -// * IXU (Integer GenericOOOALU Unit): 4 units, only one can execute division. +// * IXU (Integer ALU Unit): 4 units, only one can execute division. // * FXU (Floating-point Unit): 2 units. // * LSU (Load/Store Unit): 2 units. // * VXU (Vector Unit): 1 unit. @@ -18,11 +18,11 @@ // * Integer instructions: 1 cycle. // * Multiplication instructions: 4 cycles. // * Multiplication/Division instructions: 7-13 cycles. -// * Floating-point instructions: 4-6 cycles. +// * Floating-point instructions: 2-6 cycles. // * Vector instructions: 2-6 cycles. // * Load/Store: // * IXU: 4 cycles. -// * FXU: 6 cycles. +// * FXU: 4 cycles. // * VXU: 6 cycles. // * Integer/floating-point/vector div/rem/sqrt/... are non-pipelined. 
//===----------------------------------------------------------------------===// @@ -129,7 +129,7 @@ def : WriteRes; // Floating-point //===----------------------------------------------------------------------===// // Floating-point load -let Latency = 6 in { +let Latency = 4 in { def : WriteRes; def : WriteRes; } @@ -139,12 +139,12 @@ def : WriteRes; def : WriteRes; // Arithmetic and logic -let Latency = 4 in { +let Latency = 2 in { def : WriteRes; def : WriteRes; } -let Latency = 5 in { +let Latency = 4 in { def : WriteRes; def : WriteRes; } @@ -177,29 +177,29 @@ let Latency = 17, ReleaseAtCycles = [17] in { } // Conversions -let Latency = 4 in { +let Latency = 2 in { def : WriteRes; def : WriteRes; def : WriteRes; def : WriteRes; } -let Latency = 4 in { +let Latency = 2 in { def : WriteRes; def : WriteRes; } -let Latency = 4 in { +let Latency = 2 in { def : WriteRes; def : WriteRes; } -let Latency = 4 in { +let Latency = 2 in { def : WriteRes; def : WriteRes; } -let Latency = 6 in { +let Latency = 2 in { def : WriteRes; def : WriteRes; def : WriteRes; @@ -275,13 +275,13 @@ def : WriteRes; //===----------------------------------------------------------------------===// // Zfa extension //===----------------------------------------------------------------------===// -let Latency = 3 in { +let Latency = 2 in { def : WriteRes; def : WriteRes; def : WriteRes; } -let Latency = 5 in { +let Latency = 2 in { def : WriteRes; def : WriteRes; def : WriteRes; @@ -292,25 +292,25 @@ let Latency = 5 in { //===----------------------------------------------------------------------===// // Zfhmin // Load/Store -let Latency = 6 in +let Latency = 4 in def : WriteRes; def : WriteRes; // Conversions -let Latency = 3 in { +let Latency = 2 in { def : WriteRes; def : WriteRes; def : WriteRes; def : WriteRes; } -let Latency = 4 in { +let Latency = 2 in { def : WriteRes; def : WriteRes; } // Other than Zfhmin -let Latency = 4 in { +let Latency = 2 in { def : WriteRes; def : WriteRes; def 
: WriteRes; @@ -318,10 +318,10 @@ let Latency = 4 in { } // Arithmetic and logic -let Latency = 4 in +let Latency = 2 in def : WriteRes; -let Latency = 5 in +let Latency = 4 in def : WriteRes; let Latency = 6 in diff --git a/llvm/test/tools/llvm-mca/RISCV/GenericOOO/atomic.s b/llvm/test/tools/llvm-mca/RISCV/GenericOOO/atomic.s index e8c19eaa4c618..60532fae10f2f 100644 --- a/llvm/test/tools/llvm-mca/RISCV/GenericOOO/atomic.s +++ b/llvm/test/tools/llvm-mca/RISCV/GenericOOO/atomic.s @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=riscv64 -mattr=+rva23u64,+zabha -mcpu=generic-ooo --all-stats -iterations=1 < %s | FileCheck %s +# RUN: llvm-mca -mtriple=riscv64 -mattr=+rva23u64,+zabha -mcpu=generic-ooo -iterations=1 < %s | FileCheck %s # Zalrsc lr.w t0, (t1) @@ -384,45 +384,6 @@ amomaxu.h.aqrl s5, s4, (s3) # CHECK-NEXT: 1 5 0.50 * * amominu.h.aqrl s6, s5, (s4) # CHECK-NEXT: 1 5 0.50 * * amomaxu.h.aqrl s5, s4, (s3) -# CHECK: Dynamic Dispatch Stall Cycles: -# CHECK-NEXT: RAT - Register unavailable: 0 -# CHECK-NEXT: RCU - Retire tokens unavailable: 0 -# CHECK-NEXT: SCHEDQ - Scheduler full: 0 -# CHECK-NEXT: LQ - Load queue full: 0 -# CHECK-NEXT: SQ - Store queue full: 0 -# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 -# CHECK-NEXT: USH - Uncategorised Structural Hazard: 0 - -# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: -# CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 121 (81.8%) -# CHECK-NEXT: 4, 1 (0.7%) -# CHECK-NEXT: 6, 26 (17.6%) - -# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: -# CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 58 (39.2%) -# CHECK-NEXT: 1, 20 (13.5%) -# CHECK-NEXT: 2, 70 (47.3%) - -# CHECK: Scheduler's queue usage: -# CHECK-NEXT: No scheduler resources used. 
- -# CHECK: Retire Control Unit - number of cycles where we saw N instructions retired: -# CHECK-NEXT: [# retired], [# cycles] -# CHECK-NEXT: 0, 58 (39.2%) -# CHECK-NEXT: 1, 22 (14.9%) -# CHECK-NEXT: 2, 66 (44.6%) -# CHECK-NEXT: 3, 2 (1.4%) - -# CHECK: Total ROB Entries: 192 -# CHECK-NEXT: Max Used ROB Entries: 136 ( 70.8% ) -# CHECK-NEXT: Average Used ROB Entries per cy: 69 ( 35.9% ) - -# CHECK: Register File statistics: -# CHECK-NEXT: Total number of mappings created: 160 -# CHECK-NEXT: Max number of mappings used: 136 - # CHECK: Resources: # CHECK-NEXT: [0] - GenericOOODIV # CHECK-NEXT: [1.0] - GenericOOOFPU diff --git a/llvm/test/tools/llvm-mca/RISCV/GenericOOO/floating-point.s b/llvm/test/tools/llvm-mca/RISCV/GenericOOO/floating-point.s index ac0b12ed7d2d5..23efebe3f9a26 100644 --- a/llvm/test/tools/llvm-mca/RISCV/GenericOOO/floating-point.s +++ b/llvm/test/tools/llvm-mca/RISCV/GenericOOO/floating-point.s @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=riscv64 -mattr=+rva23u64,+zfh -mcpu=generic-ooo --all-stats -iterations=1 < %s | FileCheck %s +# RUN: llvm-mca -mtriple=riscv64 -mattr=+rva23u64,+zfh -mcpu=generic-ooo -iterations=1 < %s | FileCheck %s # Floating-Point Load and Store Instructions ## Half-Precision @@ -193,15 +193,15 @@ fgeq.d a1, ft1, ft2 # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 6 0.50 * flh ft0, 0(a0) +# CHECK-NEXT: 1 4 0.50 * flh ft0, 0(a0) # CHECK-NEXT: 1 1 0.50 * fsh ft0, 0(a0) -# CHECK-NEXT: 1 6 0.50 * flw ft0, 0(a0) +# CHECK-NEXT: 1 4 0.50 * flw ft0, 0(a0) # CHECK-NEXT: 1 1 0.50 * fsw ft0, 0(a0) -# CHECK-NEXT: 1 6 0.50 * fld ft0, 0(a0) +# CHECK-NEXT: 1 4 0.50 * fld ft0, 0(a0) # CHECK-NEXT: 1 1 0.50 * fsd ft0, 0(a0) -# CHECK-NEXT: 1 4 0.50 fadd.h fs10, fs11, ft8 -# CHECK-NEXT: 1 4 0.50 fsub.h ft9, ft10, ft11 -# CHECK-NEXT: 1 5 0.50 fmul.h ft0, ft1, ft2 +# CHECK-NEXT: 1 2 0.50 fadd.h fs10, fs11, ft8 +# 
CHECK-NEXT: 1 2 0.50 fsub.h ft9, ft10, ft11 +# CHECK-NEXT: 1 4 0.50 fmul.h ft0, ft1, ft2 # CHECK-NEXT: 1 9 4.50 fdiv.h ft3, ft4, ft5 # CHECK-NEXT: 1 9 4.50 fsqrt.h ft6, ft7 # CHECK-NEXT: 1 1 0.50 fmin.h fa5, fa6, fa7 @@ -210,9 +210,9 @@ fgeq.d a1, ft1, ft2 # CHECK-NEXT: 1 6 0.50 fmsub.h fa4, fa5, fa6, fa7 # CHECK-NEXT: 1 6 0.50 fnmsub.h fs2, fs3, fs4, fs5 # CHECK-NEXT: 1 6 0.50 fnmadd.h fs6, fs7, fs8, fs9 -# CHECK-NEXT: 1 4 0.50 fadd.s fs10, fs11, ft8 -# CHECK-NEXT: 1 4 0.50 fsub.s ft9, ft10, ft11 -# CHECK-NEXT: 1 5 0.50 fmul.s ft0, ft1, ft2 +# CHECK-NEXT: 1 2 0.50 fadd.s fs10, fs11, ft8 +# CHECK-NEXT: 1 2 0.50 fsub.s ft9, ft10, ft11 +# CHECK-NEXT: 1 4 0.50 fmul.s ft0, ft1, ft2 # CHECK-NEXT: 1 13 6.50 fdiv.s ft3, ft4, ft5 # CHECK-NEXT: 1 13 6.50 fsqrt.s ft6, ft7 # CHECK-NEXT: 1 1 0.50 fmin.s fa5, fa6, fa7 @@ -221,9 +221,9 @@ fgeq.d a1, ft1, ft2 # CHECK-NEXT: 1 6 0.50 fmsub.s fa4, fa5, fa6, fa7 # CHECK-NEXT: 1 6 0.50 fnmsub.s fs2, fs3, fs4, fs5 # CHECK-NEXT: 1 6 0.50 fnmadd.s fs6, fs7, fs8, fs9 -# CHECK-NEXT: 1 4 0.50 fadd.d fs10, fs11, ft8 -# CHECK-NEXT: 1 4 0.50 fsub.d ft9, ft10, ft11 -# CHECK-NEXT: 1 5 0.50 fmul.d ft0, ft1, ft2 +# CHECK-NEXT: 1 2 0.50 fadd.d fs10, fs11, ft8 +# CHECK-NEXT: 1 2 0.50 fsub.d ft9, ft10, ft11 +# CHECK-NEXT: 1 4 0.50 fmul.d ft0, ft1, ft2 # CHECK-NEXT: 1 17 8.50 fdiv.d ft3, ft4, ft5 # CHECK-NEXT: 1 17 8.50 fsqrt.d ft6, ft7 # CHECK-NEXT: 1 1 0.50 fmin.d fa5, fa6, fa7 @@ -232,38 +232,38 @@ fgeq.d a1, ft1, ft2 # CHECK-NEXT: 1 6 0.50 fmsub.d fa4, fa5, fa6, fa7 # CHECK-NEXT: 1 6 0.50 fnmsub.d fs2, fs3, fs4, fs5 # CHECK-NEXT: 1 6 0.50 fnmadd.d fs6, fs7, fs8, fs9 -# CHECK-NEXT: 1 4 0.50 fmv.x.h a2, fs7 -# CHECK-NEXT: 1 4 0.50 fmv.h.x ft1, a6 -# CHECK-NEXT: 1 3 0.50 fcvt.s.h fa0, ft0 -# CHECK-NEXT: 1 3 0.50 fcvt.s.h fa0, ft0, rup -# CHECK-NEXT: 1 3 0.50 fcvt.h.s ft2, fa2 -# CHECK-NEXT: 1 3 0.50 fcvt.d.h fa0, ft0 -# CHECK-NEXT: 1 3 0.50 fcvt.d.h fa0, ft0, rup -# CHECK-NEXT: 1 3 0.50 fcvt.h.d ft2, fa2 -# CHECK-NEXT: 1 4 0.50 fcvt.w.s a0, fs5 -# 
CHECK-NEXT: 1 4 0.50 fcvt.wu.s a1, fs6 -# CHECK-NEXT: 1 4 0.50 fcvt.s.w ft11, a4 -# CHECK-NEXT: 1 4 0.50 fcvt.s.wu ft0, a5 -# CHECK-NEXT: 1 4 0.50 fcvt.l.s a0, ft0 -# CHECK-NEXT: 1 4 0.50 fcvt.lu.s a1, ft1 -# CHECK-NEXT: 1 4 0.50 fcvt.s.l ft2, a2 -# CHECK-NEXT: 1 4 0.50 fcvt.s.lu ft3, a3 -# CHECK-NEXT: 1 6 0.50 fmv.x.w a2, fs7 -# CHECK-NEXT: 1 6 0.50 fmv.w.x ft1, a6 +# CHECK-NEXT: 1 2 0.50 fmv.x.h a2, fs7 +# CHECK-NEXT: 1 2 0.50 fmv.h.x ft1, a6 +# CHECK-NEXT: 1 2 0.50 fcvt.s.h fa0, ft0 +# CHECK-NEXT: 1 2 0.50 fcvt.s.h fa0, ft0, rup +# CHECK-NEXT: 1 2 0.50 fcvt.h.s ft2, fa2 +# CHECK-NEXT: 1 2 0.50 fcvt.d.h fa0, ft0 +# CHECK-NEXT: 1 2 0.50 fcvt.d.h fa0, ft0, rup +# CHECK-NEXT: 1 2 0.50 fcvt.h.d ft2, fa2 +# CHECK-NEXT: 1 2 0.50 fcvt.w.s a0, fs5 +# CHECK-NEXT: 1 2 0.50 fcvt.wu.s a1, fs6 +# CHECK-NEXT: 1 2 0.50 fcvt.s.w ft11, a4 +# CHECK-NEXT: 1 2 0.50 fcvt.s.wu ft0, a5 +# CHECK-NEXT: 1 2 0.50 fcvt.l.s a0, ft0 +# CHECK-NEXT: 1 2 0.50 fcvt.lu.s a1, ft1 +# CHECK-NEXT: 1 2 0.50 fcvt.s.l ft2, a2 +# CHECK-NEXT: 1 2 0.50 fcvt.s.lu ft3, a3 +# CHECK-NEXT: 1 2 0.50 fmv.x.w a2, fs7 +# CHECK-NEXT: 1 2 0.50 fmv.w.x ft1, a6 # CHECK-NEXT: 1 1 0.50 fsgnj.s fs1, fa0, fa1 # CHECK-NEXT: 1 1 0.50 fsgnjn.s fa1, fa3, fa4 -# CHECK-NEXT: 1 4 0.50 fcvt.wu.d a4, ft11 -# CHECK-NEXT: 1 4 0.50 fcvt.w.d a4, ft11 -# CHECK-NEXT: 1 4 0.50 fcvt.d.w ft0, a5 -# CHECK-NEXT: 1 4 0.50 fcvt.d.wu ft1, a6 -# CHECK-NEXT: 1 4 0.50 fcvt.s.d fs5, fs6 -# CHECK-NEXT: 1 4 0.50 fcvt.d.s fs7, fs8 -# CHECK-NEXT: 1 4 0.50 fcvt.l.d a0, ft0 -# CHECK-NEXT: 1 4 0.50 fcvt.lu.d a1, ft1 -# CHECK-NEXT: 1 4 0.50 fcvt.d.l ft3, a3 -# CHECK-NEXT: 1 4 0.50 fcvt.d.lu ft4, a4 -# CHECK-NEXT: 1 6 0.50 fmv.x.d a2, ft2 -# CHECK-NEXT: 1 6 0.50 fmv.d.x ft5, a5 +# CHECK-NEXT: 1 2 0.50 fcvt.wu.d a4, ft11 +# CHECK-NEXT: 1 2 0.50 fcvt.w.d a4, ft11 +# CHECK-NEXT: 1 2 0.50 fcvt.d.w ft0, a5 +# CHECK-NEXT: 1 2 0.50 fcvt.d.wu ft1, a6 +# CHECK-NEXT: 1 2 0.50 fcvt.s.d fs5, fs6 +# CHECK-NEXT: 1 2 0.50 fcvt.d.s fs7, fs8 +# CHECK-NEXT: 1 2 0.50 fcvt.l.d 
a0, ft0 +# CHECK-NEXT: 1 2 0.50 fcvt.lu.d a1, ft1 +# CHECK-NEXT: 1 2 0.50 fcvt.d.l ft3, a3 +# CHECK-NEXT: 1 2 0.50 fcvt.d.lu ft4, a4 +# CHECK-NEXT: 1 2 0.50 fmv.x.d a2, ft2 +# CHECK-NEXT: 1 2 0.50 fmv.d.x ft5, a5 # CHECK-NEXT: 1 1 0.50 fsgnj.d fs1, fa0, fa1 # CHECK-NEXT: 1 1 0.50 fsgnjn.d fa1, fa3, fa4 # CHECK-NEXT: 1 2 0.50 feq.h a1, fs8, fs9 @@ -278,22 +278,22 @@ fgeq.d a1, ft1, ft2 # CHECK-NEXT: 1 1 0.50 fclass.s a3, ft10 # CHECK-NEXT: 1 1 0.50 fclass.s a3, ft10 # CHECK-NEXT: 1 1 0.50 fclass.d a3, ft10 -# CHECK-NEXT: 1 5 0.50 fli.h ft1, -1.0 -# CHECK-NEXT: 1 5 0.50 fli.s ft1, -1.0 -# CHECK-NEXT: 1 5 0.50 fli.d ft1, -1.0 +# CHECK-NEXT: 1 2 0.50 fli.h ft1, -1.0 +# CHECK-NEXT: 1 2 0.50 fli.s ft1, -1.0 +# CHECK-NEXT: 1 2 0.50 fli.d ft1, -1.0 # CHECK-NEXT: 1 1 0.50 fminm.h fa0, fa1, fa2 # CHECK-NEXT: 1 1 0.50 fmaxm.h fs3, fs4, fs5 # CHECK-NEXT: 1 1 0.50 fminm.s fa0, fa1, fa2 # CHECK-NEXT: 1 1 0.50 fmaxm.s fs3, fs4, fs5 # CHECK-NEXT: 1 1 0.50 fminm.d fa0, fa1, fa2 # CHECK-NEXT: 1 1 0.50 fmaxm.d fs3, fs4, fs5 -# CHECK-NEXT: 1 3 0.50 fround.h fs1, fs2 -# CHECK-NEXT: 1 3 0.50 froundnx.h fs1, fs2 -# CHECK-NEXT: 1 3 0.50 fround.s fs1, fs2 -# CHECK-NEXT: 1 3 0.50 froundnx.s fs1, fs2 -# CHECK-NEXT: 1 3 0.50 fround.d fs1, fs2 -# CHECK-NEXT: 1 3 0.50 froundnx.d fs1, fs2 -# CHECK-NEXT: 1 4 0.50 fcvtmod.w.d a1, ft1, rtz +# CHECK-NEXT: 1 2 0.50 fround.h fs1, fs2 +# CHECK-NEXT: 1 2 0.50 froundnx.h fs1, fs2 +# CHECK-NEXT: 1 2 0.50 fround.s fs1, fs2 +# CHECK-NEXT: 1 2 0.50 froundnx.s fs1, fs2 +# CHECK-NEXT: 1 2 0.50 fround.d fs1, fs2 +# CHECK-NEXT: 1 2 0.50 froundnx.d fs1, fs2 +# CHECK-NEXT: 1 2 0.50 fcvtmod.w.d a1, ft1, rtz # CHECK-NEXT: 1 2 0.50 fltq.h a1, fs1, fs2 # CHECK-NEXT: 1 2 0.50 fleq.h a1, ft1, ft2 # CHECK-NEXT: 1 2 0.50 fltq.h a1, fs2, fs1 @@ -307,51 +307,6 @@ fgeq.d a1, ft1, ft2 # CHECK-NEXT: 1 2 0.50 fltq.d a1, fs2, fs1 # CHECK-NEXT: 1 2 0.50 fleq.d a1, ft2, ft1 -# CHECK: Dynamic Dispatch Stall Cycles: -# CHECK-NEXT: RAT - Register unavailable: 0 -# CHECK-NEXT: RCU - 
Retire tokens unavailable: 0 -# CHECK-NEXT: SCHEDQ - Scheduler full: 0 -# CHECK-NEXT: LQ - Load queue full: 0 -# CHECK-NEXT: SQ - Store queue full: 0 -# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 -# CHECK-NEXT: USH - Uncategorised Structural Hazard: 0 - -# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: -# CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 76 (80.0%) -# CHECK-NEXT: 5, 1 (1.1%) -# CHECK-NEXT: 6, 18 (18.9%) - -# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: -# CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 37 (38.9%) -# CHECK-NEXT: 1, 4 (4.2%) -# CHECK-NEXT: 2, 53 (55.8%) -# CHECK-NEXT: 3, 1 (1.1%) - -# CHECK: Scheduler's queue usage: -# CHECK-NEXT: No scheduler resources used. - -# CHECK: Retire Control Unit - number of cycles where we saw N instructions retired: -# CHECK-NEXT: [# retired], [# cycles] -# CHECK-NEXT: 0, 56 (58.9%) -# CHECK-NEXT: 1, 12 (12.6%) -# CHECK-NEXT: 2, 13 (13.7%) -# CHECK-NEXT: 3, 3 (3.2%) -# CHECK-NEXT: 4, 6 (6.3%) -# CHECK-NEXT: 6, 1 (1.1%) -# CHECK-NEXT: 8, 1 (1.1%) -# CHECK-NEXT: 9, 2 (2.1%) -# CHECK-NEXT: 10, 1 (1.1%) - -# CHECK: Total ROB Entries: 192 -# CHECK-NEXT: Max Used ROB Entries: 100 ( 52.1% ) -# CHECK-NEXT: Average Used ROB Entries per cy: 61 ( 31.8% ) - -# CHECK: Register File statistics: -# CHECK-NEXT: Total number of mappings created: 110 -# CHECK-NEXT: Max number of mappings used: 100 - # CHECK: Resources: # CHECK-NEXT: [0] - GenericOOODIV # CHECK-NEXT: [1.0] - GenericOOOFPU @@ -423,24 +378,24 @@ fgeq.d a1, ft1, ft2 # CHECK-NEXT: - - 1.00 - - - - - fcvt.lu.s a1, ft1 # CHECK-NEXT: - 1.00 - - - - - - fcvt.s.l ft2, a2 # CHECK-NEXT: - 1.00 - - - - - - fcvt.s.lu ft3, a3 -# CHECK-NEXT: - - 1.00 - - - - - fmv.x.w a2, fs7 -# CHECK-NEXT: - 1.00 - - - - - - fmv.w.x ft1, a6 -# CHECK-NEXT: - - 1.00 - - - - - fsgnj.s fs1, fa0, fa1 -# CHECK-NEXT: - 1.00 - - - - - - fsgnjn.s fa1, fa3, fa4 +# CHECK-NEXT: - 1.00 - - - - - - 
fmv.x.w a2, fs7 +# CHECK-NEXT: - - 1.00 - - - - - fmv.w.x ft1, a6 +# CHECK-NEXT: - 1.00 - - - - - - fsgnj.s fs1, fa0, fa1 +# CHECK-NEXT: - - 1.00 - - - - - fsgnjn.s fa1, fa3, fa4 # CHECK-NEXT: - 1.00 - - - - - - fcvt.wu.d a4, ft11 # CHECK-NEXT: - - 1.00 - - - - - fcvt.w.d a4, ft11 # CHECK-NEXT: - 1.00 - - - - - - fcvt.d.w ft0, a5 # CHECK-NEXT: - 1.00 - - - - - - fcvt.d.wu ft1, a6 # CHECK-NEXT: - - 1.00 - - - - - fcvt.s.d fs5, fs6 # CHECK-NEXT: - - 1.00 - - - - - fcvt.d.s fs7, fs8 -# CHECK-NEXT: - - 1.00 - - - - - fcvt.l.d a0, ft0 +# CHECK-NEXT: - 1.00 - - - - - - fcvt.l.d a0, ft0 # CHECK-NEXT: - - 1.00 - - - - - fcvt.lu.d a1, ft1 # CHECK-NEXT: - 1.00 - - - - - - fcvt.d.l ft3, a3 -# CHECK-NEXT: - 1.00 - - - - - - fcvt.d.lu ft4, a4 -# CHECK-NEXT: - - 1.00 - - - - - fmv.x.d a2, ft2 +# CHECK-NEXT: - - 1.00 - - - - - fcvt.d.lu ft4, a4 +# CHECK-NEXT: - 1.00 - - - - - - fmv.x.d a2, ft2 # CHECK-NEXT: - 1.00 - - - - - - fmv.d.x ft5, a5 # CHECK-NEXT: - - 1.00 - - - - - fsgnj.d fs1, fa0, fa1 -# CHECK-NEXT: - 1.00 - - - - - - fsgnjn.d fa1, fa3, fa4 +# CHECK-NEXT: - - 1.00 - - - - - fsgnjn.d fa1, fa3, fa4 # CHECK-NEXT: - 1.00 - - - - - - feq.h a1, fs8, fs9 # CHECK-NEXT: - - 1.00 - - - - - flt.h a2, fs10, fs11 # CHECK-NEXT: - 1.00 - - - - - - fle.h a3, ft8, ft9 diff --git a/llvm/test/tools/llvm-mca/RISCV/GenericOOO/integer.s b/llvm/test/tools/llvm-mca/RISCV/GenericOOO/integer.s index f58a859885f58..40e524c3f5f7d 100644 --- a/llvm/test/tools/llvm-mca/RISCV/GenericOOO/integer.s +++ b/llvm/test/tools/llvm-mca/RISCV/GenericOOO/integer.s @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=riscv64 -mattr=+rva23u64,+zbc,+zbkb,+zbkx -mcpu=generic-ooo --all-stats -iterations=1 < %s | FileCheck %s +# RUN: llvm-mca -mtriple=riscv64 -mattr=+rva23u64,+zbc,+zbkb,+zbkx -mcpu=generic-ooo -iterations=1 < %s | FileCheck %s # Integer Register-Immediate Instructions addi a0, a0, 1 @@ -326,51 +326,6 @@ xperm4 a0, a1, a2 # 
CHECK-NEXT: 1 1 0.25 xperm8 a0, a1, a2 # CHECK-NEXT: 1 1 0.25 xperm4 a0, a1, a2 -# CHECK: Dynamic Dispatch Stall Cycles: -# CHECK-NEXT: RAT - Register unavailable: 0 -# CHECK-NEXT: RCU - Retire tokens unavailable: 0 -# CHECK-NEXT: SCHEDQ - Scheduler full: 0 -# CHECK-NEXT: LQ - Load queue full: 0 -# CHECK-NEXT: SQ - Store queue full: 0 -# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 -# CHECK-NEXT: USH - Uncategorised Structural Hazard: 0 - -# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: -# CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 134 (86.5%) -# CHECK-NEXT: 1, 1 (0.6%) -# CHECK-NEXT: 6, 20 (12.9%) - -# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: -# CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 102 (65.8%) -# CHECK-NEXT: 1, 27 (17.4%) -# CHECK-NEXT: 2, 4 (2.6%) -# CHECK-NEXT: 3, 13 (8.4%) -# CHECK-NEXT: 4, 3 (1.9%) -# CHECK-NEXT: 5, 1 (0.6%) -# CHECK-NEXT: 6, 5 (3.2%) - -# CHECK: Scheduler's queue usage: -# CHECK-NEXT: No scheduler resources used. 
- -# CHECK: Retire Control Unit - number of cycles where we saw N instructions retired: -# CHECK-NEXT: [# retired], [# cycles] -# CHECK-NEXT: 0, 132 (85.2%) -# CHECK-NEXT: 1, 19 (12.3%) -# CHECK-NEXT: 2, 1 (0.6%) -# CHECK-NEXT: 13, 1 (0.6%) -# CHECK-NEXT: 28, 1 (0.6%) -# CHECK-NEXT: 59, 1 (0.6%) - -# CHECK: Total ROB Entries: 192 -# CHECK-NEXT: Max Used ROB Entries: 91 ( 47.4% ) -# CHECK-NEXT: Average Used ROB Entries per cy: 75 ( 39.1% ) - -# CHECK: Register File statistics: -# CHECK-NEXT: Total number of mappings created: 111 -# CHECK-NEXT: Max number of mappings used: 81 - # CHECK: Resources: # CHECK-NEXT: [0] - GenericOOODIV # CHECK-NEXT: [1.0] - GenericOOOFPU From 06d9ce24a8970779aaba060ae49d035a6b97dbd8 Mon Sep 17 00:00:00 2001 From: Wang Pengcheng Date: Thu, 6 Feb 2025 17:33:28 +0800 Subject: [PATCH 4/7] Adjust branch/mul/div units --- llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td | 62 ++-- .../tools/llvm-mca/RISCV/GenericOOO/atomic.s | 342 +++++++++--------- .../RISCV/GenericOOO/floating-point.s | 168 ++++----- .../tools/llvm-mca/RISCV/GenericOOO/integer.s | 250 ++++++------- 4 files changed, 414 insertions(+), 408 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td b/llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td index 06e36a062a46e..05721552e3090 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td +++ b/llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td @@ -10,7 +10,7 @@ // We assume that: // * 6-issue out-of-order CPU with 192 ROB entries. // * Units: -// * IXU (Integer ALU Unit): 4 units, only one can execute division. +// * IXU (Integer ALU Unit): 4 units, only one can execute mul/div. // * FXU (Floating-point Unit): 2 units. // * LSU (Load/Store Unit): 2 units. // * VXU (Vector Unit): 1 unit. 
@@ -39,20 +39,24 @@ let SchedModel = GenericOOOModel in { //===----------------------------------------------------------------------===// // Resource groups //===----------------------------------------------------------------------===// -def GenericOOODIV : ProcResource<1>; -def GenericOOOIXU : ProcResource<3>; -def GenericOOOALU : ProcResGroup<[GenericOOODIV, GenericOOOIXU]>; +def GenericOOOBranch : ProcResource<1>; +def GenericOOOMulDiv : ProcResource<1>; +def GenericOOOInt : ProcResource<2>; +def GenericOOOALU + : ProcResGroup<[GenericOOOBranch, GenericOOOMulDiv, GenericOOOInt]>; def GenericOOOLSU : ProcResource<2>; -def GenericOOOFPU : ProcResource<2>; +def GenericOOOFMulDiv : ProcResource<1>; +def GenericOOOFloat : ProcResource<1>; +def GenericOOOFPU : ProcResGroup<[GenericOOOFMulDiv, GenericOOOFloat]>; // TODO: Add vector scheduling. // def GenericOOOVXU : ProcResource<1>; //===----------------------------------------------------------------------===// // Branches //===----------------------------------------------------------------------===// -def : WriteRes; -def : WriteRes; -def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; //===----------------------------------------------------------------------===// // Integer arithmetic and logic @@ -68,26 +72,26 @@ def : WriteRes; // Integer multiplication //===----------------------------------------------------------------------===// let Latency = 4 in { - def : WriteRes; - def : WriteRes; + def : WriteRes; + def : WriteRes; } //===----------------------------------------------------------------------===// // Integer division //===----------------------------------------------------------------------===// -def : WriteRes { +def : WriteRes { let Latency = 13; let ReleaseAtCycles = [13]; } -def : WriteRes { +def : WriteRes { let Latency = 21; let ReleaseAtCycles = [21]; } -def : WriteRes { +def : WriteRes { let Latency = 13; let ReleaseAtCycles = [13]; } -def : WriteRes { +def : WriteRes { let 
Latency = 21; let ReleaseAtCycles = [21]; } @@ -144,16 +148,6 @@ let Latency = 2 in { def : WriteRes; } -let Latency = 4 in { - def : WriteRes; - def : WriteRes; -} - -let Latency = 6 in { - def : WriteRes; - def : WriteRes; -} - def : WriteRes; def : WriteRes; def : WriteRes; @@ -165,15 +159,27 @@ let Latency = 2 in { def : WriteRes; } +// Multiplication +let Latency = 4 in { + def : WriteRes; + def : WriteRes; +} + +// FMA +let Latency = 6 in { + def : WriteRes; + def : WriteRes; +} + // Division let Latency = 13, ReleaseAtCycles = [13] in { - def : WriteRes; - def : WriteRes; + def : WriteRes; + def : WriteRes; } let Latency = 17, ReleaseAtCycles = [17] in { - def : WriteRes; - def : WriteRes; + def : WriteRes; + def : WriteRes; } // Conversions diff --git a/llvm/test/tools/llvm-mca/RISCV/GenericOOO/atomic.s b/llvm/test/tools/llvm-mca/RISCV/GenericOOO/atomic.s index 60532fae10f2f..2f8710175a6e9 100644 --- a/llvm/test/tools/llvm-mca/RISCV/GenericOOO/atomic.s +++ b/llvm/test/tools/llvm-mca/RISCV/GenericOOO/atomic.s @@ -385,178 +385,178 @@ amomaxu.h.aqrl s5, s4, (s3) # CHECK-NEXT: 1 5 0.50 * * amomaxu.h.aqrl s5, s4, (s3) # CHECK: Resources: -# CHECK-NEXT: [0] - GenericOOODIV -# CHECK-NEXT: [1.0] - GenericOOOFPU -# CHECK-NEXT: [1.1] - GenericOOOFPU -# CHECK-NEXT: [2.0] - GenericOOOIXU -# CHECK-NEXT: [2.1] - GenericOOOIXU -# CHECK-NEXT: [2.2] - GenericOOOIXU -# CHECK-NEXT: [3.0] - GenericOOOLSU -# CHECK-NEXT: [3.1] - GenericOOOLSU +# CHECK-NEXT: [0] - GenericOOOBranch +# CHECK-NEXT: [1] - GenericOOOFMulDiv +# CHECK-NEXT: [2] - GenericOOOFloat +# CHECK-NEXT: [3.0] - GenericOOOInt +# CHECK-NEXT: [3.1] - GenericOOOInt +# CHECK-NEXT: [4.0] - GenericOOOLSU +# CHECK-NEXT: [4.1] - GenericOOOLSU +# CHECK-NEXT: [5] - GenericOOOMulDiv # CHECK: Resource pressure per iteration: -# CHECK-NEXT: [0] [1.0] [1.1] [2.0] [2.1] [2.2] [3.0] [3.1] -# CHECK-NEXT: - - - - - - 80.00 80.00 +# CHECK-NEXT: [0] [1] [2] [3.0] [3.1] [4.0] [4.1] [5] +# CHECK-NEXT: - - - - - 80.00 80.00 - # CHECK: 
Resource pressure by instruction: -# CHECK-NEXT: [0] [1.0] [1.1] [2.0] [2.1] [2.2] [3.0] [3.1] Instructions: -# CHECK-NEXT: - - - - - - - 1.00 lr.w t0, (t1) -# CHECK-NEXT: - - - - - - 1.00 - lr.w.aq t1, (t2) -# CHECK-NEXT: - - - - - - - 1.00 lr.w.rl t2, (t3) -# CHECK-NEXT: - - - - - - 1.00 - lr.w.aqrl t3, (t4) -# CHECK-NEXT: - - - - - - - 1.00 sc.w t6, t5, (t4) -# CHECK-NEXT: - - - - - - - 1.00 sc.w.aq t5, t4, (t3) -# CHECK-NEXT: - - - - - - 1.00 - sc.w.rl t4, t3, (t2) -# CHECK-NEXT: - - - - - - - 1.00 sc.w.aqrl t3, t2, (t1) -# CHECK-NEXT: - - - - - - 1.00 - lr.d t0, (t1) -# CHECK-NEXT: - - - - - - 1.00 - lr.d.aq t1, (t2) -# CHECK-NEXT: - - - - - - - 1.00 lr.d.rl t2, (t3) -# CHECK-NEXT: - - - - - - 1.00 - lr.d.aqrl t3, (t4) -# CHECK-NEXT: - - - - - - - 1.00 sc.d t6, t5, (t4) -# CHECK-NEXT: - - - - - - 1.00 - sc.d.aq t5, t4, (t3) -# CHECK-NEXT: - - - - - - - 1.00 sc.d.rl t4, t3, (t2) -# CHECK-NEXT: - - - - - - 1.00 - sc.d.aqrl t3, t2, (t1) -# CHECK-NEXT: - - - - - - - 1.00 amoswap.w a4, ra, (s0) -# CHECK-NEXT: - - - - - - 1.00 - amoadd.w a1, a2, (a3) -# CHECK-NEXT: - - - - - - - 1.00 amoxor.w a2, a3, (a4) -# CHECK-NEXT: - - - - - - 1.00 - amoand.w a3, a4, (a5) -# CHECK-NEXT: - - - - - - - 1.00 amoor.w a4, a5, (a6) -# CHECK-NEXT: - - - - - - 1.00 - amomin.w a5, a6, (a7) -# CHECK-NEXT: - - - - - - - 1.00 amomax.w s7, s6, (s5) -# CHECK-NEXT: - - - - - - 1.00 - amominu.w s6, s5, (s4) -# CHECK-NEXT: - - - - - - - 1.00 amomaxu.w s5, s4, (s3) -# CHECK-NEXT: - - - - - - 1.00 - amoswap.w.aq a4, ra, (s0) -# CHECK-NEXT: - - - - - - - 1.00 amoadd.w.aq a1, a2, (a3) -# CHECK-NEXT: - - - - - - 1.00 - amoxor.w.aq a2, a3, (a4) -# CHECK-NEXT: - - - - - - - 1.00 amoand.w.aq a3, a4, (a5) -# CHECK-NEXT: - - - - - - 1.00 - amoor.w.aq a4, a5, (a6) -# CHECK-NEXT: - - - - - - - 1.00 amomin.w.aq a5, a6, (a7) -# CHECK-NEXT: - - - - - - 1.00 - amomax.w.aq s7, s6, (s5) -# CHECK-NEXT: - - - - - - - 1.00 amominu.w.aq s6, s5, (s4) -# CHECK-NEXT: - - - - - - 1.00 - amomaxu.w.aq s5, s4, (s3) -# 
CHECK-NEXT: - - - - - - - 1.00 amoswap.w.rl a4, ra, (s0) -# CHECK-NEXT: - - - - - - 1.00 - amoadd.w.rl a1, a2, (a3) -# CHECK-NEXT: - - - - - - - 1.00 amoxor.w.rl a2, a3, (a4) -# CHECK-NEXT: - - - - - - 1.00 - amoand.w.rl a3, a4, (a5) -# CHECK-NEXT: - - - - - - - 1.00 amoor.w.rl a4, a5, (a6) -# CHECK-NEXT: - - - - - - 1.00 - amomin.w.rl a5, a6, (a7) -# CHECK-NEXT: - - - - - - - 1.00 amomax.w.rl s7, s6, (s5) -# CHECK-NEXT: - - - - - - 1.00 - amominu.w.rl s6, s5, (s4) -# CHECK-NEXT: - - - - - - - 1.00 amomaxu.w.rl s5, s4, (s3) -# CHECK-NEXT: - - - - - - 1.00 - amoswap.w.aqrl a4, ra, (s0) -# CHECK-NEXT: - - - - - - - 1.00 amoadd.w.aqrl a1, a2, (a3) -# CHECK-NEXT: - - - - - - 1.00 - amoxor.w.aqrl a2, a3, (a4) -# CHECK-NEXT: - - - - - - - 1.00 amoand.w.aqrl a3, a4, (a5) -# CHECK-NEXT: - - - - - - 1.00 - amoor.w.aqrl a4, a5, (a6) -# CHECK-NEXT: - - - - - - - 1.00 amomin.w.aqrl a5, a6, (a7) -# CHECK-NEXT: - - - - - - 1.00 - amomax.w.aqrl s7, s6, (s5) -# CHECK-NEXT: - - - - - - - 1.00 amominu.w.aqrl s6, s5, (s4) -# CHECK-NEXT: - - - - - - 1.00 - amomaxu.w.aqrl s5, s4, (s3) -# CHECK-NEXT: - - - - - - - 1.00 amoswap.d a4, ra, (s0) -# CHECK-NEXT: - - - - - - 1.00 - amoadd.d a1, a2, (a3) -# CHECK-NEXT: - - - - - - - 1.00 amoxor.d a2, a3, (a4) -# CHECK-NEXT: - - - - - - 1.00 - amoand.d a3, a4, (a5) -# CHECK-NEXT: - - - - - - - 1.00 amoor.d a4, a5, (a6) -# CHECK-NEXT: - - - - - - 1.00 - amomin.d a5, a6, (a7) -# CHECK-NEXT: - - - - - - - 1.00 amomax.d s7, s6, (s5) -# CHECK-NEXT: - - - - - - 1.00 - amominu.d s6, s5, (s4) -# CHECK-NEXT: - - - - - - - 1.00 amomaxu.d s5, s4, (s3) -# CHECK-NEXT: - - - - - - 1.00 - amoswap.d.aq a4, ra, (s0) -# CHECK-NEXT: - - - - - - - 1.00 amoadd.d.aq a1, a2, (a3) -# CHECK-NEXT: - - - - - - 1.00 - amoxor.d.aq a2, a3, (a4) -# CHECK-NEXT: - - - - - - - 1.00 amoand.d.aq a3, a4, (a5) -# CHECK-NEXT: - - - - - - 1.00 - amoor.d.aq a4, a5, (a6) -# CHECK-NEXT: - - - - - - - 1.00 amomin.d.aq a5, a6, (a7) -# CHECK-NEXT: - - - - - - 1.00 - amomax.d.aq s7, s6, (s5) 
-# CHECK-NEXT: - - - - - - - 1.00 amominu.d.aq s6, s5, (s4) -# CHECK-NEXT: - - - - - - 1.00 - amomaxu.d.aq s5, s4, (s3) -# CHECK-NEXT: - - - - - - - 1.00 amoswap.d.rl a4, ra, (s0) -# CHECK-NEXT: - - - - - - 1.00 - amoadd.d.rl a1, a2, (a3) -# CHECK-NEXT: - - - - - - - 1.00 amoxor.d.rl a2, a3, (a4) -# CHECK-NEXT: - - - - - - 1.00 - amoand.d.rl a3, a4, (a5) -# CHECK-NEXT: - - - - - - - 1.00 amoor.d.rl a4, a5, (a6) -# CHECK-NEXT: - - - - - - 1.00 - amomin.d.rl a5, a6, (a7) -# CHECK-NEXT: - - - - - - - 1.00 amomax.d.rl s7, s6, (s5) -# CHECK-NEXT: - - - - - - 1.00 - amominu.d.rl s6, s5, (s4) -# CHECK-NEXT: - - - - - - - 1.00 amomaxu.d.rl s5, s4, (s3) -# CHECK-NEXT: - - - - - - 1.00 - amoswap.d.aqrl a4, ra, (s0) -# CHECK-NEXT: - - - - - - - 1.00 amoadd.d.aqrl a1, a2, (a3) -# CHECK-NEXT: - - - - - - 1.00 - amoxor.d.aqrl a2, a3, (a4) -# CHECK-NEXT: - - - - - - - 1.00 amoand.d.aqrl a3, a4, (a5) -# CHECK-NEXT: - - - - - - 1.00 - amoor.d.aqrl a4, a5, (a6) -# CHECK-NEXT: - - - - - - - 1.00 amomin.d.aqrl a5, a6, (a7) -# CHECK-NEXT: - - - - - - 1.00 - amomax.d.aqrl s7, s6, (s5) -# CHECK-NEXT: - - - - - - - 1.00 amominu.d.aqrl s6, s5, (s4) -# CHECK-NEXT: - - - - - - 1.00 - amomaxu.d.aqrl s5, s4, (s3) -# CHECK-NEXT: - - - - - - - 1.00 amoswap.b a4, ra, (s0) -# CHECK-NEXT: - - - - - - 1.00 - amoadd.b a1, a2, (a3) -# CHECK-NEXT: - - - - - - - 1.00 amoxor.b a2, a3, (a4) -# CHECK-NEXT: - - - - - - 1.00 - amoand.b a3, a4, (a5) -# CHECK-NEXT: - - - - - - - 1.00 amoor.b a4, a5, (a6) -# CHECK-NEXT: - - - - - - 1.00 - amomin.b a5, a6, (a7) -# CHECK-NEXT: - - - - - - - 1.00 amomax.b s7, s6, (s5) -# CHECK-NEXT: - - - - - - 1.00 - amominu.b s6, s5, (s4) -# CHECK-NEXT: - - - - - - - 1.00 amomaxu.b s5, s4, (s3) -# CHECK-NEXT: - - - - - - 1.00 - amoswap.b.aq a4, ra, (s0) -# CHECK-NEXT: - - - - - - - 1.00 amoadd.b.aq a1, a2, (a3) -# CHECK-NEXT: - - - - - - 1.00 - amoxor.b.aq a2, a3, (a4) -# CHECK-NEXT: - - - - - - - 1.00 amoand.b.aq a3, a4, (a5) -# CHECK-NEXT: - - - - - - 1.00 - amoor.b.aq a4, a5, 
(a6) -# CHECK-NEXT: - - - - - - - 1.00 amomin.b.aq a5, a6, (a7) -# CHECK-NEXT: - - - - - - 1.00 - amomax.b.aq s7, s6, (s5) -# CHECK-NEXT: - - - - - - - 1.00 amominu.b.aq s6, s5, (s4) -# CHECK-NEXT: - - - - - - 1.00 - amomaxu.b.aq s5, s4, (s3) -# CHECK-NEXT: - - - - - - - 1.00 amoswap.b.rl a4, ra, (s0) -# CHECK-NEXT: - - - - - - 1.00 - amoadd.b.rl a1, a2, (a3) -# CHECK-NEXT: - - - - - - - 1.00 amoxor.b.rl a2, a3, (a4) -# CHECK-NEXT: - - - - - - 1.00 - amoand.b.rl a3, a4, (a5) -# CHECK-NEXT: - - - - - - - 1.00 amoor.b.rl a4, a5, (a6) -# CHECK-NEXT: - - - - - - 1.00 - amomin.b.rl a5, a6, (a7) -# CHECK-NEXT: - - - - - - - 1.00 amomax.b.rl s7, s6, (s5) -# CHECK-NEXT: - - - - - - 1.00 - amominu.b.rl s6, s5, (s4) -# CHECK-NEXT: - - - - - - - 1.00 amomaxu.b.rl s5, s4, (s3) -# CHECK-NEXT: - - - - - - 1.00 - amoswap.b.aqrl a4, ra, (s0) -# CHECK-NEXT: - - - - - - - 1.00 amoadd.b.aqrl a1, a2, (a3) -# CHECK-NEXT: - - - - - - 1.00 - amoxor.b.aqrl a2, a3, (a4) -# CHECK-NEXT: - - - - - - - 1.00 amoand.b.aqrl a3, a4, (a5) -# CHECK-NEXT: - - - - - - 1.00 - amoor.b.aqrl a4, a5, (a6) -# CHECK-NEXT: - - - - - - - 1.00 amomin.b.aqrl a5, a6, (a7) -# CHECK-NEXT: - - - - - - 1.00 - amomax.b.aqrl s7, s6, (s5) -# CHECK-NEXT: - - - - - - - 1.00 amominu.b.aqrl s6, s5, (s4) -# CHECK-NEXT: - - - - - - 1.00 - amomaxu.b.aqrl s5, s4, (s3) -# CHECK-NEXT: - - - - - - - 1.00 amoswap.h a4, ra, (s0) -# CHECK-NEXT: - - - - - - 1.00 - amoadd.h a1, a2, (a3) -# CHECK-NEXT: - - - - - - - 1.00 amoxor.h a2, a3, (a4) -# CHECK-NEXT: - - - - - - 1.00 - amoand.h a3, a4, (a5) -# CHECK-NEXT: - - - - - - - 1.00 amoor.h a4, a5, (a6) -# CHECK-NEXT: - - - - - - 1.00 - amomin.h a5, a6, (a7) -# CHECK-NEXT: - - - - - - - 1.00 amomax.h s7, s6, (s5) -# CHECK-NEXT: - - - - - - 1.00 - amominu.h s6, s5, (s4) -# CHECK-NEXT: - - - - - - - 1.00 amomaxu.h s5, s4, (s3) -# CHECK-NEXT: - - - - - - 1.00 - amoswap.h.aq a4, ra, (s0) -# CHECK-NEXT: - - - - - - - 1.00 amoadd.h.aq a1, a2, (a3) -# CHECK-NEXT: - - - - - - 1.00 - amoxor.h.aq 
a2, a3, (a4) -# CHECK-NEXT: - - - - - - - 1.00 amoand.h.aq a3, a4, (a5) -# CHECK-NEXT: - - - - - - 1.00 - amoor.h.aq a4, a5, (a6) -# CHECK-NEXT: - - - - - - - 1.00 amomin.h.aq a5, a6, (a7) -# CHECK-NEXT: - - - - - - 1.00 - amomax.h.aq s7, s6, (s5) -# CHECK-NEXT: - - - - - - - 1.00 amominu.h.aq s6, s5, (s4) -# CHECK-NEXT: - - - - - - 1.00 - amomaxu.h.aq s5, s4, (s3) -# CHECK-NEXT: - - - - - - - 1.00 amoswap.h.rl a4, ra, (s0) -# CHECK-NEXT: - - - - - - 1.00 - amoadd.h.rl a1, a2, (a3) -# CHECK-NEXT: - - - - - - - 1.00 amoxor.h.rl a2, a3, (a4) -# CHECK-NEXT: - - - - - - 1.00 - amoand.h.rl a3, a4, (a5) -# CHECK-NEXT: - - - - - - - 1.00 amoor.h.rl a4, a5, (a6) -# CHECK-NEXT: - - - - - - 1.00 - amomin.h.rl a5, a6, (a7) -# CHECK-NEXT: - - - - - - - 1.00 amomax.h.rl s7, s6, (s5) -# CHECK-NEXT: - - - - - - 1.00 - amominu.h.rl s6, s5, (s4) -# CHECK-NEXT: - - - - - - - 1.00 amomaxu.h.rl s5, s4, (s3) -# CHECK-NEXT: - - - - - - 1.00 - amoswap.h.aqrl a4, ra, (s0) -# CHECK-NEXT: - - - - - - - 1.00 amoadd.h.aqrl a1, a2, (a3) -# CHECK-NEXT: - - - - - - 1.00 - amoxor.h.aqrl a2, a3, (a4) -# CHECK-NEXT: - - - - - - - 1.00 amoand.h.aqrl a3, a4, (a5) -# CHECK-NEXT: - - - - - - 1.00 - amoor.h.aqrl a4, a5, (a6) -# CHECK-NEXT: - - - - - - - 1.00 amomin.h.aqrl a5, a6, (a7) -# CHECK-NEXT: - - - - - - 1.00 - amomax.h.aqrl s7, s6, (s5) -# CHECK-NEXT: - - - - - - - 1.00 amominu.h.aqrl s6, s5, (s4) -# CHECK-NEXT: - - - - - - 1.00 - amomaxu.h.aqrl s5, s4, (s3) +# CHECK-NEXT: [0] [1] [2] [3.0] [3.1] [4.0] [4.1] [5] Instructions: +# CHECK-NEXT: - - - - - - 1.00 - lr.w t0, (t1) +# CHECK-NEXT: - - - - - 1.00 - - lr.w.aq t1, (t2) +# CHECK-NEXT: - - - - - - 1.00 - lr.w.rl t2, (t3) +# CHECK-NEXT: - - - - - 1.00 - - lr.w.aqrl t3, (t4) +# CHECK-NEXT: - - - - - - 1.00 - sc.w t6, t5, (t4) +# CHECK-NEXT: - - - - - - 1.00 - sc.w.aq t5, t4, (t3) +# CHECK-NEXT: - - - - - 1.00 - - sc.w.rl t4, t3, (t2) +# CHECK-NEXT: - - - - - - 1.00 - sc.w.aqrl t3, t2, (t1) +# CHECK-NEXT: - - - - - 1.00 - - lr.d t0, (t1) +# 
CHECK-NEXT: - - - - - 1.00 - - lr.d.aq t1, (t2) +# CHECK-NEXT: - - - - - - 1.00 - lr.d.rl t2, (t3) +# CHECK-NEXT: - - - - - 1.00 - - lr.d.aqrl t3, (t4) +# CHECK-NEXT: - - - - - - 1.00 - sc.d t6, t5, (t4) +# CHECK-NEXT: - - - - - 1.00 - - sc.d.aq t5, t4, (t3) +# CHECK-NEXT: - - - - - - 1.00 - sc.d.rl t4, t3, (t2) +# CHECK-NEXT: - - - - - 1.00 - - sc.d.aqrl t3, t2, (t1) +# CHECK-NEXT: - - - - - - 1.00 - amoswap.w a4, ra, (s0) +# CHECK-NEXT: - - - - - 1.00 - - amoadd.w a1, a2, (a3) +# CHECK-NEXT: - - - - - - 1.00 - amoxor.w a2, a3, (a4) +# CHECK-NEXT: - - - - - 1.00 - - amoand.w a3, a4, (a5) +# CHECK-NEXT: - - - - - - 1.00 - amoor.w a4, a5, (a6) +# CHECK-NEXT: - - - - - 1.00 - - amomin.w a5, a6, (a7) +# CHECK-NEXT: - - - - - - 1.00 - amomax.w s7, s6, (s5) +# CHECK-NEXT: - - - - - 1.00 - - amominu.w s6, s5, (s4) +# CHECK-NEXT: - - - - - - 1.00 - amomaxu.w s5, s4, (s3) +# CHECK-NEXT: - - - - - 1.00 - - amoswap.w.aq a4, ra, (s0) +# CHECK-NEXT: - - - - - - 1.00 - amoadd.w.aq a1, a2, (a3) +# CHECK-NEXT: - - - - - 1.00 - - amoxor.w.aq a2, a3, (a4) +# CHECK-NEXT: - - - - - - 1.00 - amoand.w.aq a3, a4, (a5) +# CHECK-NEXT: - - - - - 1.00 - - amoor.w.aq a4, a5, (a6) +# CHECK-NEXT: - - - - - - 1.00 - amomin.w.aq a5, a6, (a7) +# CHECK-NEXT: - - - - - 1.00 - - amomax.w.aq s7, s6, (s5) +# CHECK-NEXT: - - - - - - 1.00 - amominu.w.aq s6, s5, (s4) +# CHECK-NEXT: - - - - - 1.00 - - amomaxu.w.aq s5, s4, (s3) +# CHECK-NEXT: - - - - - - 1.00 - amoswap.w.rl a4, ra, (s0) +# CHECK-NEXT: - - - - - 1.00 - - amoadd.w.rl a1, a2, (a3) +# CHECK-NEXT: - - - - - - 1.00 - amoxor.w.rl a2, a3, (a4) +# CHECK-NEXT: - - - - - 1.00 - - amoand.w.rl a3, a4, (a5) +# CHECK-NEXT: - - - - - - 1.00 - amoor.w.rl a4, a5, (a6) +# CHECK-NEXT: - - - - - 1.00 - - amomin.w.rl a5, a6, (a7) +# CHECK-NEXT: - - - - - - 1.00 - amomax.w.rl s7, s6, (s5) +# CHECK-NEXT: - - - - - 1.00 - - amominu.w.rl s6, s5, (s4) +# CHECK-NEXT: - - - - - - 1.00 - amomaxu.w.rl s5, s4, (s3) +# CHECK-NEXT: - - - - - 1.00 - - amoswap.w.aqrl a4, ra, 
(s0) +# CHECK-NEXT: - - - - - - 1.00 - amoadd.w.aqrl a1, a2, (a3) +# CHECK-NEXT: - - - - - 1.00 - - amoxor.w.aqrl a2, a3, (a4) +# CHECK-NEXT: - - - - - - 1.00 - amoand.w.aqrl a3, a4, (a5) +# CHECK-NEXT: - - - - - 1.00 - - amoor.w.aqrl a4, a5, (a6) +# CHECK-NEXT: - - - - - - 1.00 - amomin.w.aqrl a5, a6, (a7) +# CHECK-NEXT: - - - - - 1.00 - - amomax.w.aqrl s7, s6, (s5) +# CHECK-NEXT: - - - - - - 1.00 - amominu.w.aqrl s6, s5, (s4) +# CHECK-NEXT: - - - - - 1.00 - - amomaxu.w.aqrl s5, s4, (s3) +# CHECK-NEXT: - - - - - - 1.00 - amoswap.d a4, ra, (s0) +# CHECK-NEXT: - - - - - 1.00 - - amoadd.d a1, a2, (a3) +# CHECK-NEXT: - - - - - - 1.00 - amoxor.d a2, a3, (a4) +# CHECK-NEXT: - - - - - 1.00 - - amoand.d a3, a4, (a5) +# CHECK-NEXT: - - - - - - 1.00 - amoor.d a4, a5, (a6) +# CHECK-NEXT: - - - - - 1.00 - - amomin.d a5, a6, (a7) +# CHECK-NEXT: - - - - - - 1.00 - amomax.d s7, s6, (s5) +# CHECK-NEXT: - - - - - 1.00 - - amominu.d s6, s5, (s4) +# CHECK-NEXT: - - - - - - 1.00 - amomaxu.d s5, s4, (s3) +# CHECK-NEXT: - - - - - 1.00 - - amoswap.d.aq a4, ra, (s0) +# CHECK-NEXT: - - - - - - 1.00 - amoadd.d.aq a1, a2, (a3) +# CHECK-NEXT: - - - - - 1.00 - - amoxor.d.aq a2, a3, (a4) +# CHECK-NEXT: - - - - - - 1.00 - amoand.d.aq a3, a4, (a5) +# CHECK-NEXT: - - - - - 1.00 - - amoor.d.aq a4, a5, (a6) +# CHECK-NEXT: - - - - - - 1.00 - amomin.d.aq a5, a6, (a7) +# CHECK-NEXT: - - - - - 1.00 - - amomax.d.aq s7, s6, (s5) +# CHECK-NEXT: - - - - - - 1.00 - amominu.d.aq s6, s5, (s4) +# CHECK-NEXT: - - - - - 1.00 - - amomaxu.d.aq s5, s4, (s3) +# CHECK-NEXT: - - - - - - 1.00 - amoswap.d.rl a4, ra, (s0) +# CHECK-NEXT: - - - - - 1.00 - - amoadd.d.rl a1, a2, (a3) +# CHECK-NEXT: - - - - - - 1.00 - amoxor.d.rl a2, a3, (a4) +# CHECK-NEXT: - - - - - 1.00 - - amoand.d.rl a3, a4, (a5) +# CHECK-NEXT: - - - - - - 1.00 - amoor.d.rl a4, a5, (a6) +# CHECK-NEXT: - - - - - 1.00 - - amomin.d.rl a5, a6, (a7) +# CHECK-NEXT: - - - - - - 1.00 - amomax.d.rl s7, s6, (s5) +# CHECK-NEXT: - - - - - 1.00 - - amominu.d.rl s6, 
s5, (s4) +# CHECK-NEXT: - - - - - - 1.00 - amomaxu.d.rl s5, s4, (s3) +# CHECK-NEXT: - - - - - 1.00 - - amoswap.d.aqrl a4, ra, (s0) +# CHECK-NEXT: - - - - - - 1.00 - amoadd.d.aqrl a1, a2, (a3) +# CHECK-NEXT: - - - - - 1.00 - - amoxor.d.aqrl a2, a3, (a4) +# CHECK-NEXT: - - - - - - 1.00 - amoand.d.aqrl a3, a4, (a5) +# CHECK-NEXT: - - - - - 1.00 - - amoor.d.aqrl a4, a5, (a6) +# CHECK-NEXT: - - - - - - 1.00 - amomin.d.aqrl a5, a6, (a7) +# CHECK-NEXT: - - - - - 1.00 - - amomax.d.aqrl s7, s6, (s5) +# CHECK-NEXT: - - - - - - 1.00 - amominu.d.aqrl s6, s5, (s4) +# CHECK-NEXT: - - - - - 1.00 - - amomaxu.d.aqrl s5, s4, (s3) +# CHECK-NEXT: - - - - - - 1.00 - amoswap.b a4, ra, (s0) +# CHECK-NEXT: - - - - - 1.00 - - amoadd.b a1, a2, (a3) +# CHECK-NEXT: - - - - - - 1.00 - amoxor.b a2, a3, (a4) +# CHECK-NEXT: - - - - - 1.00 - - amoand.b a3, a4, (a5) +# CHECK-NEXT: - - - - - - 1.00 - amoor.b a4, a5, (a6) +# CHECK-NEXT: - - - - - 1.00 - - amomin.b a5, a6, (a7) +# CHECK-NEXT: - - - - - - 1.00 - amomax.b s7, s6, (s5) +# CHECK-NEXT: - - - - - 1.00 - - amominu.b s6, s5, (s4) +# CHECK-NEXT: - - - - - - 1.00 - amomaxu.b s5, s4, (s3) +# CHECK-NEXT: - - - - - 1.00 - - amoswap.b.aq a4, ra, (s0) +# CHECK-NEXT: - - - - - - 1.00 - amoadd.b.aq a1, a2, (a3) +# CHECK-NEXT: - - - - - 1.00 - - amoxor.b.aq a2, a3, (a4) +# CHECK-NEXT: - - - - - - 1.00 - amoand.b.aq a3, a4, (a5) +# CHECK-NEXT: - - - - - 1.00 - - amoor.b.aq a4, a5, (a6) +# CHECK-NEXT: - - - - - - 1.00 - amomin.b.aq a5, a6, (a7) +# CHECK-NEXT: - - - - - 1.00 - - amomax.b.aq s7, s6, (s5) +# CHECK-NEXT: - - - - - - 1.00 - amominu.b.aq s6, s5, (s4) +# CHECK-NEXT: - - - - - 1.00 - - amomaxu.b.aq s5, s4, (s3) +# CHECK-NEXT: - - - - - - 1.00 - amoswap.b.rl a4, ra, (s0) +# CHECK-NEXT: - - - - - 1.00 - - amoadd.b.rl a1, a2, (a3) +# CHECK-NEXT: - - - - - - 1.00 - amoxor.b.rl a2, a3, (a4) +# CHECK-NEXT: - - - - - 1.00 - - amoand.b.rl a3, a4, (a5) +# CHECK-NEXT: - - - - - - 1.00 - amoor.b.rl a4, a5, (a6) +# CHECK-NEXT: - - - - - 1.00 - - amomin.b.rl 
a5, a6, (a7) +# CHECK-NEXT: - - - - - - 1.00 - amomax.b.rl s7, s6, (s5) +# CHECK-NEXT: - - - - - 1.00 - - amominu.b.rl s6, s5, (s4) +# CHECK-NEXT: - - - - - - 1.00 - amomaxu.b.rl s5, s4, (s3) +# CHECK-NEXT: - - - - - 1.00 - - amoswap.b.aqrl a4, ra, (s0) +# CHECK-NEXT: - - - - - - 1.00 - amoadd.b.aqrl a1, a2, (a3) +# CHECK-NEXT: - - - - - 1.00 - - amoxor.b.aqrl a2, a3, (a4) +# CHECK-NEXT: - - - - - - 1.00 - amoand.b.aqrl a3, a4, (a5) +# CHECK-NEXT: - - - - - 1.00 - - amoor.b.aqrl a4, a5, (a6) +# CHECK-NEXT: - - - - - - 1.00 - amomin.b.aqrl a5, a6, (a7) +# CHECK-NEXT: - - - - - 1.00 - - amomax.b.aqrl s7, s6, (s5) +# CHECK-NEXT: - - - - - - 1.00 - amominu.b.aqrl s6, s5, (s4) +# CHECK-NEXT: - - - - - 1.00 - - amomaxu.b.aqrl s5, s4, (s3) +# CHECK-NEXT: - - - - - - 1.00 - amoswap.h a4, ra, (s0) +# CHECK-NEXT: - - - - - 1.00 - - amoadd.h a1, a2, (a3) +# CHECK-NEXT: - - - - - - 1.00 - amoxor.h a2, a3, (a4) +# CHECK-NEXT: - - - - - 1.00 - - amoand.h a3, a4, (a5) +# CHECK-NEXT: - - - - - - 1.00 - amoor.h a4, a5, (a6) +# CHECK-NEXT: - - - - - 1.00 - - amomin.h a5, a6, (a7) +# CHECK-NEXT: - - - - - - 1.00 - amomax.h s7, s6, (s5) +# CHECK-NEXT: - - - - - 1.00 - - amominu.h s6, s5, (s4) +# CHECK-NEXT: - - - - - - 1.00 - amomaxu.h s5, s4, (s3) +# CHECK-NEXT: - - - - - 1.00 - - amoswap.h.aq a4, ra, (s0) +# CHECK-NEXT: - - - - - - 1.00 - amoadd.h.aq a1, a2, (a3) +# CHECK-NEXT: - - - - - 1.00 - - amoxor.h.aq a2, a3, (a4) +# CHECK-NEXT: - - - - - - 1.00 - amoand.h.aq a3, a4, (a5) +# CHECK-NEXT: - - - - - 1.00 - - amoor.h.aq a4, a5, (a6) +# CHECK-NEXT: - - - - - - 1.00 - amomin.h.aq a5, a6, (a7) +# CHECK-NEXT: - - - - - 1.00 - - amomax.h.aq s7, s6, (s5) +# CHECK-NEXT: - - - - - - 1.00 - amominu.h.aq s6, s5, (s4) +# CHECK-NEXT: - - - - - 1.00 - - amomaxu.h.aq s5, s4, (s3) +# CHECK-NEXT: - - - - - - 1.00 - amoswap.h.rl a4, ra, (s0) +# CHECK-NEXT: - - - - - 1.00 - - amoadd.h.rl a1, a2, (a3) +# CHECK-NEXT: - - - - - - 1.00 - amoxor.h.rl a2, a3, (a4) +# CHECK-NEXT: - - - - - 1.00 - - 
amoand.h.rl a3, a4, (a5) +# CHECK-NEXT: - - - - - - 1.00 - amoor.h.rl a4, a5, (a6) +# CHECK-NEXT: - - - - - 1.00 - - amomin.h.rl a5, a6, (a7) +# CHECK-NEXT: - - - - - - 1.00 - amomax.h.rl s7, s6, (s5) +# CHECK-NEXT: - - - - - 1.00 - - amominu.h.rl s6, s5, (s4) +# CHECK-NEXT: - - - - - - 1.00 - amomaxu.h.rl s5, s4, (s3) +# CHECK-NEXT: - - - - - 1.00 - - amoswap.h.aqrl a4, ra, (s0) +# CHECK-NEXT: - - - - - - 1.00 - amoadd.h.aqrl a1, a2, (a3) +# CHECK-NEXT: - - - - - 1.00 - - amoxor.h.aqrl a2, a3, (a4) +# CHECK-NEXT: - - - - - - 1.00 - amoand.h.aqrl a3, a4, (a5) +# CHECK-NEXT: - - - - - 1.00 - - amoor.h.aqrl a4, a5, (a6) +# CHECK-NEXT: - - - - - - 1.00 - amomin.h.aqrl a5, a6, (a7) +# CHECK-NEXT: - - - - - 1.00 - - amomax.h.aqrl s7, s6, (s5) +# CHECK-NEXT: - - - - - - 1.00 - amominu.h.aqrl s6, s5, (s4) +# CHECK-NEXT: - - - - - 1.00 - - amomaxu.h.aqrl s5, s4, (s3) diff --git a/llvm/test/tools/llvm-mca/RISCV/GenericOOO/floating-point.s b/llvm/test/tools/llvm-mca/RISCV/GenericOOO/floating-point.s index 23efebe3f9a26..50e1d5a89a7f6 100644 --- a/llvm/test/tools/llvm-mca/RISCV/GenericOOO/floating-point.s +++ b/llvm/test/tools/llvm-mca/RISCV/GenericOOO/floating-point.s @@ -176,13 +176,13 @@ fgeq.d a1, ft1, ft2 # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 113 -# CHECK-NEXT: Total Cycles: 95 +# CHECK-NEXT: Total Cycles: 97 # CHECK-NEXT: Total uOps: 113 # CHECK: Dispatch Width: 6 -# CHECK-NEXT: uOps Per Cycle: 1.19 -# CHECK-NEXT: IPC: 1.19 -# CHECK-NEXT: Block RThroughput: 89.5 +# CHECK-NEXT: uOps Per Cycle: 1.16 +# CHECK-NEXT: IPC: 1.16 +# CHECK-NEXT: Block RThroughput: 70.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -212,26 +212,26 @@ fgeq.d a1, ft1, ft2 # CHECK-NEXT: 1 6 0.50 fnmadd.h fs6, fs7, fs8, fs9 # CHECK-NEXT: 1 2 0.50 fadd.s fs10, fs11, ft8 # CHECK-NEXT: 1 2 0.50 fsub.s ft9, ft10, ft11 -# CHECK-NEXT: 1 4 0.50 fmul.s ft0, ft1, ft2 -# CHECK-NEXT: 1 13 6.50 fdiv.s ft3, ft4, ft5 -# CHECK-NEXT: 1 13 6.50 fsqrt.s ft6, ft7 +# CHECK-NEXT: 1 4 1.00 fmul.s 
ft0, ft1, ft2 +# CHECK-NEXT: 1 13 13.00 fdiv.s ft3, ft4, ft5 +# CHECK-NEXT: 1 13 13.00 fsqrt.s ft6, ft7 # CHECK-NEXT: 1 1 0.50 fmin.s fa5, fa6, fa7 # CHECK-NEXT: 1 1 0.50 fmax.s fs2, fs3, fs4 -# CHECK-NEXT: 1 6 0.50 fmadd.s fa0, fa1, fa2, ft11 -# CHECK-NEXT: 1 6 0.50 fmsub.s fa4, fa5, fa6, fa7 -# CHECK-NEXT: 1 6 0.50 fnmsub.s fs2, fs3, fs4, fs5 -# CHECK-NEXT: 1 6 0.50 fnmadd.s fs6, fs7, fs8, fs9 +# CHECK-NEXT: 1 6 1.00 fmadd.s fa0, fa1, fa2, ft11 +# CHECK-NEXT: 1 6 1.00 fmsub.s fa4, fa5, fa6, fa7 +# CHECK-NEXT: 1 6 1.00 fnmsub.s fs2, fs3, fs4, fs5 +# CHECK-NEXT: 1 6 1.00 fnmadd.s fs6, fs7, fs8, fs9 # CHECK-NEXT: 1 2 0.50 fadd.d fs10, fs11, ft8 # CHECK-NEXT: 1 2 0.50 fsub.d ft9, ft10, ft11 -# CHECK-NEXT: 1 4 0.50 fmul.d ft0, ft1, ft2 -# CHECK-NEXT: 1 17 8.50 fdiv.d ft3, ft4, ft5 -# CHECK-NEXT: 1 17 8.50 fsqrt.d ft6, ft7 +# CHECK-NEXT: 1 4 1.00 fmul.d ft0, ft1, ft2 +# CHECK-NEXT: 1 17 17.00 fdiv.d ft3, ft4, ft5 +# CHECK-NEXT: 1 17 17.00 fsqrt.d ft6, ft7 # CHECK-NEXT: 1 1 0.50 fmin.d fa5, fa6, fa7 # CHECK-NEXT: 1 1 0.50 fmax.d fs2, fs3, fs4 -# CHECK-NEXT: 1 6 0.50 fmadd.d fa0, fa1, fa2, ft11 -# CHECK-NEXT: 1 6 0.50 fmsub.d fa4, fa5, fa6, fa7 -# CHECK-NEXT: 1 6 0.50 fnmsub.d fs2, fs3, fs4, fs5 -# CHECK-NEXT: 1 6 0.50 fnmadd.d fs6, fs7, fs8, fs9 +# CHECK-NEXT: 1 6 1.00 fmadd.d fa0, fa1, fa2, ft11 +# CHECK-NEXT: 1 6 1.00 fmsub.d fa4, fa5, fa6, fa7 +# CHECK-NEXT: 1 6 1.00 fnmsub.d fs2, fs3, fs4, fs5 +# CHECK-NEXT: 1 6 1.00 fnmadd.d fs6, fs7, fs8, fs9 # CHECK-NEXT: 1 2 0.50 fmv.x.h a2, fs7 # CHECK-NEXT: 1 2 0.50 fmv.h.x ft1, a6 # CHECK-NEXT: 1 2 0.50 fcvt.s.h fa0, ft0 @@ -308,27 +308,27 @@ fgeq.d a1, ft1, ft2 # CHECK-NEXT: 1 2 0.50 fleq.d a1, ft2, ft1 # CHECK: Resources: -# CHECK-NEXT: [0] - GenericOOODIV -# CHECK-NEXT: [1.0] - GenericOOOFPU -# CHECK-NEXT: [1.1] - GenericOOOFPU -# CHECK-NEXT: [2.0] - GenericOOOIXU -# CHECK-NEXT: [2.1] - GenericOOOIXU -# CHECK-NEXT: [2.2] - GenericOOOIXU -# CHECK-NEXT: [3.0] - GenericOOOLSU -# CHECK-NEXT: [3.1] - GenericOOOLSU +# 
CHECK-NEXT: [0] - GenericOOOBranch +# CHECK-NEXT: [1] - GenericOOOFMulDiv +# CHECK-NEXT: [2] - GenericOOOFloat +# CHECK-NEXT: [3.0] - GenericOOOInt +# CHECK-NEXT: [3.1] - GenericOOOInt +# CHECK-NEXT: [4.0] - GenericOOOLSU +# CHECK-NEXT: [4.1] - GenericOOOLSU +# CHECK-NEXT: [5] - GenericOOOMulDiv # CHECK: Resource pressure per iteration: -# CHECK-NEXT: [0] [1.0] [1.1] [2.0] [2.1] [2.2] [3.0] [3.1] -# CHECK-NEXT: - 89.00 90.00 - - - 3.00 3.00 +# CHECK-NEXT: [0] [1] [2] [3.0] [3.1] [4.0] [4.1] [5] +# CHECK-NEXT: - 90.00 89.00 - - 3.00 3.00 - # CHECK: Resource pressure by instruction: -# CHECK-NEXT: [0] [1.0] [1.1] [2.0] [2.1] [2.2] [3.0] [3.1] Instructions: -# CHECK-NEXT: - - - - - - - 1.00 flh ft0, 0(a0) -# CHECK-NEXT: - - - - - - 1.00 - fsh ft0, 0(a0) -# CHECK-NEXT: - - - - - - 1.00 - flw ft0, 0(a0) -# CHECK-NEXT: - - - - - - - 1.00 fsw ft0, 0(a0) -# CHECK-NEXT: - - - - - - - 1.00 fld ft0, 0(a0) -# CHECK-NEXT: - - - - - - 1.00 - fsd ft0, 0(a0) +# CHECK-NEXT: [0] [1] [2] [3.0] [3.1] [4.0] [4.1] [5] Instructions: +# CHECK-NEXT: - - - - - - 1.00 - flh ft0, 0(a0) +# CHECK-NEXT: - - - - - 1.00 - - fsh ft0, 0(a0) +# CHECK-NEXT: - - - - - 1.00 - - flw ft0, 0(a0) +# CHECK-NEXT: - - - - - - 1.00 - fsw ft0, 0(a0) +# CHECK-NEXT: - - - - - - 1.00 - fld ft0, 0(a0) +# CHECK-NEXT: - - - - - 1.00 - - fsd ft0, 0(a0) # CHECK-NEXT: - - 1.00 - - - - - fadd.h fs10, fs11, ft8 # CHECK-NEXT: - 1.00 - - - - - - fsub.h ft9, ft10, ft11 # CHECK-NEXT: - - 1.00 - - - - - fmul.h ft0, ft1, ft2 @@ -343,96 +343,96 @@ fgeq.d a1, ft1, ft2 # CHECK-NEXT: - 1.00 - - - - - - fadd.s fs10, fs11, ft8 # CHECK-NEXT: - - 1.00 - - - - - fsub.s ft9, ft10, ft11 # CHECK-NEXT: - 1.00 - - - - - - fmul.s ft0, ft1, ft2 -# CHECK-NEXT: - - 13.00 - - - - - fdiv.s ft3, ft4, ft5 +# CHECK-NEXT: - 13.00 - - - - - - fdiv.s ft3, ft4, ft5 # CHECK-NEXT: - 13.00 - - - - - - fsqrt.s ft6, ft7 # CHECK-NEXT: - - 1.00 - - - - - fmin.s fa5, fa6, fa7 -# CHECK-NEXT: - 1.00 - - - - - - fmax.s fs2, fs3, fs4 -# CHECK-NEXT: - - 1.00 - - - - - 
fmadd.s fa0, fa1, fa2, ft11 +# CHECK-NEXT: - - 1.00 - - - - - fmax.s fs2, fs3, fs4 +# CHECK-NEXT: - 1.00 - - - - - - fmadd.s fa0, fa1, fa2, ft11 # CHECK-NEXT: - 1.00 - - - - - - fmsub.s fa4, fa5, fa6, fa7 -# CHECK-NEXT: - - 1.00 - - - - - fnmsub.s fs2, fs3, fs4, fs5 +# CHECK-NEXT: - 1.00 - - - - - - fnmsub.s fs2, fs3, fs4, fs5 # CHECK-NEXT: - 1.00 - - - - - - fnmadd.s fs6, fs7, fs8, fs9 # CHECK-NEXT: - - 1.00 - - - - - fadd.d fs10, fs11, ft8 -# CHECK-NEXT: - 1.00 - - - - - - fsub.d ft9, ft10, ft11 -# CHECK-NEXT: - - 1.00 - - - - - fmul.d ft0, ft1, ft2 -# CHECK-NEXT: - - 17.00 - - - - - fdiv.d ft3, ft4, ft5 +# CHECK-NEXT: - - 1.00 - - - - - fsub.d ft9, ft10, ft11 +# CHECK-NEXT: - 1.00 - - - - - - fmul.d ft0, ft1, ft2 +# CHECK-NEXT: - 17.00 - - - - - - fdiv.d ft3, ft4, ft5 # CHECK-NEXT: - 17.00 - - - - - - fsqrt.d ft6, ft7 # CHECK-NEXT: - - 1.00 - - - - - fmin.d fa5, fa6, fa7 -# CHECK-NEXT: - 1.00 - - - - - - fmax.d fs2, fs3, fs4 +# CHECK-NEXT: - - 1.00 - - - - - fmax.d fs2, fs3, fs4 # CHECK-NEXT: - 1.00 - - - - - - fmadd.d fa0, fa1, fa2, ft11 -# CHECK-NEXT: - - 1.00 - - - - - fmsub.d fa4, fa5, fa6, fa7 +# CHECK-NEXT: - 1.00 - - - - - - fmsub.d fa4, fa5, fa6, fa7 # CHECK-NEXT: - 1.00 - - - - - - fnmsub.d fs2, fs3, fs4, fs5 -# CHECK-NEXT: - - 1.00 - - - - - fnmadd.d fs6, fs7, fs8, fs9 -# CHECK-NEXT: - 1.00 - - - - - - fmv.x.h a2, fs7 +# CHECK-NEXT: - 1.00 - - - - - - fnmadd.d fs6, fs7, fs8, fs9 +# CHECK-NEXT: - - 1.00 - - - - - fmv.x.h a2, fs7 # CHECK-NEXT: - - 1.00 - - - - - fmv.h.x ft1, a6 -# CHECK-NEXT: - 1.00 - - - - - - fcvt.s.h fa0, ft0 +# CHECK-NEXT: - - 1.00 - - - - - fcvt.s.h fa0, ft0 # CHECK-NEXT: - - 1.00 - - - - - fcvt.s.h fa0, ft0, rup -# CHECK-NEXT: - 1.00 - - - - - - fcvt.h.s ft2, fa2 -# CHECK-NEXT: - 1.00 - - - - - - fcvt.d.h fa0, ft0 +# CHECK-NEXT: - - 1.00 - - - - - fcvt.h.s ft2, fa2 +# CHECK-NEXT: - - 1.00 - - - - - fcvt.d.h fa0, ft0 # CHECK-NEXT: - - 1.00 - - - - - fcvt.d.h fa0, ft0, rup # CHECK-NEXT: - - 1.00 - - - - - fcvt.h.d ft2, fa2 # 
CHECK-NEXT: - - 1.00 - - - - - fcvt.w.s a0, fs5 -# CHECK-NEXT: - - 1.00 - - - - - fcvt.wu.s a1, fs6 -# CHECK-NEXT: - 1.00 - - - - - - fcvt.s.w ft11, a4 -# CHECK-NEXT: - 1.00 - - - - - - fcvt.s.wu ft0, a5 +# CHECK-NEXT: - 1.00 - - - - - - fcvt.wu.s a1, fs6 +# CHECK-NEXT: - - 1.00 - - - - - fcvt.s.w ft11, a4 +# CHECK-NEXT: - - 1.00 - - - - - fcvt.s.wu ft0, a5 # CHECK-NEXT: - - 1.00 - - - - - fcvt.l.s a0, ft0 # CHECK-NEXT: - - 1.00 - - - - - fcvt.lu.s a1, ft1 -# CHECK-NEXT: - 1.00 - - - - - - fcvt.s.l ft2, a2 -# CHECK-NEXT: - 1.00 - - - - - - fcvt.s.lu ft3, a3 -# CHECK-NEXT: - 1.00 - - - - - - fmv.x.w a2, fs7 +# CHECK-NEXT: - - 1.00 - - - - - fcvt.s.l ft2, a2 +# CHECK-NEXT: - - 1.00 - - - - - fcvt.s.lu ft3, a3 +# CHECK-NEXT: - - 1.00 - - - - - fmv.x.w a2, fs7 # CHECK-NEXT: - - 1.00 - - - - - fmv.w.x ft1, a6 -# CHECK-NEXT: - 1.00 - - - - - - fsgnj.s fs1, fa0, fa1 -# CHECK-NEXT: - - 1.00 - - - - - fsgnjn.s fa1, fa3, fa4 -# CHECK-NEXT: - 1.00 - - - - - - fcvt.wu.d a4, ft11 +# CHECK-NEXT: - - 1.00 - - - - - fsgnj.s fs1, fa0, fa1 +# CHECK-NEXT: - 1.00 - - - - - - fsgnjn.s fa1, fa3, fa4 +# CHECK-NEXT: - - 1.00 - - - - - fcvt.wu.d a4, ft11 # CHECK-NEXT: - - 1.00 - - - - - fcvt.w.d a4, ft11 -# CHECK-NEXT: - 1.00 - - - - - - fcvt.d.w ft0, a5 -# CHECK-NEXT: - 1.00 - - - - - - fcvt.d.wu ft1, a6 +# CHECK-NEXT: - - 1.00 - - - - - fcvt.d.w ft0, a5 +# CHECK-NEXT: - - 1.00 - - - - - fcvt.d.wu ft1, a6 # CHECK-NEXT: - - 1.00 - - - - - fcvt.s.d fs5, fs6 # CHECK-NEXT: - - 1.00 - - - - - fcvt.d.s fs7, fs8 -# CHECK-NEXT: - 1.00 - - - - - - fcvt.l.d a0, ft0 +# CHECK-NEXT: - - 1.00 - - - - - fcvt.l.d a0, ft0 # CHECK-NEXT: - - 1.00 - - - - - fcvt.lu.d a1, ft1 -# CHECK-NEXT: - 1.00 - - - - - - fcvt.d.l ft3, a3 +# CHECK-NEXT: - - 1.00 - - - - - fcvt.d.l ft3, a3 # CHECK-NEXT: - - 1.00 - - - - - fcvt.d.lu ft4, a4 -# CHECK-NEXT: - 1.00 - - - - - - fmv.x.d a2, ft2 -# CHECK-NEXT: - 1.00 - - - - - - fmv.d.x ft5, a5 -# CHECK-NEXT: - - 1.00 - - - - - fsgnj.d fs1, fa0, fa1 +# CHECK-NEXT: - - 1.00 - - - 
- - fmv.x.d a2, ft2 +# CHECK-NEXT: - - 1.00 - - - - - fmv.d.x ft5, a5 +# CHECK-NEXT: - 1.00 - - - - - - fsgnj.d fs1, fa0, fa1 # CHECK-NEXT: - - 1.00 - - - - - fsgnjn.d fa1, fa3, fa4 -# CHECK-NEXT: - 1.00 - - - - - - feq.h a1, fs8, fs9 +# CHECK-NEXT: - - 1.00 - - - - - feq.h a1, fs8, fs9 # CHECK-NEXT: - - 1.00 - - - - - flt.h a2, fs10, fs11 -# CHECK-NEXT: - 1.00 - - - - - - fle.h a3, ft8, ft9 +# CHECK-NEXT: - - 1.00 - - - - - fle.h a3, ft8, ft9 # CHECK-NEXT: - - 1.00 - - - - - feq.s a1, fs8, fs9 -# CHECK-NEXT: - 1.00 - - - - - - flt.s a2, fs10, fs11 +# CHECK-NEXT: - - 1.00 - - - - - flt.s a2, fs10, fs11 # CHECK-NEXT: - - 1.00 - - - - - fle.s a3, ft8, ft9 -# CHECK-NEXT: - 1.00 - - - - - - feq.d a1, fs8, fs9 +# CHECK-NEXT: - - 1.00 - - - - - feq.d a1, fs8, fs9 # CHECK-NEXT: - - 1.00 - - - - - flt.d a2, fs10, fs11 # CHECK-NEXT: - - 1.00 - - - - - fle.d a3, ft8, ft9 -# CHECK-NEXT: - 1.00 - - - - - - fclass.s a3, ft10 # CHECK-NEXT: - - 1.00 - - - - - fclass.s a3, ft10 -# CHECK-NEXT: - 1.00 - - - - - - fclass.d a3, ft10 +# CHECK-NEXT: - - 1.00 - - - - - fclass.s a3, ft10 +# CHECK-NEXT: - - 1.00 - - - - - fclass.d a3, ft10 # CHECK-NEXT: - - 1.00 - - - - - fli.h ft1, -1.0 -# CHECK-NEXT: - 1.00 - - - - - - fli.s ft1, -1.0 -# CHECK-NEXT: - 1.00 - - - - - - fli.d ft1, -1.0 +# CHECK-NEXT: - - 1.00 - - - - - fli.s ft1, -1.0 +# CHECK-NEXT: - - 1.00 - - - - - fli.d ft1, -1.0 # CHECK-NEXT: - - 1.00 - - - - - fminm.h fa0, fa1, fa2 # CHECK-NEXT: - 1.00 - - - - - - fmaxm.h fs3, fs4, fs5 -# CHECK-NEXT: - - 1.00 - - - - - fminm.s fa0, fa1, fa2 -# CHECK-NEXT: - 1.00 - - - - - - fmaxm.s fs3, fs4, fs5 +# CHECK-NEXT: - 1.00 - - - - - - fminm.s fa0, fa1, fa2 +# CHECK-NEXT: - - 1.00 - - - - - fmaxm.s fs3, fs4, fs5 # CHECK-NEXT: - - 1.00 - - - - - fminm.d fa0, fa1, fa2 # CHECK-NEXT: - 1.00 - - - - - - fmaxm.d fs3, fs4, fs5 -# CHECK-NEXT: - 1.00 - - - - - - fround.h fs1, fs2 +# CHECK-NEXT: - - 1.00 - - - - - fround.h fs1, fs2 # CHECK-NEXT: - - 1.00 - - - - - froundnx.h fs1, fs2 -# CHECK-NEXT: - 
1.00 - - - - - - fround.s fs1, fs2 +# CHECK-NEXT: - - 1.00 - - - - - fround.s fs1, fs2 # CHECK-NEXT: - - 1.00 - - - - - froundnx.s fs1, fs2 -# CHECK-NEXT: - 1.00 - - - - - - fround.d fs1, fs2 +# CHECK-NEXT: - - 1.00 - - - - - fround.d fs1, fs2 # CHECK-NEXT: - - 1.00 - - - - - froundnx.d fs1, fs2 # CHECK-NEXT: - - 1.00 - - - - - fcvtmod.w.d a1, ft1, rtz -# CHECK-NEXT: - 1.00 - - - - - - fltq.h a1, fs1, fs2 +# CHECK-NEXT: - - 1.00 - - - - - fltq.h a1, fs1, fs2 # CHECK-NEXT: - - 1.00 - - - - - fleq.h a1, ft1, ft2 -# CHECK-NEXT: - 1.00 - - - - - - fltq.h a1, fs2, fs1 +# CHECK-NEXT: - - 1.00 - - - - - fltq.h a1, fs2, fs1 # CHECK-NEXT: - - 1.00 - - - - - fleq.h a1, ft2, ft1 -# CHECK-NEXT: - 1.00 - - - - - - fltq.s a1, fs1, fs2 +# CHECK-NEXT: - - 1.00 - - - - - fltq.s a1, fs1, fs2 # CHECK-NEXT: - - 1.00 - - - - - fleq.s a1, ft1, ft2 -# CHECK-NEXT: - 1.00 - - - - - - fltq.s a1, fs2, fs1 +# CHECK-NEXT: - - 1.00 - - - - - fltq.s a1, fs2, fs1 # CHECK-NEXT: - - 1.00 - - - - - fleq.s a1, ft2, ft1 -# CHECK-NEXT: - 1.00 - - - - - - fltq.d a1, fs1, fs2 +# CHECK-NEXT: - - 1.00 - - - - - fltq.d a1, fs1, fs2 # CHECK-NEXT: - - 1.00 - - - - - fleq.d a1, ft1, ft2 -# CHECK-NEXT: - 1.00 - - - - - - fltq.d a1, fs2, fs1 +# CHECK-NEXT: - - 1.00 - - - - - fltq.d a1, fs2, fs1 # CHECK-NEXT: - - 1.00 - - - - - fleq.d a1, ft2, ft1 diff --git a/llvm/test/tools/llvm-mca/RISCV/GenericOOO/integer.s b/llvm/test/tools/llvm-mca/RISCV/GenericOOO/integer.s index 40e524c3f5f7d..7c23a7845c508 100644 --- a/llvm/test/tools/llvm-mca/RISCV/GenericOOO/integer.s +++ b/llvm/test/tools/llvm-mca/RISCV/GenericOOO/integer.s @@ -187,13 +187,13 @@ xperm4 a0, a1, a2 # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 121 -# CHECK-NEXT: Total Cycles: 155 +# CHECK-NEXT: Total Cycles: 153 # CHECK-NEXT: Total uOps: 121 # CHECK: Dispatch Width: 6 -# CHECK-NEXT: uOps Per Cycle: 0.78 -# CHECK-NEXT: IPC: 0.78 -# CHECK-NEXT: Block RThroughput: 136.0 +# CHECK-NEXT: uOps Per Cycle: 0.79 +# CHECK-NEXT: IPC: 0.79 +# CHECK-NEXT: Block 
RThroughput: 141.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -234,14 +234,14 @@ xperm4 a0, a1, a2 # CHECK-NEXT: 1 1 0.25 sraw a0, a0, a0 # CHECK-NEXT: 1 1 0.25 sub a0, a0, a0 # CHECK-NEXT: 1 1 0.25 subw a0, a0, a0 -# CHECK-NEXT: 1 1 0.25 jal a0, .Ltmp0 -# CHECK-NEXT: 1 1 0.25 jalr a0 -# CHECK-NEXT: 1 1 0.25 beq a0, a0, .Ltmp1 -# CHECK-NEXT: 1 1 0.25 bne a0, a0, .Ltmp2 -# CHECK-NEXT: 1 1 0.25 blt a0, a0, .Ltmp3 -# CHECK-NEXT: 1 1 0.25 bltu a0, a0, .Ltmp4 -# CHECK-NEXT: 1 1 0.25 bge a0, a0, .Ltmp5 -# CHECK-NEXT: 1 1 0.25 bgeu a0, a0, .Ltmp6 +# CHECK-NEXT: 1 1 1.00 jal a0, .Ltmp0 +# CHECK-NEXT: 1 1 1.00 jalr a0 +# CHECK-NEXT: 1 1 1.00 beq a0, a0, .Ltmp1 +# CHECK-NEXT: 1 1 1.00 bne a0, a0, .Ltmp2 +# CHECK-NEXT: 1 1 1.00 blt a0, a0, .Ltmp3 +# CHECK-NEXT: 1 1 1.00 bltu a0, a0, .Ltmp4 +# CHECK-NEXT: 1 1 1.00 bge a0, a0, .Ltmp5 +# CHECK-NEXT: 1 1 1.00 bgeu a0, a0, .Ltmp6 # CHECK-NEXT: 1 1 0.25 add a0, a0, a0 # CHECK-NEXT: 1 4 0.50 * lb t0, 0(a0) # CHECK-NEXT: 1 4 0.50 * lbu t0, 0(a0) @@ -254,11 +254,11 @@ xperm4 a0, a1, a2 # CHECK-NEXT: 1 1 0.50 * sh t0, 0(a0) # CHECK-NEXT: 1 1 0.50 * sw t0, 0(a0) # CHECK-NEXT: 1 1 0.50 * sd t0, 0(a0) -# CHECK-NEXT: 1 4 0.25 mul a0, a0, a0 -# CHECK-NEXT: 1 4 0.25 mulh a0, a0, a0 -# CHECK-NEXT: 1 4 0.25 mulhu a0, a0, a0 -# CHECK-NEXT: 1 4 0.25 mulhsu a0, a0, a0 -# CHECK-NEXT: 1 4 0.25 mulw a0, a0, a0 +# CHECK-NEXT: 1 4 1.00 mul a0, a0, a0 +# CHECK-NEXT: 1 4 1.00 mulh a0, a0, a0 +# CHECK-NEXT: 1 4 1.00 mulhu a0, a0, a0 +# CHECK-NEXT: 1 4 1.00 mulhsu a0, a0, a0 +# CHECK-NEXT: 1 4 1.00 mulw a0, a0, a0 # CHECK-NEXT: 1 21 21.00 div a0, a1, a2 # CHECK-NEXT: 1 21 21.00 divu a0, a1, a2 # CHECK-NEXT: 1 21 21.00 rem a0, a1, a2 @@ -327,139 +327,139 @@ xperm4 a0, a1, a2 # CHECK-NEXT: 1 1 0.25 xperm4 a0, a1, a2 # CHECK: Resources: -# CHECK-NEXT: [0] - GenericOOODIV -# CHECK-NEXT: [1.0] - GenericOOOFPU -# CHECK-NEXT: [1.1] - GenericOOOFPU -# CHECK-NEXT: [2.0] - GenericOOOIXU -# CHECK-NEXT: [2.1] - GenericOOOIXU -# CHECK-NEXT: [2.2] - 
GenericOOOIXU -# CHECK-NEXT: [3.0] - GenericOOOLSU -# CHECK-NEXT: [3.1] - GenericOOOLSU +# CHECK-NEXT: [0] - GenericOOOBranch +# CHECK-NEXT: [1] - GenericOOOFMulDiv +# CHECK-NEXT: [2] - GenericOOOFloat +# CHECK-NEXT: [3.0] - GenericOOOInt +# CHECK-NEXT: [3.1] - GenericOOOInt +# CHECK-NEXT: [4.0] - GenericOOOLSU +# CHECK-NEXT: [4.1] - GenericOOOLSU +# CHECK-NEXT: [5] - GenericOOOMulDiv # CHECK: Resource pressure per iteration: -# CHECK-NEXT: [0] [1.0] [1.1] [2.0] [2.1] [2.2] [3.0] [3.1] -# CHECK-NEXT: 147.00 - - 30.00 30.00 31.00 5.00 6.00 +# CHECK-NEXT: [0] [1] [2] [3.0] [3.1] [4.0] [4.1] [5] +# CHECK-NEXT: 23.00 - - 34.00 35.00 5.00 6.00 146.00 # CHECK: Resource pressure by instruction: -# CHECK-NEXT: [0] [1.0] [1.1] [2.0] [2.1] [2.2] [3.0] [3.1] Instructions: -# CHECK-NEXT: - - - - - 1.00 - - addi a0, a0, 1 +# CHECK-NEXT: [0] [1] [2] [3.0] [3.1] [4.0] [4.1] [5] Instructions: +# CHECK-NEXT: - - - - - - - 1.00 addi a0, a0, 1 # CHECK-NEXT: - - - - 1.00 - - - addiw a0, a0, 1 # CHECK-NEXT: - - - 1.00 - - - - slti a0, a0, 1 -# CHECK-NEXT: 1.00 - - - - - - - seqz a0, a0 -# CHECK-NEXT: - - - - - 1.00 - - andi a0, a0, 1 -# CHECK-NEXT: - - - 1.00 - - - - ori a0, a0, 1 -# CHECK-NEXT: 1.00 - - - - - - - xori a0, a0, 1 -# CHECK-NEXT: 1.00 - - - - - - - slli a0, a0, 1 -# CHECK-NEXT: 1.00 - - - - - - - srli a0, a0, 1 -# CHECK-NEXT: 1.00 - - - - - - - srai a0, a0, 1 -# CHECK-NEXT: 1.00 - - - - - - - slliw a0, a0, 1 -# CHECK-NEXT: 1.00 - - - - - - - srliw a0, a0, 1 -# CHECK-NEXT: 1.00 - - - - - - - sraiw a0, a0, 1 -# CHECK-NEXT: - - - - - 1.00 - - lui a0, 1 -# CHECK-NEXT: - - - - 1.00 - - - auipc a1, 1 +# CHECK-NEXT: - - - - - - - 1.00 seqz a0, a0 +# CHECK-NEXT: - - - - 1.00 - - - andi a0, a0, 1 +# CHECK-NEXT: 1.00 - - - - - - - ori a0, a0, 1 +# CHECK-NEXT: - - - - 1.00 - - - xori a0, a0, 1 +# CHECK-NEXT: - - - - - - - 1.00 slli a0, a0, 1 +# CHECK-NEXT: - - - - - - - 1.00 srli a0, a0, 1 +# CHECK-NEXT: - - - 1.00 - - - - srai a0, a0, 1 +# CHECK-NEXT: - - - 1.00 - - - - slliw a0, 
a0, 1 +# CHECK-NEXT: - - - 1.00 - - - - srliw a0, a0, 1 +# CHECK-NEXT: - - - 1.00 - - - - sraiw a0, a0, 1 +# CHECK-NEXT: - - - - 1.00 - - - lui a0, 1 +# CHECK-NEXT: 1.00 - - - - - - - auipc a1, 1 # CHECK-NEXT: - - - 1.00 - - - - add a0, a0, a1 -# CHECK-NEXT: - - - - 1.00 - - - addw a0, a0, a0 -# CHECK-NEXT: - - - - - 1.00 - - slt a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - addw a0, a0, a0 +# CHECK-NEXT: - - - - - - - 1.00 slt a0, a0, a0 # CHECK-NEXT: - - - 1.00 - - - - sltu a0, a0, a0 -# CHECK-NEXT: - - - 1.00 - - - - and a0, a0, a0 -# CHECK-NEXT: - - - 1.00 - - - - or a0, a0, a0 -# CHECK-NEXT: - - - 1.00 - - - - xor a0, a0, a0 -# CHECK-NEXT: - - - 1.00 - - - - sll a0, a0, a0 -# CHECK-NEXT: - - - 1.00 - - - - srl a0, a0, a0 -# CHECK-NEXT: - - - 1.00 - - - - sra a0, a0, a0 -# CHECK-NEXT: 1.00 - - - - - - - sllw a0, a0, a0 -# CHECK-NEXT: 1.00 - - - - - - - srlw a0, a0, a0 -# CHECK-NEXT: 1.00 - - - - - - - sraw a0, a0, a0 -# CHECK-NEXT: - - - - - 1.00 - - sub a0, a0, a0 +# CHECK-NEXT: - - - - 1.00 - - - and a0, a0, a0 +# CHECK-NEXT: - - - - 1.00 - - - or a0, a0, a0 +# CHECK-NEXT: - - - - 1.00 - - - xor a0, a0, a0 +# CHECK-NEXT: - - - - 1.00 - - - sll a0, a0, a0 +# CHECK-NEXT: - - - - 1.00 - - - srl a0, a0, a0 +# CHECK-NEXT: - - - - 1.00 - - - sra a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - sllw a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - srlw a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - sraw a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - sub a0, a0, a0 # CHECK-NEXT: - - - 1.00 - - - - subw a0, a0, a0 -# CHECK-NEXT: - - - - 1.00 - - - jal a0, .Ltmp0 -# CHECK-NEXT: - - - - - 1.00 - - jalr a0 -# CHECK-NEXT: - - - - 1.00 - - - beq a0, a0, .Ltmp1 -# CHECK-NEXT: - - - - 1.00 - - - bne a0, a0, .Ltmp2 -# CHECK-NEXT: - - - - - 1.00 - - blt a0, a0, .Ltmp3 -# CHECK-NEXT: - - - - 1.00 - - - bltu a0, a0, .Ltmp4 -# CHECK-NEXT: - - - - - 1.00 - - bge a0, a0, .Ltmp5 -# CHECK-NEXT: - - - - 1.00 - - - bgeu a0, a0, .Ltmp6 -# CHECK-NEXT: - - - - - 1.00 - - add a0, a0, a0 -# CHECK-NEXT: 
- - - - - - - 1.00 lb t0, 0(a0) -# CHECK-NEXT: - - - - - - 1.00 - lbu t0, 0(a0) -# CHECK-NEXT: - - - - - - - 1.00 lh t0, 0(a0) -# CHECK-NEXT: - - - - - - - 1.00 lhu t0, 0(a0) -# CHECK-NEXT: - - - - - - 1.00 - lw t0, 0(a0) -# CHECK-NEXT: - - - - - - - 1.00 lwu t0, 0(a0) -# CHECK-NEXT: - - - - - - 1.00 - ld t0, 0(a0) -# CHECK-NEXT: - - - - - - 1.00 - sb t0, 0(a0) -# CHECK-NEXT: - - - - - - - 1.00 sh t0, 0(a0) -# CHECK-NEXT: - - - - - - 1.00 - sw t0, 0(a0) -# CHECK-NEXT: - - - - - - - 1.00 sd t0, 0(a0) -# CHECK-NEXT: - - - - - 1.00 - - mul a0, a0, a0 -# CHECK-NEXT: - - - 1.00 - - - - mulh a0, a0, a0 -# CHECK-NEXT: - - - 1.00 - - - - mulhu a0, a0, a0 -# CHECK-NEXT: - - - 1.00 - - - - mulhsu a0, a0, a0 -# CHECK-NEXT: - - - 1.00 - - - - mulw a0, a0, a0 -# CHECK-NEXT: 21.00 - - - - - - - div a0, a1, a2 -# CHECK-NEXT: 21.00 - - - - - - - divu a0, a1, a2 -# CHECK-NEXT: 21.00 - - - - - - - rem a0, a1, a2 -# CHECK-NEXT: 21.00 - - - - - - - remu a0, a1, a2 -# CHECK-NEXT: 13.00 - - - - - - - divw a0, a1, a2 -# CHECK-NEXT: 13.00 - - - - - - - divuw a0, a1, a2 -# CHECK-NEXT: 13.00 - - - - - - - remw a0, a1, a2 -# CHECK-NEXT: 13.00 - - - - - - - remuw a0, a1, a2 +# CHECK-NEXT: 1.00 - - - - - - - jal a0, .Ltmp0 +# CHECK-NEXT: 1.00 - - - - - - - jalr a0 +# CHECK-NEXT: 1.00 - - - - - - - beq a0, a0, .Ltmp1 +# CHECK-NEXT: 1.00 - - - - - - - bne a0, a0, .Ltmp2 +# CHECK-NEXT: 1.00 - - - - - - - blt a0, a0, .Ltmp3 +# CHECK-NEXT: 1.00 - - - - - - - bltu a0, a0, .Ltmp4 +# CHECK-NEXT: 1.00 - - - - - - - bge a0, a0, .Ltmp5 +# CHECK-NEXT: 1.00 - - - - - - - bgeu a0, a0, .Ltmp6 +# CHECK-NEXT: - - - 1.00 - - - - add a0, a0, a0 +# CHECK-NEXT: - - - - - - 1.00 - lb t0, 0(a0) +# CHECK-NEXT: - - - - - 1.00 - - lbu t0, 0(a0) +# CHECK-NEXT: - - - - - - 1.00 - lh t0, 0(a0) +# CHECK-NEXT: - - - - - - 1.00 - lhu t0, 0(a0) +# CHECK-NEXT: - - - - - 1.00 - - lw t0, 0(a0) +# CHECK-NEXT: - - - - - - 1.00 - lwu t0, 0(a0) +# CHECK-NEXT: - - - - - 1.00 - - ld t0, 0(a0) +# CHECK-NEXT: - - - - - 1.00 - - sb t0, 
0(a0) +# CHECK-NEXT: - - - - - - 1.00 - sh t0, 0(a0) +# CHECK-NEXT: - - - - - 1.00 - - sw t0, 0(a0) +# CHECK-NEXT: - - - - - - 1.00 - sd t0, 0(a0) +# CHECK-NEXT: - - - - - - - 1.00 mul a0, a0, a0 +# CHECK-NEXT: - - - - - - - 1.00 mulh a0, a0, a0 +# CHECK-NEXT: - - - - - - - 1.00 mulhu a0, a0, a0 +# CHECK-NEXT: - - - - - - - 1.00 mulhsu a0, a0, a0 +# CHECK-NEXT: - - - - - - - 1.00 mulw a0, a0, a0 +# CHECK-NEXT: - - - - - - - 21.00 div a0, a1, a2 +# CHECK-NEXT: - - - - - - - 21.00 divu a0, a1, a2 +# CHECK-NEXT: - - - - - - - 21.00 rem a0, a1, a2 +# CHECK-NEXT: - - - - - - - 21.00 remu a0, a1, a2 +# CHECK-NEXT: - - - - - - - 13.00 divw a0, a1, a2 +# CHECK-NEXT: - - - - - - - 13.00 divuw a0, a1, a2 +# CHECK-NEXT: - - - - - - - 13.00 remw a0, a1, a2 +# CHECK-NEXT: - - - - - - - 13.00 remuw a0, a1, a2 # CHECK-NEXT: - - - - 1.00 - - - csrrw t0, 4095, t1 -# CHECK-NEXT: - - - - - 1.00 - - csrrs s3, fflags, s5 -# CHECK-NEXT: - - - - 1.00 - - - csrrc sp, 0, ra -# CHECK-NEXT: - - - - - 1.00 - - csrrwi a5, 0, 0 -# CHECK-NEXT: - - - - 1.00 - - - csrrsi t2, 4095, 31 -# CHECK-NEXT: - - - 1.00 - - - - csrrci t1, sscratch, 5 -# CHECK-NEXT: - - - - - 1.00 - - czero.eqz a0, a1, a2 +# CHECK-NEXT: - - - - 1.00 - - - csrrs s3, fflags, s5 +# CHECK-NEXT: 1.00 - - - - - - - csrrc sp, 0, ra +# CHECK-NEXT: - - - - 1.00 - - - csrrwi a5, 0, 0 +# CHECK-NEXT: 1.00 - - - - - - - csrrsi t2, 4095, 31 +# CHECK-NEXT: - - - - 1.00 - - - csrrci t1, sscratch, 5 +# CHECK-NEXT: 1.00 - - - - - - - czero.eqz a0, a1, a2 # CHECK-NEXT: - - - - 1.00 - - - czero.nez a0, a1, a2 -# CHECK-NEXT: - - - - 1.00 - - - czero.eqz a0, a1, a2 -# CHECK-NEXT: - - - - - 1.00 - - czero.nez a0, a1, a2 -# CHECK-NEXT: - - - 1.00 - - - - add.uw a0, a0, a0 -# CHECK-NEXT: - - - - 1.00 - - - slli.uw a0, a0, 1 -# CHECK-NEXT: - - - - - 1.00 - - sh1add.uw a0, a0, a0 -# CHECK-NEXT: - - - - - 1.00 - - sh2add.uw a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - czero.eqz a0, a1, a2 +# CHECK-NEXT: 1.00 - - - - - - - czero.nez a0, a1, a2 +# 
CHECK-NEXT: - - - - 1.00 - - - add.uw a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - slli.uw a0, a0, 1 +# CHECK-NEXT: - - - 1.00 - - - - sh1add.uw a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - sh2add.uw a0, a0, a0 # CHECK-NEXT: - - - 1.00 - - - - sh3add.uw a0, a0, a0 # CHECK-NEXT: - - - 1.00 - - - - sh1add a0, a0, a0 # CHECK-NEXT: - - - 1.00 - - - - sh2add a0, a0, a0 -# CHECK-NEXT: - - - - - 1.00 - - sh3add a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - sh3add a0, a0, a0 # CHECK-NEXT: - - - 1.00 - - - - andn a0, a0, a0 -# CHECK-NEXT: - - - 1.00 - - - - orn a0, a0, a0 +# CHECK-NEXT: 1.00 - - - - - - - orn a0, a0, a0 # CHECK-NEXT: - - - 1.00 - - - - xnor a0, a0, a0 -# CHECK-NEXT: - - - - - 1.00 - - clz a0, a0 +# CHECK-NEXT: - - - - 1.00 - - - clz a0, a0 # CHECK-NEXT: - - - 1.00 - - - - clzw a0, a0 -# CHECK-NEXT: - - - - - 1.00 - - ctz a0, a0 -# CHECK-NEXT: - - - - 1.00 - - - ctzw a0, a0 -# CHECK-NEXT: - - - 1.00 - - - - cpop a0, a0 -# CHECK-NEXT: - - - - - 1.00 - - cpopw a0, a0 +# CHECK-NEXT: - - - - 1.00 - - - ctz a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - ctzw a0, a0 +# CHECK-NEXT: - - - - 1.00 - - - cpop a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - cpopw a0, a0 # CHECK-NEXT: - - - - 1.00 - - - min a0, a0, a0 # CHECK-NEXT: - - - 1.00 - - - - minu a0, a0, a0 -# CHECK-NEXT: - - - - - 1.00 - - max a0, a0, a0 -# CHECK-NEXT: - - - - 1.00 - - - maxu a0, a0, a0 -# CHECK-NEXT: - - - 1.00 - - - - sext.b a0, a0 -# CHECK-NEXT: - - - - - 1.00 - - sext.h a0, a0 +# CHECK-NEXT: - - - - 1.00 - - - max a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - maxu a0, a0, a0 +# CHECK-NEXT: - - - - 1.00 - - - sext.b a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - sext.h a0, a0 # CHECK-NEXT: - - - - 1.00 - - - zext.h a0, a0 # CHECK-NEXT: - - - 1.00 - - - - rol a0, a0, a0 -# CHECK-NEXT: - - - - - 1.00 - - rolw a0, a0, a0 -# CHECK-NEXT: - - - - 1.00 - - - ror a0, a0, a0 -# CHECK-NEXT: - - - 1.00 - - - - rorw a0, a0, a0 -# CHECK-NEXT: - - - - - 1.00 - - rori a0, a0, 1 +# CHECK-NEXT: - - - - 1.00 - - - rolw a0, a0, a0 
+# CHECK-NEXT: - - - 1.00 - - - - ror a0, a0, a0 +# CHECK-NEXT: - - - - 1.00 - - - rorw a0, a0, a0 +# CHECK-NEXT: - - - 1.00 - - - - rori a0, a0, 1 # CHECK-NEXT: - - - - 1.00 - - - roriw a0, a0, 1 # CHECK-NEXT: - - - 1.00 - - - - orc.b a0, a0 -# CHECK-NEXT: - - - - - 1.00 - - rev8 a0, a0 -# CHECK-NEXT: - - - - 1.00 - - - clmul a0, a1, a2 +# CHECK-NEXT: - - - - 1.00 - - - rev8 a0, a0 +# CHECK-NEXT: 1.00 - - - - - - - clmul a0, a1, a2 # CHECK-NEXT: - - - - 1.00 - - - clmulh a0, a1, a2 -# CHECK-NEXT: - - - - - 1.00 - - clmulr a0, a1, a2 +# CHECK-NEXT: 1.00 - - - - - - - clmulr a0, a1, a2 # CHECK-NEXT: - - - - 1.00 - - - bclr a0, a1, a2 -# CHECK-NEXT: - - - - - 1.00 - - bclri a0, a1, 1 +# CHECK-NEXT: 1.00 - - - - - - - bclri a0, a1, 1 # CHECK-NEXT: - - - - 1.00 - - - bext a0, a1, a2 -# CHECK-NEXT: - - - - - 1.00 - - bexti a0, a1, 1 +# CHECK-NEXT: 1.00 - - - - - - - bexti a0, a1, 1 # CHECK-NEXT: - - - - 1.00 - - - binv a0, a1, a2 -# CHECK-NEXT: - - - - 1.00 - - - binvi a0, a1, 1 -# CHECK-NEXT: - - - - - 1.00 - - bset a0, a1, a2 -# CHECK-NEXT: - - - - 1.00 - - - bseti a0, a1, 1 -# CHECK-NEXT: - - - - - 1.00 - - pack a0, a1, a2 -# CHECK-NEXT: - - - - 1.00 - - - packh a0, a1, a2 -# CHECK-NEXT: - - - - - 1.00 - - brev8 a0, a1 -# CHECK-NEXT: - - - - 1.00 - - - xperm8 a0, a1, a2 +# CHECK-NEXT: 1.00 - - - - - - - binvi a0, a1, 1 +# CHECK-NEXT: - - - - 1.00 - - - bset a0, a1, a2 +# CHECK-NEXT: 1.00 - - - - - - - bseti a0, a1, 1 +# CHECK-NEXT: - - - - 1.00 - - - pack a0, a1, a2 +# CHECK-NEXT: 1.00 - - - - - - - packh a0, a1, a2 +# CHECK-NEXT: - - - - 1.00 - - - brev8 a0, a1 +# CHECK-NEXT: 1.00 - - - - - - - xperm8 a0, a1, a2 # CHECK-NEXT: - - - - 1.00 - - - xperm4 a0, a1, a2 From 5e13d85bf178b6bec94e15cc64592c9af8b36047 Mon Sep 17 00:00:00 2001 From: Wang Pengcheng Date: Fri, 7 Feb 2025 11:54:19 +0800 Subject: [PATCH 5/7] Add more comments and adjust Zfh --- llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td | 21 ++++--- .../RISCV/GenericOOO/floating-point.s | 58 +++++++++---------- 
2 files changed, 41 insertions(+), 38 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td b/llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td index 05721552e3090..9672cfff2da31 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td +++ b/llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td @@ -17,8 +17,9 @@ // * Latency: // * Integer instructions: 1 cycle. // * Multiplication instructions: 4 cycles. -// * Multiplication/Division instructions: 7-13 cycles. +// * Division instructions: 13-21 cycles. // * Floating-point instructions: 2-6 cycles. +// * Floating-point fdiv/fsqrt instructions: 9-21 cycles. // * Vector instructions: 2-6 cycles. // * Load/Store: // * IXU: 4 cycles. @@ -327,12 +328,6 @@ let Latency = 2 in { let Latency = 2 in def : WriteRes; -let Latency = 4 in -def : WriteRes; - -let Latency = 6 in -def : WriteRes; - def : WriteRes; def : WriteRes; @@ -340,10 +335,18 @@ def : WriteRes; let Latency = 2 in def : WriteRes; +// Multiplication +let Latency = 4 in +def : WriteRes; + +// FMA +let Latency = 6 in +def : WriteRes; + // Division let Latency = 9, ReleaseAtCycles = [9] in { - def : WriteRes; - def : WriteRes; + def : WriteRes; + def : WriteRes; } // Classify diff --git a/llvm/test/tools/llvm-mca/RISCV/GenericOOO/floating-point.s b/llvm/test/tools/llvm-mca/RISCV/GenericOOO/floating-point.s index 50e1d5a89a7f6..f7fe78694995c 100644 --- a/llvm/test/tools/llvm-mca/RISCV/GenericOOO/floating-point.s +++ b/llvm/test/tools/llvm-mca/RISCV/GenericOOO/floating-point.s @@ -176,13 +176,13 @@ fgeq.d a1, ft1, ft2 # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 113 -# CHECK-NEXT: Total Cycles: 97 +# CHECK-NEXT: Total Cycles: 107 # CHECK-NEXT: Total uOps: 113 # CHECK: Dispatch Width: 6 -# CHECK-NEXT: uOps Per Cycle: 1.16 -# CHECK-NEXT: IPC: 1.16 -# CHECK-NEXT: Block RThroughput: 70.0 +# CHECK-NEXT: uOps Per Cycle: 1.06 +# CHECK-NEXT: IPC: 1.06 +# CHECK-NEXT: Block RThroughput: 93.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -201,15 +201,15 
@@ fgeq.d a1, ft1, ft2 # CHECK-NEXT: 1 1 0.50 * fsd ft0, 0(a0) # CHECK-NEXT: 1 2 0.50 fadd.h fs10, fs11, ft8 # CHECK-NEXT: 1 2 0.50 fsub.h ft9, ft10, ft11 -# CHECK-NEXT: 1 4 0.50 fmul.h ft0, ft1, ft2 -# CHECK-NEXT: 1 9 4.50 fdiv.h ft3, ft4, ft5 -# CHECK-NEXT: 1 9 4.50 fsqrt.h ft6, ft7 +# CHECK-NEXT: 1 4 1.00 fmul.h ft0, ft1, ft2 +# CHECK-NEXT: 1 9 9.00 fdiv.h ft3, ft4, ft5 +# CHECK-NEXT: 1 9 9.00 fsqrt.h ft6, ft7 # CHECK-NEXT: 1 1 0.50 fmin.h fa5, fa6, fa7 # CHECK-NEXT: 1 1 0.50 fmax.h fs2, fs3, fs4 -# CHECK-NEXT: 1 6 0.50 fmadd.h fa0, fa1, fa2, ft11 -# CHECK-NEXT: 1 6 0.50 fmsub.h fa4, fa5, fa6, fa7 -# CHECK-NEXT: 1 6 0.50 fnmsub.h fs2, fs3, fs4, fs5 -# CHECK-NEXT: 1 6 0.50 fnmadd.h fs6, fs7, fs8, fs9 +# CHECK-NEXT: 1 6 1.00 fmadd.h fa0, fa1, fa2, ft11 +# CHECK-NEXT: 1 6 1.00 fmsub.h fa4, fa5, fa6, fa7 +# CHECK-NEXT: 1 6 1.00 fnmsub.h fs2, fs3, fs4, fs5 +# CHECK-NEXT: 1 6 1.00 fnmadd.h fs6, fs7, fs8, fs9 # CHECK-NEXT: 1 2 0.50 fadd.s fs10, fs11, ft8 # CHECK-NEXT: 1 2 0.50 fsub.s ft9, ft10, ft11 # CHECK-NEXT: 1 4 1.00 fmul.s ft0, ft1, ft2 @@ -319,7 +319,7 @@ fgeq.d a1, ft1, ft2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3.0] [3.1] [4.0] [4.1] [5] -# CHECK-NEXT: - 90.00 89.00 - - 3.00 3.00 - +# CHECK-NEXT: - 99.00 80.00 - - 3.00 3.00 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3.0] [3.1] [4.0] [4.1] [5] Instructions: @@ -331,16 +331,16 @@ fgeq.d a1, ft1, ft2 # CHECK-NEXT: - - - - - 1.00 - - fsd ft0, 0(a0) # CHECK-NEXT: - - 1.00 - - - - - fadd.h fs10, fs11, ft8 # CHECK-NEXT: - 1.00 - - - - - - fsub.h ft9, ft10, ft11 -# CHECK-NEXT: - - 1.00 - - - - - fmul.h ft0, ft1, ft2 +# CHECK-NEXT: - 1.00 - - - - - - fmul.h ft0, ft1, ft2 # CHECK-NEXT: - 9.00 - - - - - - fdiv.h ft3, ft4, ft5 -# CHECK-NEXT: - - 9.00 - - - - - fsqrt.h ft6, ft7 -# CHECK-NEXT: - 1.00 - - - - - - fmin.h fa5, fa6, fa7 +# CHECK-NEXT: - 9.00 - - - - - - fsqrt.h ft6, ft7 +# CHECK-NEXT: - - 1.00 - - - - - fmin.h fa5, fa6, fa7 # CHECK-NEXT: - - 1.00 - - - 
- - fmax.h fs2, fs3, fs4 # CHECK-NEXT: - 1.00 - - - - - - fmadd.h fa0, fa1, fa2, ft11 -# CHECK-NEXT: - - 1.00 - - - - - fmsub.h fa4, fa5, fa6, fa7 +# CHECK-NEXT: - 1.00 - - - - - - fmsub.h fa4, fa5, fa6, fa7 # CHECK-NEXT: - 1.00 - - - - - - fnmsub.h fs2, fs3, fs4, fs5 -# CHECK-NEXT: - - 1.00 - - - - - fnmadd.h fs6, fs7, fs8, fs9 -# CHECK-NEXT: - 1.00 - - - - - - fadd.s fs10, fs11, ft8 +# CHECK-NEXT: - 1.00 - - - - - - fnmadd.h fs6, fs7, fs8, fs9 +# CHECK-NEXT: - - 1.00 - - - - - fadd.s fs10, fs11, ft8 # CHECK-NEXT: - - 1.00 - - - - - fsub.s ft9, ft10, ft11 # CHECK-NEXT: - 1.00 - - - - - - fmul.s ft0, ft1, ft2 # CHECK-NEXT: - 13.00 - - - - - - fdiv.s ft3, ft4, ft5 @@ -371,7 +371,7 @@ fgeq.d a1, ft1, ft2 # CHECK-NEXT: - - 1.00 - - - - - fcvt.d.h fa0, ft0, rup # CHECK-NEXT: - - 1.00 - - - - - fcvt.h.d ft2, fa2 # CHECK-NEXT: - - 1.00 - - - - - fcvt.w.s a0, fs5 -# CHECK-NEXT: - 1.00 - - - - - - fcvt.wu.s a1, fs6 +# CHECK-NEXT: - - 1.00 - - - - - fcvt.wu.s a1, fs6 # CHECK-NEXT: - - 1.00 - - - - - fcvt.s.w ft11, a4 # CHECK-NEXT: - - 1.00 - - - - - fcvt.s.wu ft0, a5 # CHECK-NEXT: - - 1.00 - - - - - fcvt.l.s a0, ft0 @@ -381,12 +381,12 @@ fgeq.d a1, ft1, ft2 # CHECK-NEXT: - - 1.00 - - - - - fmv.x.w a2, fs7 # CHECK-NEXT: - - 1.00 - - - - - fmv.w.x ft1, a6 # CHECK-NEXT: - - 1.00 - - - - - fsgnj.s fs1, fa0, fa1 -# CHECK-NEXT: - 1.00 - - - - - - fsgnjn.s fa1, fa3, fa4 +# CHECK-NEXT: - - 1.00 - - - - - fsgnjn.s fa1, fa3, fa4 # CHECK-NEXT: - - 1.00 - - - - - fcvt.wu.d a4, ft11 # CHECK-NEXT: - - 1.00 - - - - - fcvt.w.d a4, ft11 # CHECK-NEXT: - - 1.00 - - - - - fcvt.d.w ft0, a5 # CHECK-NEXT: - - 1.00 - - - - - fcvt.d.wu ft1, a6 -# CHECK-NEXT: - - 1.00 - - - - - fcvt.s.d fs5, fs6 +# CHECK-NEXT: - 1.00 - - - - - - fcvt.s.d fs5, fs6 # CHECK-NEXT: - - 1.00 - - - - - fcvt.d.s fs7, fs8 # CHECK-NEXT: - - 1.00 - - - - - fcvt.l.d a0, ft0 # CHECK-NEXT: - - 1.00 - - - - - fcvt.lu.d a1, ft1 @@ -394,8 +394,8 @@ fgeq.d a1, ft1, ft2 # CHECK-NEXT: - - 1.00 - - - - - fcvt.d.lu ft4, a4 # CHECK-NEXT: 
- - 1.00 - - - - - fmv.x.d a2, ft2 # CHECK-NEXT: - - 1.00 - - - - - fmv.d.x ft5, a5 -# CHECK-NEXT: - 1.00 - - - - - - fsgnj.d fs1, fa0, fa1 -# CHECK-NEXT: - - 1.00 - - - - - fsgnjn.d fa1, fa3, fa4 +# CHECK-NEXT: - - 1.00 - - - - - fsgnj.d fs1, fa0, fa1 +# CHECK-NEXT: - 1.00 - - - - - - fsgnjn.d fa1, fa3, fa4 # CHECK-NEXT: - - 1.00 - - - - - feq.h a1, fs8, fs9 # CHECK-NEXT: - - 1.00 - - - - - flt.h a2, fs10, fs11 # CHECK-NEXT: - - 1.00 - - - - - fle.h a3, ft8, ft9 @@ -411,12 +411,12 @@ fgeq.d a1, ft1, ft2 # CHECK-NEXT: - - 1.00 - - - - - fli.h ft1, -1.0 # CHECK-NEXT: - - 1.00 - - - - - fli.s ft1, -1.0 # CHECK-NEXT: - - 1.00 - - - - - fli.d ft1, -1.0 -# CHECK-NEXT: - - 1.00 - - - - - fminm.h fa0, fa1, fa2 -# CHECK-NEXT: - 1.00 - - - - - - fmaxm.h fs3, fs4, fs5 -# CHECK-NEXT: - 1.00 - - - - - - fminm.s fa0, fa1, fa2 -# CHECK-NEXT: - - 1.00 - - - - - fmaxm.s fs3, fs4, fs5 -# CHECK-NEXT: - - 1.00 - - - - - fminm.d fa0, fa1, fa2 -# CHECK-NEXT: - 1.00 - - - - - - fmaxm.d fs3, fs4, fs5 +# CHECK-NEXT: - 1.00 - - - - - - fminm.h fa0, fa1, fa2 +# CHECK-NEXT: - - 1.00 - - - - - fmaxm.h fs3, fs4, fs5 +# CHECK-NEXT: - - 1.00 - - - - - fminm.s fa0, fa1, fa2 +# CHECK-NEXT: - 1.00 - - - - - - fmaxm.s fs3, fs4, fs5 +# CHECK-NEXT: - 1.00 - - - - - - fminm.d fa0, fa1, fa2 +# CHECK-NEXT: - - 1.00 - - - - - fmaxm.d fs3, fs4, fs5 # CHECK-NEXT: - - 1.00 - - - - - fround.h fs1, fs2 # CHECK-NEXT: - - 1.00 - - - - - froundnx.h fs1, fs2 # CHECK-NEXT: - - 1.00 - - - - - fround.s fs1, fs2 From 90d771147c012e9e7d068a31e06c72927b72f4dc Mon Sep 17 00:00:00 2001 From: Wang Pengcheng Date: Fri, 14 Feb 2025 11:59:16 +0800 Subject: [PATCH 6/7] Remove vector and address comments --- llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td b/llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td index 9672cfff2da31..f35babffead15 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td +++ 
b/llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td @@ -1,4 +1,4 @@ -//===-- RISCVSchedGenericOOO.td - Generic O3 Processor -----*- tablegen -*-===// +//===-- RISCVSchedGenericOOO.td - Generic OOO Processor ----*- tablegen -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -13,19 +13,18 @@ // * IXU (Integer ALU Unit): 4 units, only one can execute mul/div. // * FXU (Floating-point Unit): 2 units. // * LSU (Load/Store Unit): 2 units. -// * VXU (Vector Unit): 1 unit. // * Latency: // * Integer instructions: 1 cycle. // * Multiplication instructions: 4 cycles. // * Division instructions: 13-21 cycles. // * Floating-point instructions: 2-6 cycles. // * Floating-point fdiv/fsqrt instructions: 9-21 cycles. -// * Vector instructions: 2-6 cycles. // * Load/Store: // * IXU: 4 cycles. // * FXU: 4 cycles. -// * VXU: 6 cycles. // * Integer/floating-point/vector div/rem/sqrt/... are non-pipelined. +// +// TODO: Add vector scheduling. //===----------------------------------------------------------------------===// def GenericOOOModel : SchedMachineModel { @@ -49,8 +48,6 @@ def GenericOOOLSU : ProcResource<2>; def GenericOOOFMulDiv : ProcResource<1>; def GenericOOOFloat : ProcResource<1>; def GenericOOOFPU : ProcResGroup<[GenericOOOFMulDiv, GenericOOOFloat]>; -// TODO: Add vector scheduling. 
-// def GenericOOOVXU : ProcResource<1>; //===----------------------------------------------------------------------===// // Branches @@ -318,10 +315,10 @@ let Latency = 2 in { // Other than Zfhmin let Latency = 2 in { - def : WriteRes; - def : WriteRes; - def : WriteRes; - def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; } // Arithmetic and logic From e73182024b982e7fce9e2e62dd3b7cb635048783 Mon Sep 17 00:00:00 2001 From: Wang Pengcheng Date: Fri, 14 Feb 2025 16:33:28 +0800 Subject: [PATCH 7/7] Fix errors after rebasing and add a release note --- clang/docs/ReleaseNotes.rst | 2 ++ llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 03bddbe3e983a..5c69415d16489 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -217,6 +217,8 @@ LoongArch Support RISC-V Support ^^^^^^^^^^^^^^ +- Add support for `-mtune=generic-ooo` (a generic out-of-order model). + CUDA/HIP Language Changes ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td b/llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td index f35babffead15..be9c4ddf7cf48 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td +++ b/llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td @@ -494,7 +494,6 @@ def : ReadAdvance; //===----------------------------------------------------------------------===// defm : UnsupportedSchedV; defm : UnsupportedSchedZvk; -defm : UnsupportedSchedZvkned; defm : UnsupportedSchedSFB; defm : UnsupportedSchedXsfvcp; }