diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td index 00c3d702e12a2..5d4b919a956db 100644 --- a/llvm/lib/Target/RISCV/RISCV.td +++ b/llvm/lib/Target/RISCV/RISCV.td @@ -53,6 +53,7 @@ include "RISCVSchedSiFiveP600.td" include "RISCVSchedSyntacoreSCR1.td" include "RISCVSchedSyntacoreSCR345.td" include "RISCVSchedSyntacoreSCR7.td" +include "RISCVSchedTTAscalonD8.td" include "RISCVSchedXiangShanNanHu.td" //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td index 445e084d07686..84ef9faf7a37e 100644 --- a/llvm/lib/Target/RISCV/RISCVProcessors.td +++ b/llvm/lib/Target/RISCV/RISCVProcessors.td @@ -454,7 +454,7 @@ def SYNTACORE_SCR7 : RISCVProcessorModel<"syntacore-scr7", [TuneNoDefaultUnroll, TunePostRAScheduler]>; def TENSTORRENT_ASCALON_D8 : RISCVProcessorModel<"tt-ascalon-d8", - NoSchedModel, + TTAscalonD8Model, !listconcat(RVA23S64Features, [FeatureStdExtSmaia, FeatureStdExtSsaia, diff --git a/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td b/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td new file mode 100644 index 0000000000000..2afe02552974e --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td @@ -0,0 +1,330 @@ +//=- RISCVSchedTTAscalonD8.td - TT Ascalon D8 Sched Defs -----*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+
+def TTAscalonD8Model : SchedMachineModel {
+  let IssueWidth = 8; // 8-way decode and dispatch
+  let MicroOpBufferSize = 256; // 256 micro-op re-order buffer
+  let LoadLatency = 4; // Optimistic load latency
+  let MispredictPenalty = 14; // Fetch + Decode/Rename/Dispatch + Branch
+
+  let CompleteModel = 0; // Partial model: need not cover every instruction
+
+  // TODO: supported, but haven't added scheduling info yet.
+  let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZbkx,
+                             HasStdExtZcmt, HasStdExtZknd, HasStdExtZkne,
+                             HasStdExtZknh, HasStdExtZksed, HasStdExtZksh,
+                             HasStdExtZkr, HasVInstructions, HasVInstructionsI64];
+}
+
+let SchedModel = TTAscalonD8Model in {
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available.
+
+let BufferSize = 16 in { // Each resource below buffers up to 16 micro-ops
+  def AscalonLS : ProcResource<3>; // Presumably load/store; confirm
+  def AscalonFXA : ProcResource<1>; // ALU, FP/VEC -> INT, MUL, DIV, CSR
+  def AscalonFXB : ProcResource<1>; // ALU, INT -> FP/VEC
+  def AscalonFXC : ProcResource<2>; // ALU, BR
+  def AscalonFXD : ProcResource<2>; // ALU
+  def AscalonFP : ProcResource<2>; // FP arithmetic, incl. FDIV/FSQRT
+  // TODO: two vector units with vector scheduling model.
+}
+
+def AscalonFX : ProcResGroup<[AscalonFXA, AscalonFXB, AscalonFXC, AscalonFXD]>;
+
+//===----------------------------------------------------------------------===//
+
+// Branching
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+
+// Integer arithmetic and logic
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+
+// Integer multiplication
+let Latency = 3 in {
+def : WriteRes;
+def : WriteRes;
+}
+
+// Integer division
+// Worst case latency is used.
+
+let Latency = 7, ReleaseAtCycles = [7] in { // Unit is held for all 7 cycles
+  def : WriteRes;
+  def : WriteRes;
+  def : WriteRes;
+  def : WriteRes;
+}
+
+// Bitmanip
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+
+def : WriteRes;
+def : WriteRes;
+
+def : WriteRes;
+
+def : WriteRes;
+
+def : WriteRes;
+
+def : WriteRes;
+def : WriteRes;
+
+// Single-bit instructions
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+
+// Memory
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+
+let Latency = 4 in { // Same value as the model's LoadLatency
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+}
+
+// Atomic memory
+def : WriteRes;
+def : WriteRes;
+
+let Latency = 4 in { // Same value as the model's LoadLatency
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+}
+
+// Half precision
+let Latency = 3 in {
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+}
+
+let Latency = 7, ReleaseAtCycles = [7] in { // Unit is held for all 7 cycles
+def : WriteRes;
+def : WriteRes;
+}
+
+// Single precision
+let Latency = 3 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +} + +let Latency = 7, ReleaseAtCycles = [7] in { +def : WriteRes; +def : WriteRes; +} + +// Double precision +let Latency = 3 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +} + +let Latency = 12, ReleaseAtCycles = [12] in { +def : WriteRes; +def : WriteRes; +} + +// Conversions +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +// Others +def : WriteRes; +def : WriteRes; + +def : InstRW<[WriteIALU], (instrs COPY)>; + +//===----------------------------------------------------------------------===// +// Bypass and advance +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : 
ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +// Bitmanip +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +// Single-bit instructions +def : ReadAdvance; +def : ReadAdvance; + +//===----------------------------------------------------------------------===// +// Unsupported extensions +defm : UnsupportedSchedV; +defm : UnsupportedSchedXsfvcp; +defm : UnsupportedSchedZabha; +defm : UnsupportedSchedZbc; +defm : UnsupportedSchedZbkb; +defm : UnsupportedSchedZbkx; +defm : UnsupportedSchedZfa; +defm : UnsupportedSchedZvk; +defm : UnsupportedSchedSFB; +} diff --git a/llvm/test/tools/llvm-mca/RISCV/tt-ascalon-d8/fp.s b/llvm/test/tools/llvm-mca/RISCV/tt-ascalon-d8/fp.s new file mode 100644 index 0000000000000..9fd16e1ffc1d6 --- /dev/null +++ b/llvm/test/tools/llvm-mca/RISCV/tt-ascalon-d8/fp.s @@ -0,0 +1,79 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=riscv64-unknown-unknown -mcpu=tt-ascalon-d8 --iterations=1 < %s | FileCheck %s + +fmin.s ft0, fa0, 
fa1 +fmax.s ft1, fa0, fa1 +fmin.d ft2, ft4, ft5 +fmax.d ft3, ft4, ft5 +fmadd.s fs0, fs0, fs8, fs9 +fmsub.s fs1, fs1, fs8, fs9 +fmul.s fs3, fs3, fs4 +fdiv.s fs2, fs3, fs4 +fmul.d ft4, ft4, ft5 +fdiv.d fs4, fa3, ft5 +fsqrt.s ft1, fa2 +fsqrt.d ft2, fa3 + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 12 +# CHECK-NEXT: Total Cycles: 31 +# CHECK-NEXT: Total uOps: 12 + +# CHECK: Dispatch Width: 8 +# CHECK-NEXT: uOps Per Cycle: 0.39 +# CHECK-NEXT: IPC: 0.39 +# CHECK-NEXT: Block RThroughput: 23.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 3 0.50 fmin.s ft0, fa0, fa1 +# CHECK-NEXT: 1 3 0.50 fmax.s ft1, fa0, fa1 +# CHECK-NEXT: 1 3 0.50 fmin.d ft2, ft4, ft5 +# CHECK-NEXT: 1 3 0.50 fmax.d ft3, ft4, ft5 +# CHECK-NEXT: 1 3 0.50 fmadd.s fs0, fs0, fs8, fs9 +# CHECK-NEXT: 1 3 0.50 fmsub.s fs1, fs1, fs8, fs9 +# CHECK-NEXT: 1 3 0.50 fmul.s fs3, fs3, fs4 +# CHECK-NEXT: 1 7 3.50 fdiv.s fs2, fs3, fs4 +# CHECK-NEXT: 1 3 0.50 fmul.d ft4, ft4, ft5 +# CHECK-NEXT: 1 12 6.00 fdiv.d fs4, fa3, ft5 +# CHECK-NEXT: 1 7 3.50 fsqrt.s ft1, fa2 +# CHECK-NEXT: 1 12 6.00 fsqrt.d ft2, fa3 + +# CHECK: Resources: +# CHECK-NEXT: [0.0] - AscalonFP +# CHECK-NEXT: [0.1] - AscalonFP +# CHECK-NEXT: [1] - AscalonFXA +# CHECK-NEXT: [2] - AscalonFXB +# CHECK-NEXT: [3.0] - AscalonFXC +# CHECK-NEXT: [3.1] - AscalonFXC +# CHECK-NEXT: [4.0] - AscalonFXD +# CHECK-NEXT: [4.1] - AscalonFXD +# CHECK-NEXT: [5.0] - AscalonLS +# CHECK-NEXT: [5.1] - AscalonLS +# CHECK-NEXT: [5.2] - AscalonLS + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3.0] [3.1] [4.0] [4.1] [5.0] [5.1] [5.2] +# CHECK-NEXT: 18.00 28.00 - - - - - - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3.0] [3.1] [4.0] [4.1] [5.0] [5.1] [5.2] Instructions: 
+# CHECK-NEXT: - 1.00 - - - - - - - - - fmin.s ft0, fa0, fa1 +# CHECK-NEXT: 1.00 - - - - - - - - - - fmax.s ft1, fa0, fa1 +# CHECK-NEXT: - 1.00 - - - - - - - - - fmin.d ft2, ft4, ft5 +# CHECK-NEXT: 1.00 - - - - - - - - - - fmax.d ft3, ft4, ft5 +# CHECK-NEXT: - 1.00 - - - - - - - - - fmadd.s fs0, fs0, fs8, fs9 +# CHECK-NEXT: 1.00 - - - - - - - - - - fmsub.s fs1, fs1, fs8, fs9 +# CHECK-NEXT: - 1.00 - - - - - - - - - fmul.s fs3, fs3, fs4 +# CHECK-NEXT: 7.00 - - - - - - - - - - fdiv.s fs2, fs3, fs4 +# CHECK-NEXT: 1.00 - - - - - - - - - - fmul.d ft4, ft4, ft5 +# CHECK-NEXT: - 12.00 - - - - - - - - - fdiv.d fs4, fa3, ft5 +# CHECK-NEXT: 7.00 - - - - - - - - - - fsqrt.s ft1, fa2 +# CHECK-NEXT: - 12.00 - - - - - - - - - fsqrt.d ft2, fa3 diff --git a/llvm/test/tools/llvm-mca/RISCV/tt-ascalon-d8/fx.s b/llvm/test/tools/llvm-mca/RISCV/tt-ascalon-d8/fx.s new file mode 100644 index 0000000000000..46cb4c6b8ae24 --- /dev/null +++ b/llvm/test/tools/llvm-mca/RISCV/tt-ascalon-d8/fx.s @@ -0,0 +1,79 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=riscv64-unknown-unknown -mcpu=tt-ascalon-d8 --iterations=1 < %s | FileCheck %s + +mul t0, a0, t0 +sub s2, a2, a3 +div t1, t2, t3 +add t1, a4, x3 +div a1, a2, a3 +add t1, a0, t0 +mul s0, a5, s0 +add t2, t2, t2 +sub s1, s0, s1 +fcvt.s.w f1, t3 +add s2, s2, s2 +fcvt.w.s t5, f3 + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 12 +# CHECK-NEXT: Total Cycles: 22 +# CHECK-NEXT: Total uOps: 12 + +# CHECK: Dispatch Width: 8 +# CHECK-NEXT: uOps Per Cycle: 0.55 +# CHECK-NEXT: IPC: 0.55 +# CHECK-NEXT: Block RThroughput: 17.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 3 1.00 mul t0, a0, t0 +# CHECK-NEXT: 1 1 0.17 sub s2, a2, a3 +# CHECK-NEXT: 1 7 7.00 div t1, 
t2, t3 +# CHECK-NEXT: 1 1 0.17 add t1, a4, gp +# CHECK-NEXT: 1 7 7.00 div a1, a2, a3 +# CHECK-NEXT: 1 1 0.17 add t1, a0, t0 +# CHECK-NEXT: 1 3 1.00 mul s0, s0, a5 +# CHECK-NEXT: 1 1 0.17 add t2, t2, t2 +# CHECK-NEXT: 1 1 0.17 sub s1, s0, s1 +# CHECK-NEXT: 1 1 1.00 fcvt.s.w ft1, t3 +# CHECK-NEXT: 1 1 0.17 add s2, s2, s2 +# CHECK-NEXT: 1 1 1.00 fcvt.w.s t5, ft3 + +# CHECK: Resources: +# CHECK-NEXT: [0.0] - AscalonFP +# CHECK-NEXT: [0.1] - AscalonFP +# CHECK-NEXT: [1] - AscalonFXA +# CHECK-NEXT: [2] - AscalonFXB +# CHECK-NEXT: [3.0] - AscalonFXC +# CHECK-NEXT: [3.1] - AscalonFXC +# CHECK-NEXT: [4.0] - AscalonFXD +# CHECK-NEXT: [4.1] - AscalonFXD +# CHECK-NEXT: [5.0] - AscalonLS +# CHECK-NEXT: [5.1] - AscalonLS +# CHECK-NEXT: [5.2] - AscalonLS + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3.0] [3.1] [4.0] [4.1] [5.0] [5.1] [5.2] +# CHECK-NEXT: - - 17.00 1.00 2.00 2.00 1.00 1.00 - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3.0] [3.1] [4.0] [4.1] [5.0] [5.1] [5.2] Instructions: +# CHECK-NEXT: - - 1.00 - - - - - - - - mul t0, a0, t0 +# CHECK-NEXT: - - - - - - - 1.00 - - - sub s2, a2, a3 +# CHECK-NEXT: - - 7.00 - - - - - - - - div t1, t2, t3 +# CHECK-NEXT: - - - - - - 1.00 - - - - add t1, a4, gp +# CHECK-NEXT: - - 7.00 - - - - - - - - div a1, a2, a3 +# CHECK-NEXT: - - - - - 1.00 - - - - - add t1, a0, t0 +# CHECK-NEXT: - - 1.00 - - - - - - - - mul s0, s0, a5 +# CHECK-NEXT: - - - - - 1.00 - - - - - add t2, t2, t2 +# CHECK-NEXT: - - - - 1.00 - - - - - - sub s1, s0, s1 +# CHECK-NEXT: - - - 1.00 - - - - - - - fcvt.s.w ft1, t3 +# CHECK-NEXT: - - - - 1.00 - - - - - - add s2, s2, s2 +# CHECK-NEXT: - - 1.00 - - - - - - - - fcvt.w.s t5, ft3