From 9cc030dccb6a2bc4a57aedd06110c2f694efcc4b Mon Sep 17 00:00:00 2001 From: Petr Penzin Date: Tue, 10 Dec 2024 17:44:47 -0600 Subject: [PATCH 1/5] [RISC-V] Base scheduling model for tt-ascalon-d8 First part of tt-ascalon-d8 scheduling model, only containing scalar ops. Scheduling for vector instructions will be added in a follow-up patch. Co-authored-by: Anton Blanchard --- llvm/lib/Target/RISCV/RISCV.td | 1 + llvm/lib/Target/RISCV/RISCVProcessors.td | 2 +- .../lib/Target/RISCV/RISCVSchedTTAscalonD8.td | 333 ++++++++++++++++++ .../tools/llvm-mca/RISCV/tt-ascalon-d8/fp.s | 81 +++++ .../tools/llvm-mca/RISCV/tt-ascalon-d8/fx.s | 81 +++++ 5 files changed, 497 insertions(+), 1 deletion(-) create mode 100644 llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td create mode 100644 llvm/test/tools/llvm-mca/RISCV/tt-ascalon-d8/fp.s create mode 100644 llvm/test/tools/llvm-mca/RISCV/tt-ascalon-d8/fx.s diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td index 00c3d702e12a2..e5578a5447638 100644 --- a/llvm/lib/Target/RISCV/RISCV.td +++ b/llvm/lib/Target/RISCV/RISCV.td @@ -54,6 +54,7 @@ include "RISCVSchedSyntacoreSCR1.td" include "RISCVSchedSyntacoreSCR345.td" include "RISCVSchedSyntacoreSCR7.td" include "RISCVSchedXiangShanNanHu.td" +include "RISCVSchedTTAscalonD8.td" //===----------------------------------------------------------------------===// // RISC-V processors supported. diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td index 445e084d07686..84ef9faf7a37e 100644 --- a/llvm/lib/Target/RISCV/RISCVProcessors.td +++ b/llvm/lib/Target/RISCV/RISCVProcessors.td @@ -454,7 +454,7 @@ def SYNTACORE_SCR7 : RISCVProcessorModel<"syntacore-scr7", [TuneNoDefaultUnroll, TunePostRAScheduler]>; def TENSTORRENT_ASCALON_D8 : RISCVProcessorModel<"tt-ascalon-d8", - NoSchedModel, + TTAscalonD8Model, !listconcat(RVA23S64Features, [FeatureStdExtSmaia, FeatureStdExtSsaia, diff --git a/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td b/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td new file mode 100644 index 0000000000000..764e546beee18 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td @@ -0,0 +1,333 @@ +//=- RISCVSchedTTAscalonD8.td - Tenstorrent Ascalon Scheduling Defs -----*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// + +def TTAscalonD8Model : SchedMachineModel { + let IssueWidth = 8; // 8-way decode and dispatch + let MicroOpBufferSize = 256; // 256 micro-op re-order buffer + let LoadLatency = 4; // Optimistic load latency + let MispredictPenalty = 14; // Fetch + Decode/Rename/Dispatch + Branch + + // Enable partial & runtime unrolling. + let LoopMicroOpBufferSize = 16; + + let CompleteModel = 0; + + // TODO supported, but haven't added scheduling info yet + let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZbkx, + HasStdExtZcmt, HasStdExtZknd, HasStdExtZkne, + HasStdExtZknh, HasStdExtZksed, HasStdExtZksh, + HasStdExtZkr, HasVInstructions, HasVInstructionsI64]; +} + +let SchedModel = TTAscalonD8Model in { + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available. + +let BufferSize = 16 in { + def AscalonLS : ProcResource<3>; + def AscalonFXA : ProcResource<1>; // ALU, FP/VEC -> INT, MUL, DIV, CSR + def AscalonFXB : ProcResource<1>; // ALU, INT -> FP/VEC + def AscalonFXC : ProcResource<2>; // ALU, BR + def AscalonFXD : ProcResource<2>; // ALU + def AscalonFP : ProcResource<2>; + def AscalonV : ProcResource<2>; +} + +def AscalonFX : ProcResGroup<[AscalonFXA, AscalonFXB, AscalonFXC, AscalonFXD]>; + +//===----------------------------------------------------------------------===// + +// Branching +def : WriteRes; +def : WriteRes; +def : WriteRes; + +// Integer arithmetic and logic +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +// Integer multiplication +let Latency = 3 in { +def : WriteRes; +def : WriteRes; +} + +// Integer division +// Worst case latency is used. + +let Latency = 7, ReleaseAtCycles = [7] in { + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; +} + +// Bitmanip +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +def : WriteRes; +def : WriteRes; + +def : WriteRes; + +def : WriteRes; + +def : WriteRes; + +def : WriteRes; +def : WriteRes; + +// Single-bit instructions +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +// Memory +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +let Latency = 4 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +} + +// Atomic memory +def : WriteRes; +def : WriteRes; + +let Latency = 4 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +} + +// Half precision. +let Latency = 3 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +} + +let Latency = 7, ReleaseAtCycles = [7] in { +def : WriteRes; +def : WriteRes; +} + +// Single precision. +let Latency = 3 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +} + +let Latency = 7, ReleaseAtCycles = [7] in { +def : WriteRes; +def : WriteRes; +} + +// Double precision +let Latency = 3 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +} + +let Latency = 12, ReleaseAtCycles = [12] in { +def : WriteRes; +def : WriteRes; +} + +// Conversions +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +// Others +def : WriteRes; +def : WriteRes; + +def : InstRW<[WriteIALU], (instrs COPY)>; + +//===----------------------------------------------------------------------===// +// Bypass and advance +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +// Bitmanip +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +// Single-bit instructions +def : ReadAdvance; +def : ReadAdvance; + +//===----------------------------------------------------------------------===// +// Unsupported extensions +defm : UnsupportedSchedV; +defm : UnsupportedSchedXsfvcp; +defm : UnsupportedSchedZabha; +defm : UnsupportedSchedZbc; +defm : UnsupportedSchedZbkb; +defm : UnsupportedSchedZbkx; +defm : UnsupportedSchedZfa; +defm : UnsupportedSchedZvk; +defm : UnsupportedSchedSFB; +} diff --git a/llvm/test/tools/llvm-mca/RISCV/tt-ascalon-d8/fp.s b/llvm/test/tools/llvm-mca/RISCV/tt-ascalon-d8/fp.s new file mode 100644 index 0000000000000..801db0edf83ef --- /dev/null +++ b/llvm/test/tools/llvm-mca/RISCV/tt-ascalon-d8/fp.s @@ -0,0 +1,81 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=riscv64-unknown-unknown -mcpu=tt-ascalon-d8 --iterations=1 < %s | FileCheck %s + +fmin.s ft0, fa0, fa1 +fmax.s ft1, fa0, fa1 +fmin.d ft2, ft4, ft5 +fmax.d ft3, ft4, ft5 +fmadd.s fs0, fs0, fs8, fs9 +fmsub.s fs1, fs1, fs8, fs9 +fmul.s fs3, fs3, fs4 +fdiv.s fs2, fs3, fs4 +fmul.d ft4, ft4, ft5 +fdiv.d fs4, fa3, ft5 +fsqrt.s ft1, fa2 +fsqrt.d ft2, fa3 + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 12 +# CHECK-NEXT: Total Cycles: 31 +# CHECK-NEXT: Total uOps: 12 + +# CHECK: Dispatch Width: 8 +# CHECK-NEXT: uOps Per Cycle: 0.39 +# CHECK-NEXT: IPC: 0.39 +# CHECK-NEXT: Block RThroughput: 23.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 3 0.50 fmin.s ft0, fa0, fa1 +# CHECK-NEXT: 1 3 0.50 fmax.s ft1, fa0, fa1 +# CHECK-NEXT: 1 3 0.50 fmin.d ft2, ft4, ft5 +# CHECK-NEXT: 1 3 0.50 fmax.d ft3, ft4, ft5 +# CHECK-NEXT: 1 3 0.50 fmadd.s fs0, fs0, fs8, fs9 +# CHECK-NEXT: 1 3 0.50 fmsub.s fs1, fs1, fs8, fs9 +# CHECK-NEXT: 1 3 0.50 fmul.s fs3, fs3, fs4 +# CHECK-NEXT: 1 7 3.50 fdiv.s fs2, fs3, fs4 +# CHECK-NEXT: 1 3 0.50 fmul.d ft4, ft4, ft5 +# CHECK-NEXT: 1 12 6.00 fdiv.d fs4, fa3, ft5 +# CHECK-NEXT: 1 7 3.50 fsqrt.s ft1, fa2 +# CHECK-NEXT: 1 12 6.00 fsqrt.d ft2, fa3 + +# CHECK: Resources: +# CHECK-NEXT: [0.0] - AscalonFP +# CHECK-NEXT: [0.1] - AscalonFP +# CHECK-NEXT: [1] - AscalonFXA +# CHECK-NEXT: [2] - AscalonFXB +# CHECK-NEXT: [3.0] - AscalonFXC +# CHECK-NEXT: [3.1] - AscalonFXC +# CHECK-NEXT: [4.0] - AscalonFXD +# CHECK-NEXT: [4.1] - AscalonFXD +# CHECK-NEXT: [5.0] - AscalonLS +# CHECK-NEXT: [5.1] - AscalonLS +# CHECK-NEXT: [5.2] - AscalonLS +# CHECK-NEXT: [6.0] - AscalonV +# CHECK-NEXT: [6.1] - AscalonV + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3.0] [3.1] [4.0] [4.1] [5.0] [5.1] [5.2] [6.0] [6.1] +# CHECK-NEXT: 18.00 28.00 - - - - - - - - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3.0] [3.1] [4.0] [4.1] [5.0] [5.1] [5.2] [6.0] [6.1] Instructions: +# CHECK-NEXT: - 1.00 - - - - - - - - - - - fmin.s ft0, fa0, fa1 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - fmax.s ft1, fa0, fa1 +# CHECK-NEXT: - 1.00 - - - - - - - - - - - fmin.d ft2, ft4, ft5 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - fmax.d ft3, ft4, ft5 +# CHECK-NEXT: - 1.00 - - - - - - - - - - - fmadd.s fs0, fs0, fs8, fs9 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - fmsub.s fs1, fs1, fs8, fs9 +# CHECK-NEXT: - 1.00 - - - - - - - - - - - fmul.s fs3, fs3, fs4 +# CHECK-NEXT: 7.00 - - - - - - - - - - - - fdiv.s fs2, fs3, fs4 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - fmul.d ft4, ft4, ft5 +# CHECK-NEXT: - 12.00 - - - - - - - - - - - fdiv.d fs4, fa3, ft5 +# CHECK-NEXT: 7.00 - - - - - - - - - - - - fsqrt.s ft1, fa2 +# CHECK-NEXT: - 12.00 - - - - - - - - - - - fsqrt.d ft2, fa3 diff --git a/llvm/test/tools/llvm-mca/RISCV/tt-ascalon-d8/fx.s b/llvm/test/tools/llvm-mca/RISCV/tt-ascalon-d8/fx.s new file mode 100644 index 0000000000000..62827eb662815 --- /dev/null +++ b/llvm/test/tools/llvm-mca/RISCV/tt-ascalon-d8/fx.s @@ -0,0 +1,81 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=riscv64-unknown-unknown -mcpu=tt-ascalon-d8 --iterations=1 < %s | FileCheck %s + +mul t0, a0, t0 +sub s2, a2, a3 +div t1, t2, t3 +add t1, a4, x3 +div a1, a2, a3 +add t1, a0, t0 +mul s0, a5, s0 +add t2, t2, t2 +sub s1, s0, s1 +fcvt.s.w f1, t3 +add s2, s2, s2 +fcvt.w.s t5, f3 + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 12 +# CHECK-NEXT: Total Cycles: 22 +# CHECK-NEXT: Total uOps: 12 + +# CHECK: Dispatch Width: 8 +# CHECK-NEXT: uOps Per Cycle: 0.55 +# CHECK-NEXT: IPC: 0.55 +# CHECK-NEXT: Block RThroughput: 17.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 3 1.00 mul t0, a0, t0 +# CHECK-NEXT: 1 1 0.17 sub s2, a2, a3 +# CHECK-NEXT: 1 7 7.00 div t1, t2, t3 +# CHECK-NEXT: 1 1 0.17 add t1, a4, gp +# CHECK-NEXT: 1 7 7.00 div a1, a2, a3 +# CHECK-NEXT: 1 1 0.17 add t1, a0, t0 +# CHECK-NEXT: 1 3 1.00 mul s0, s0, a5 +# CHECK-NEXT: 1 1 0.17 add t2, t2, t2 +# CHECK-NEXT: 1 1 0.17 sub s1, s0, s1 +# CHECK-NEXT: 1 1 1.00 fcvt.s.w ft1, t3 +# CHECK-NEXT: 1 1 0.17 add s2, s2, s2 +# CHECK-NEXT: 1 1 1.00 fcvt.w.s t5, ft3 + +# CHECK: Resources: +# CHECK-NEXT: [0.0] - AscalonFP +# CHECK-NEXT: [0.1] - AscalonFP +# CHECK-NEXT: [1] - AscalonFXA +# CHECK-NEXT: [2] - AscalonFXB +# CHECK-NEXT: [3.0] - AscalonFXC +# CHECK-NEXT: [3.1] - AscalonFXC +# CHECK-NEXT: [4.0] - AscalonFXD +# CHECK-NEXT: [4.1] - AscalonFXD +# CHECK-NEXT: [5.0] - AscalonLS +# CHECK-NEXT: [5.1] - AscalonLS +# CHECK-NEXT: [5.2] - AscalonLS +# CHECK-NEXT: [6.0] - AscalonV +# CHECK-NEXT: [6.1] - AscalonV + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3.0] [3.1] [4.0] [4.1] [5.0] [5.1] [5.2] [6.0] [6.1] +# CHECK-NEXT: - - 17.00 1.00 2.00 2.00 1.00 1.00 - - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3.0] [3.1] [4.0] [4.1] [5.0] [5.1] [5.2] [6.0] [6.1] Instructions: +# CHECK-NEXT: - - 1.00 - - - - - - - - - - mul t0, a0, t0 +# CHECK-NEXT: - - - - - - - 1.00 - - - - - sub s2, a2, a3 +# CHECK-NEXT: - - 7.00 - - - - - - - - - - div t1, t2, t3 +# CHECK-NEXT: - - - - - - 1.00 - - - - - - add t1, a4, gp +# CHECK-NEXT: - - 7.00 - - - - - - - - - - div a1, a2, a3 +# CHECK-NEXT: - - - - - 1.00 - - - - - - - add t1, a0, t0 +# CHECK-NEXT: - - 1.00 - - - - - - - - - - mul s0, s0, a5 +# CHECK-NEXT: - - - - - 1.00 - - - - - - - add t2, t2, t2 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - sub s1, s0, s1 +# CHECK-NEXT: - - - 1.00 - - - - - - - - - fcvt.s.w ft1, t3 +# CHECK-NEXT: - - - - 1.00 - - - - - - - - add s2, s2, s2 +# CHECK-NEXT: - - 1.00 - - - - - - - - - - fcvt.w.s t5, ft3 From 10b416c01b5ac14277c6b8b0bcbb53b57b81ee59 Mon Sep 17 00:00:00 2001 From: Petr Penzin Date: Mon, 16 Dec 2024 18:04:48 -0600 Subject: [PATCH 2/5] Reorder model includes in RISCV.td --- llvm/lib/Target/RISCV/RISCV.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td index e5578a5447638..5d4b919a956db 100644 --- a/llvm/lib/Target/RISCV/RISCV.td +++ b/llvm/lib/Target/RISCV/RISCV.td @@ -53,8 +53,8 @@ include "RISCVSchedSiFiveP600.td" include "RISCVSchedSyntacoreSCR1.td" include "RISCVSchedSyntacoreSCR345.td" include "RISCVSchedSyntacoreSCR7.td" -include "RISCVSchedXiangShanNanHu.td" include "RISCVSchedTTAscalonD8.td" +include "RISCVSchedXiangShanNanHu.td" //===----------------------------------------------------------------------===// // RISC-V processors supported. From 9bff8401967f4fdf907e8ec13f67cbcd63e483f0 Mon Sep 17 00:00:00 2001 From: Petr Penzin Date: Mon, 16 Dec 2024 18:19:07 -0600 Subject: [PATCH 3/5] Text width --- llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td b/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td index 764e546beee18..c7958dfadcfe5 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td +++ b/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td @@ -1,4 +1,4 @@ -//=- RISCVSchedTTAscalonD8.td - Tenstorrent Ascalon Scheduling Defs -----*- tablegen -*-=// +//=- RISCVSchedTTAscalonD8.td - TT Ascalon D8 Sched Defs -----*- tablegen -*-=// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. From 6d2e9869943f611a0347cad8607bf9a962921717 Mon Sep 17 00:00:00 2001 From: Petr Penzin Date: Tue, 17 Dec 2024 13:52:52 -0600 Subject: [PATCH 4/5] Remove loop uOp buf size and unused units Changing processor resources changes llvm-mca output --- .../lib/Target/RISCV/RISCVSchedTTAscalonD8.td | 5 +-- .../tools/llvm-mca/RISCV/tt-ascalon-d8/fp.s | 32 +++++++++---------- .../tools/llvm-mca/RISCV/tt-ascalon-d8/fx.s | 32 +++++++++---------- 3 files changed, 31 insertions(+), 38 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td b/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td index c7958dfadcfe5..eb748b84d73ab 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td +++ b/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td @@ -14,9 +14,6 @@ def TTAscalonD8Model : SchedMachineModel { let LoadLatency = 4; // Optimistic load latency let MispredictPenalty = 14; // Fetch + Decode/Rename/Dispatch + Branch - // Enable partial & runtime unrolling. - let LoopMicroOpBufferSize = 16; - let CompleteModel = 0; // TODO supported, but haven't added scheduling info yet @@ -38,7 +35,7 @@ let BufferSize = 16 in { def AscalonFXC : ProcResource<2>; // ALU, BR def AscalonFXD : ProcResource<2>; // ALU def AscalonFP : ProcResource<2>; - def AscalonV : ProcResource<2>; + // TODO two vector units with vector scheduling model } def AscalonFX : ProcResGroup<[AscalonFXA, AscalonFXB, AscalonFXC, AscalonFXD]>; diff --git a/llvm/test/tools/llvm-mca/RISCV/tt-ascalon-d8/fp.s b/llvm/test/tools/llvm-mca/RISCV/tt-ascalon-d8/fp.s index 801db0edf83ef..9fd16e1ffc1d6 100644 --- a/llvm/test/tools/llvm-mca/RISCV/tt-ascalon-d8/fp.s +++ b/llvm/test/tools/llvm-mca/RISCV/tt-ascalon-d8/fp.s @@ -58,24 +58,22 @@ fsqrt.d ft2, fa3 # CHECK-NEXT: [5.0] - AscalonLS # CHECK-NEXT: [5.1] - AscalonLS # CHECK-NEXT: [5.2] - AscalonLS -# CHECK-NEXT: [6.0] - AscalonV -# CHECK-NEXT: [6.1] - AscalonV # CHECK: Resource pressure per iteration: -# CHECK-NEXT: [0.0] [0.1] [1] [2] [3.0] [3.1] [4.0] [4.1] [5.0] [5.1] [5.2] [6.0] [6.1] -# CHECK-NEXT: 18.00 28.00 - - - - - - - - - - - +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3.0] [3.1] [4.0] [4.1] [5.0] [5.1] [5.2] +# CHECK-NEXT: 18.00 28.00 - - - - - - - - - # CHECK: Resource pressure by instruction: -# CHECK-NEXT: [0.0] [0.1] [1] [2] [3.0] [3.1] [4.0] [4.1] [5.0] [5.1] [5.2] [6.0] [6.1] Instructions: -# CHECK-NEXT: - 1.00 - - - - - - - - - - - fmin.s ft0, fa0, fa1 -# CHECK-NEXT: 1.00 - - - - - - - - - - - - fmax.s ft1, fa0, fa1 -# CHECK-NEXT: - 1.00 - - - - - - - - - - - fmin.d ft2, ft4, ft5 -# CHECK-NEXT: 1.00 - - - - - - - - - - - - fmax.d ft3, ft4, ft5 -# CHECK-NEXT: - 1.00 - - - - - - - - - - - fmadd.s fs0, fs0, fs8, fs9 -# CHECK-NEXT: 1.00 - - - - - - - - - - - - fmsub.s fs1, fs1, fs8, fs9 -# CHECK-NEXT: - 1.00 - - - - - - - - - - - fmul.s fs3, fs3, fs4 -# CHECK-NEXT: 7.00 - - - - - - - - - - - - fdiv.s fs2, fs3, fs4 -# CHECK-NEXT: 1.00 - - - - - - - - - - - - fmul.d ft4, ft4, ft5 -# CHECK-NEXT: - 12.00 - - - - - - - - - - - fdiv.d fs4, fa3, ft5 -# CHECK-NEXT: 7.00 - - - - - - - - - - - - fsqrt.s ft1, fa2 -# CHECK-NEXT: - 12.00 - - - - - - - - - - - fsqrt.d ft2, fa3 +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3.0] [3.1] [4.0] [4.1] [5.0] [5.1] [5.2] Instructions: +# CHECK-NEXT: - 1.00 - - - - - - - - - fmin.s ft0, fa0, fa1 +# CHECK-NEXT: 1.00 - - - - - - - - - - fmax.s ft1, fa0, fa1 +# CHECK-NEXT: - 1.00 - - - - - - - - - fmin.d ft2, ft4, ft5 +# CHECK-NEXT: 1.00 - - - - - - - - - - fmax.d ft3, ft4, ft5 +# CHECK-NEXT: - 1.00 - - - - - - - - - fmadd.s fs0, fs0, fs8, fs9 +# CHECK-NEXT: 1.00 - - - - - - - - - - fmsub.s fs1, fs1, fs8, fs9 +# CHECK-NEXT: - 1.00 - - - - - - - - - fmul.s fs3, fs3, fs4 +# CHECK-NEXT: 7.00 - - - - - - - - - - fdiv.s fs2, fs3, fs4 +# CHECK-NEXT: 1.00 - - - - - - - - - - fmul.d ft4, ft4, ft5 +# CHECK-NEXT: - 12.00 - - - - - - - - - fdiv.d fs4, fa3, ft5 +# CHECK-NEXT: 7.00 - - - - - - - - - - fsqrt.s ft1, fa2 +# CHECK-NEXT: - 12.00 - - - - - - - - - fsqrt.d ft2, fa3 diff --git a/llvm/test/tools/llvm-mca/RISCV/tt-ascalon-d8/fx.s b/llvm/test/tools/llvm-mca/RISCV/tt-ascalon-d8/fx.s index 62827eb662815..46cb4c6b8ae24 100644 --- a/llvm/test/tools/llvm-mca/RISCV/tt-ascalon-d8/fx.s +++ b/llvm/test/tools/llvm-mca/RISCV/tt-ascalon-d8/fx.s @@ -58,24 +58,22 @@ fcvt.w.s t5, f3 # CHECK-NEXT: [5.0] - AscalonLS # CHECK-NEXT: [5.1] - AscalonLS # CHECK-NEXT: [5.2] - AscalonLS -# CHECK-NEXT: [6.0] - AscalonV -# CHECK-NEXT: [6.1] - AscalonV # CHECK: Resource pressure per iteration: -# CHECK-NEXT: [0.0] [0.1] [1] [2] [3.0] [3.1] [4.0] [4.1] [5.0] [5.1] [5.2] [6.0] [6.1] -# CHECK-NEXT: - - 17.00 1.00 2.00 2.00 1.00 1.00 - - - - - +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3.0] [3.1] [4.0] [4.1] [5.0] [5.1] [5.2] +# CHECK-NEXT: - - 17.00 1.00 2.00 2.00 1.00 1.00 - - - # CHECK: Resource pressure by instruction: -# CHECK-NEXT: [0.0] [0.1] [1] [2] [3.0] [3.1] [4.0] [4.1] [5.0] [5.1] [5.2] [6.0] [6.1] Instructions: -# CHECK-NEXT: - - 1.00 - - - - - - - - - - mul t0, a0, t0 -# CHECK-NEXT: - - - - - - - 1.00 - - - - - sub s2, a2, a3 -# CHECK-NEXT: - - 7.00 - - - - - - - - - - div t1, t2, t3 -# CHECK-NEXT: - - - - - - 1.00 - - - - - - add t1, a4, gp -# CHECK-NEXT: - - 7.00 - - - - - - - - - - div a1, a2, a3 -# CHECK-NEXT: - - - - - 1.00 - - - - - - - add t1, a0, t0 -# CHECK-NEXT: - - 1.00 - - - - - - - - - - mul s0, s0, a5 -# CHECK-NEXT: - - - - - 1.00 - - - - - - - add t2, t2, t2 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - sub s1, s0, s1 -# CHECK-NEXT: - - - 1.00 - - - - - - - - - fcvt.s.w ft1, t3 -# CHECK-NEXT: - - - - 1.00 - - - - - - - - add s2, s2, s2 -# CHECK-NEXT: - - 1.00 - - - - - - - - - - fcvt.w.s t5, ft3 +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3.0] [3.1] [4.0] [4.1] [5.0] [5.1] [5.2] Instructions: +# CHECK-NEXT: - - 1.00 - - - - - - - - mul t0, a0, t0 +# CHECK-NEXT: - - - - - - - 1.00 - - - sub s2, a2, a3 +# CHECK-NEXT: - - 7.00 - - - - - - - - div t1, t2, t3 +# CHECK-NEXT: - - - - - - 1.00 - - - - add t1, a4, gp +# CHECK-NEXT: - - 7.00 - - - - - - - - div a1, a2, a3 +# CHECK-NEXT: - - - - - 1.00 - - - - - add t1, a0, t0 +# CHECK-NEXT: - - 1.00 - - - - - - - - mul s0, s0, a5 +# CHECK-NEXT: - - - - - 1.00 - - - - - add t2, t2, t2 +# CHECK-NEXT: - - - - 1.00 - - - - - - sub s1, s0, s1 +# CHECK-NEXT: - - - 1.00 - - - - - - - fcvt.s.w ft1, t3 +# CHECK-NEXT: - - - - 1.00 - - - - - - add s2, s2, s2 +# CHECK-NEXT: - - 1.00 - - - - - - - - fcvt.w.s t5, ft3 From 56452edeed0ac7a34d8d73cdc468ca75fecd75ca Mon Sep 17 00:00:00 2001 From: Petr Penzin Date: Wed, 18 Dec 2024 09:47:36 -0800 Subject: [PATCH 5/5] Apply suggestions from code review Co-authored-by: Pengcheng Wang --- llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td b/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td index eb748b84d73ab..2afe02552974e 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td +++ b/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td @@ -16,7 +16,7 @@ def TTAscalonD8Model : SchedMachineModel { let CompleteModel = 0; - // TODO supported, but haven't added scheduling info yet + // TODO: supported, but haven't added scheduling info yet. let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZbkx, HasStdExtZcmt, HasStdExtZknd, HasStdExtZkne, HasStdExtZknh, HasStdExtZksed, HasStdExtZksh, @@ -35,7 +35,7 @@ let BufferSize = 16 in { def AscalonFXC : ProcResource<2>; // ALU, BR def AscalonFXD : ProcResource<2>; // ALU def AscalonFP : ProcResource<2>; - // TODO two vector units with vector scheduling model + // TODO: two vector units with vector scheduling model. } def AscalonFX : ProcResGroup<[AscalonFXA, AscalonFXB, AscalonFXC, AscalonFXD]>;