diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 93379f64..0679d35b 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -16,18 +16,21 @@ jobs: strategy: matrix: os: - - ubuntu-20.04 - - macOS-11 - - windows-2019 + - ubuntu-22.04 + - macOS-13 + - windows-2022 steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v4 + - name: Install Dependencies (macOS) + if: runner.os == 'macOS' + run: brew install haskell-stack - name: Build run: make - name: Prepare Artifact shell: bash run: cp LICENSE NOTICE README.md CHANGELOG.md bin - name: Upload Artifact - uses: actions/upload-artifact@v1 + uses: actions/upload-artifact@v4 with: name: ${{ runner.os }} path: bin @@ -37,23 +40,23 @@ jobs: strategy: matrix: os: - - ubuntu-20.04 - - macOS-11 + - ubuntu-22.04 + - macOS-13 needs: build env: - IVERILOG_REF: 8ee1d56e1acbc130aa63da3c8ef0d535a551cf28 + IVERILOG_REF: ef7f0a8f38782dfc0872b1e352ccf32343c10bb8 steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v4 - name: Install Dependencies (macOS) if: runner.os == 'macOS' run: | - brew install shunit2 bison autoconf + brew install bison autoconf echo "$(brew --prefix bison)/bin" >> $GITHUB_PATH - name: Install Dependencies (Linux) if: runner.os == 'Linux' - run: sudo apt-get install -y shunit2 flex bison autoconf gperf + run: sudo apt-get install -y flex bison autoconf gperf - name: Cache iverilog - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.local key: ${{ runner.OS }}-${{ env.IVERILOG_REF }} @@ -70,19 +73,21 @@ jobs: make install cd .. 
fi + curl -L https://raw.githubusercontent.com/kward/shunit2/v2.1.8/shunit2 > ~/.local/bin/shunit2 + chmod +x ~/.local/bin/shunit2 + echo "$HOME/.local/bin" >> $GITHUB_PATH - name: Download Artifact - uses: actions/download-artifact@v1 + uses: actions/download-artifact@v4 with: name: ${{ runner.os }} path: bin - name: Test run: | chmod +x bin/sv2v - export PATH="$PATH:$HOME/.local/bin" make test release: - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 strategy: matrix: name: [macOS, Linux, Windows] @@ -90,7 +95,7 @@ jobs: if: github.event_name == 'release' steps: - name: Download Artifact - uses: actions/download-artifact@v1 + uses: actions/download-artifact@v4 with: name: ${{ matrix.name }} path: sv2v-${{ matrix.name }} @@ -99,7 +104,7 @@ jobs: - name: Create ZIP run: zip -r sv2v-${{ matrix.name }} ./sv2v-${{ matrix.name }} - name: Upload Release Asset - uses: actions/upload-release-asset@v1.0.1 + uses: actions/upload-release-asset@v1.0.2 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: diff --git a/.github/workflows/notice.yaml b/.github/workflows/notice.yaml index ec0093d4..8c417f22 100644 --- a/.github/workflows/notice.yaml +++ b/.github/workflows/notice.yaml @@ -11,9 +11,9 @@ on: - NOTICE jobs: notice: - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v4 - name: Regenerate NOTICE run: ./notice.sh > NOTICE - name: Validate NOTICE diff --git a/CHANGELOG.md b/CHANGELOG.md index 588f3208..3f5af1a7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,8 @@ ### Bug Fixes * Fixed an issue that prevented parsing tasks and functions with `inout` ports +* Fixed signed unsized literals with a leading 1 bit (e.g., `'sb1`, `'sh8f`) + incorrectly sign-extending in size and type casts * Fixed conflicting genvar names when inlining interfaces and modules that use them; all genvars are now given a design-wide unique name * Fixed byte order of strings in size casts @@ -24,6 +26,7 @@ * Fixed unconverted 
multidimensional struct fields within dimension queries * Fixed non-typenames (e.g., from packages or subsequent declarations) improperly shadowing the names of `struct` pattern fields +* Fixed shadowing of interface array indices passed to port connections * Fixed failure to resolve typenames suffixed with dimensions in contexts permitting both types and expressions, e.g., `$bits(T[W-1:0])` * Fixed errant constant folding of shadowed non-trivial localparams diff --git a/NOTICE b/NOTICE index a8ea7ab1..bb7e0700 100644 --- a/NOTICE +++ b/NOTICE @@ -1,5 +1,5 @@ ================================================================================ -Dependency: alex-3.2.7.1 +Dependency: alex-3.3.0.0 ================================================================================ Copyright (c) 1995-2011, Chris Dornan and Simon Marlow @@ -123,7 +123,7 @@ Dependency: array-0.5.4.0 ================================================================================ -Dependency: base-4.14.3.0 +Dependency: base-4.17.2.1 ================================================================================ This library (libraries/base) is derived from code from several @@ -211,7 +211,7 @@ Dependency: base-4.14.3.0 ----------------------------------------------------------------------------- ================================================================================ -Dependency: binary-0.8.8.0 +Dependency: binary-0.8.9.1 ================================================================================ Copyright (c) Lennart Kolmodin @@ -246,13 +246,14 @@ Dependency: binary-0.8.8.0 POSSIBILITY OF SUCH DAMAGE. ================================================================================ -Dependency: bytestring-0.10.12.0 +Dependency: bytestring-0.11.5.3 ================================================================================ Copyright (c) Don Stewart 2005-2009 (c) Duncan Coutts 2006-2015 (c) David Roundy 2003-2005 (c) Simon Meier 2010-2011 + (c) Koz Ross 2021 All rights reserved. 
@@ -281,10 +282,10 @@ Dependency: bytestring-0.10.12.0 SUCH DAMAGE. ================================================================================ -Dependency: cmdargs-0.10.21 +Dependency: cmdargs-0.10.22 ================================================================================ - Copyright Neil Mitchell 2009-2021. + Copyright Neil Mitchell 2009-2023. All rights reserved. Redistribution and use in source and binary forms, with or without @@ -316,7 +317,7 @@ Dependency: cmdargs-0.10.21 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================================================ -Dependency: containers-0.6.5.1 +Dependency: containers-0.6.7 ================================================================================ The Glasgow Haskell Compiler License @@ -352,7 +353,7 @@ Dependency: containers-0.6.5.1 DAMAGE. ================================================================================ -Dependency: deepseq-1.4.4.0 +Dependency: deepseq-1.4.8.0 ================================================================================ This library (deepseq) is derived from code from the GHC project which @@ -396,7 +397,7 @@ Dependency: deepseq-1.4.4.0 ----------------------------------------------------------------------------- ================================================================================ -Dependency: directory-1.3.6.0 +Dependency: directory-1.3.7.1 ================================================================================ This library (libraries/base) is derived from code from two @@ -464,10 +465,10 @@ Dependency: directory-1.3.6.0 ----------------------------------------------------------------------------- ================================================================================ -Dependency: filepath-1.4.2.1 +Dependency: filepath-1.4.2.2 ================================================================================ - Copyright Neil Mitchell 2005-2018. 
+ Copyright Neil Mitchell 2005-2020. All rights reserved. Redistribution and use in source and binary forms, with or without @@ -499,27 +500,12 @@ Dependency: filepath-1.4.2.1 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================================================ -Dependency: ghc-prim-0.6.1 +Dependency: ghc-bignum-1.3 ================================================================================ - This library (libraries/ghc-prim) is derived from code from several - sources: - - * Code from the GHC project which is largely (c) The University of - Glasgow, and distributable under a BSD-style license (see below), - - * Code from the Haskell 98 Report which is (c) Simon Peyton Jones - and freely redistributable (but see the full license for - restrictions). - - The full text of these licenses is reproduced below. All of the - licenses are BSD-style or compatible. - - ----------------------------------------------------------------------------- - The Glasgow Haskell Compiler License - Copyright 2004, The University Court of the University of Glasgow. + Copyright 2020, The University Court of the University of Glasgow. All rights reserved. Redistribution and use in source and binary forms, with or without @@ -549,24 +535,14 @@ Dependency: ghc-prim-0.6.1 OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - ----------------------------------------------------------------------------- - - Code derived from the document "Report on the Programming Language - Haskell 98", is distributed under the following license: - - Copyright (c) 2002 Simon Peyton Jones - - The authors intend this Report to belong to the entire Haskell - community, and so we grant permission to copy and distribute it for - any purpose, provided that it is reproduced in its entirety, - including this Notice. 
Modified versions of this Report may also be - copied and distributed for any purpose, provided that the modified - version is clearly presented as such, and that it does not claim to - be a definition of the Haskell 98 Language. +================================================================================ +Dependency: ghc-prim-0.9.1 +================================================================================ + Package not found: No such package version for ghc-prim ================================================================================ -Dependency: githash-0.1.6.2 +Dependency: githash-0.1.7.0 ================================================================================ Copyright (c) 2018, Michael Snoyman, 2015, Adam C. Foltzer @@ -598,7 +574,7 @@ Dependency: githash-0.1.6.2 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================================================ -Dependency: happy-1.20.0 +Dependency: happy-1.20.1.1 ================================================================================ The Happy License @@ -633,7 +609,7 @@ Dependency: happy-1.20.0 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================================================ -Dependency: hashable-1.3.0.0 +Dependency: hashable-1.4.3.0 ================================================================================ Copyright Milan Straka 2010 @@ -667,41 +643,6 @@ Dependency: hashable-1.3.0.0 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -================================================================================ -Dependency: integer-gmp-1.0.3.0 -================================================================================ - - Copyright (c) 2014, Herbert Valerio Riedel - - All rights reserved. 
- - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. - - * Neither the name of Herbert Valerio Riedel nor the names of other - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- ================================================================================ Dependency: mtl-2.2.2 ================================================================================ @@ -783,7 +724,7 @@ Dependency: pretty-1.1.3.6 ----------------------------------------------------------------------------- ================================================================================ -Dependency: primitive-0.7.3.0 +Dependency: primitive-0.8.0.0 ================================================================================ Copyright (c) 2008-2009, Roman Leshchinskiy @@ -818,7 +759,7 @@ Dependency: primitive-0.7.3.0 ================================================================================ -Dependency: process-1.6.13.2 +Dependency: process-1.6.18.0 ================================================================================ This library (libraries/process) is derived from code from two @@ -886,7 +827,7 @@ Dependency: process-1.6.13.2 ----------------------------------------------------------------------------- ================================================================================ -Dependency: template-haskell-2.16.0.0 +Dependency: template-haskell-2.19.0.0 ================================================================================ @@ -924,7 +865,7 @@ Dependency: template-haskell-2.16.0.0 ================================================================================ -Dependency: text-1.2.4.1 +Dependency: text-2.0.2 ================================================================================ Copyright (c) 2008-2009, Tom Harper @@ -955,7 +896,7 @@ Dependency: text-1.2.4.1 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
================================================================================ -Dependency: th-compat-0.1.3 +Dependency: th-compat-0.1.4 ================================================================================ Copyright (c) 2020, Ryan Scott @@ -990,10 +931,10 @@ Dependency: th-compat-0.1.3 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================================================ -Dependency: time-1.9.3 +Dependency: time-1.12.2 ================================================================================ - TimeLib is Copyright (c) Ashley Yakeley, 2004-2014. All rights reserved. + TimeLib is Copyright (c) Ashley Yakeley and contributors, 2004-2022. All rights reserved. Certain sections are Copyright 2004, The University Court of the University of Glasgow. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: @@ -1041,7 +982,7 @@ Dependency: transformers-0.5.6.2 DAMAGE. ================================================================================ -Dependency: unix-2.7.2.2 +Dependency: unix-2.7.3 ================================================================================ The Glasgow Haskell Compiler License @@ -1077,10 +1018,13 @@ Dependency: unix-2.7.2.2 DAMAGE. ================================================================================ -Dependency: vector-0.12.3.1 +Dependency: vector-0.13.1.0 ================================================================================ Copyright (c) 2008-2012, Roman Leshchinskiy + 2020-2022, Alexey Kuleshevich + 2020-2022, Aleksey Khudyakov + 2020-2022, Andrew Lelechenko All rights reserved. Redistribution and use in source and binary forms, with or without @@ -1110,4 +1054,40 @@ Dependency: vector-0.12.3.1 OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+================================================================================ +Dependency: vector-stream-0.1.0.0 +================================================================================ + + Copyright (c) 2008-2012, Roman Leshchinskiy + 2020-2022, Alexey Kuleshevich + 2020-2022, Aleksey Khudyakov + 2020-2022, Andrew Lelechenko + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + - Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + - Neither name of the University nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY COURT OF THE UNIVERSITY OF + GLASGOW AND THE CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + UNIVERSITY COURT OF THE UNIVERSITY OF GLASGOW OR THE CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + DAMAGE. 
diff --git a/src/Convert/Interface.hs b/src/Convert/Interface.hs index 90d7cfaa..b2be2813 100644 --- a/src/Convert/Interface.hs +++ b/src/Convert/Interface.hs @@ -222,7 +222,10 @@ convertDescription tops parts (Part att ext Module lif name ports items) = then Nothing else Just (portName, modportBinding) where - modportBinding = (substitutions, replaceBit modportE) + modportBinding = + ( substitutions + , scopeExprWithScopes modports $ replaceBit modportE + ) substitutions = genSubstitutions modports base instanceE modportE maybeInfo = diff --git a/src/Convert/Scoper.hs b/src/Convert/Scoper.hs index 51a14fa4..f297311c 100644 --- a/src/Convert/Scoper.hs +++ b/src/Convert/Scoper.hs @@ -38,6 +38,8 @@ module Convert.Scoper , replaceInExpr , scopeExpr , scopeType + , scopeExprWithScopes + , scopeTypeWithScopes , insertElem , removeElem , injectItem @@ -209,6 +211,12 @@ scopeType = traverseNestedTypesM $ traverseTypeExprsM scopeExpr {-# INLINABLE scopeExpr #-} {-# INLINABLE scopeType #-} +scopeExprWithScopes :: Scopes a -> Expr -> Expr +scopeExprWithScopes scopes = flip evalState scopes . scopeExpr + +scopeTypeWithScopes :: Scopes a -> Type -> Type +scopeTypeWithScopes scopes = flip evalState scopes . 
scopeType + class ScopePath k where toTiers :: Scopes a -> k -> [Tier] diff --git a/src/Language/SystemVerilog/AST/Number.hs b/src/Language/SystemVerilog/AST/Number.hs index e5f2418b..6744590b 100644 --- a/src/Language/SystemVerilog/AST/Number.hs +++ b/src/Language/SystemVerilog/AST/Number.hs @@ -153,12 +153,17 @@ parseNormalized oversizedNumbers str = -- high-order X or Z is extended up to the size of the literal leadDigit = head digits - numDigits = length digits + numDigits = length digits + if isSignedUnsizedWithLeading1 then 1 else 0 leadDigitIsXZ = elem leadDigit xzDigits digitsExtended = if leadDigitIsXZ then replicate (sizeDigits - numDigits) leadDigit ++ digits else digits + isSignedUnsizedWithLeading1 = + maybeBase /= Nothing && + not leadDigitIsXZ && + signed && + digitToInt leadDigit >= div (baseSize base) 2 -- determine the number of digits needed based on the size sizeDigits = ((abs size) `div` bitsPerDigit) + sizeExtraDigit diff --git a/stack.yaml b/stack.yaml index 495ac63c..7c2ed354 100644 --- a/stack.yaml +++ b/stack.yaml @@ -1,4 +1,4 @@ -resolver: lts-18.24 +resolver: lts-21.25 pvp-bounds: both ghc-options: $locals: -j2 diff --git a/stack.yaml.lock b/stack.yaml.lock index c6a8b846..f823d294 100644 --- a/stack.yaml.lock +++ b/stack.yaml.lock @@ -3,10 +3,10 @@ # For more information, please see the documentation at: # https://docs.haskellstack.org/en/stable/lock_files -snapshots: -- original: lts-18.24 - completed: - sha256: 06d844ba51e49907bd29cb58b4a5f86ee7587a4cd7e6cf395eeec16cba619ce8 - url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/24.yaml - size: 587821 packages: [] +snapshots: +- completed: + sha256: a81fb3877c4f9031e1325eb3935122e608d80715dc16b586eb11ddbff8671ecd + size: 640086 + url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/21/25.yaml + original: lts-21.25 diff --git a/sv2v.cabal b/sv2v.cabal index 26c9ecdc..2c33d660 100644 --- a/sv2v.cabal +++ b/sv2v.cabal 
@@ -129,6 +129,7 @@ executable sv2v "-with-rtsopts=-N -A32m" -funbox-strict-fields -Wall + -Wno-incomplete-uni-patterns source-repository head type: git diff --git a/test/core/cast_literal.vh b/test/core/cast_literal.vh index 9152bac2..898387bf 100644 --- a/test/core/cast_literal.vh +++ b/test/core/cast_literal.vh @@ -47,6 +47,16 @@ `TEST_ALL(7'so0x) `TEST_ALL(7'sox1) `TEST_ALL(7'soz0) +`TEST_ALL('so7) +`TEST_ALL('so37) +`TEST_ALL('so47) +`TEST_ALL('so57) +`TEST_ALL('so07) +`TEST_ALL('o7) +`TEST_ALL('o37) +`TEST_ALL('o47) +`TEST_ALL('o57) +`TEST_ALL('o07) `TEST_ALL('bx) `TEST_ALL('ozx) @@ -58,6 +68,18 @@ `TEST_ALL('bzzz1) `TEST_ALL('ozzz1) `TEST_ALL('hzzz1) +`TEST_ALL('shf) +`TEST_ALL('sh6f) +`TEST_ALL('sh7f) +`TEST_ALL('sh8f) +`TEST_ALL('sh9f) +`TEST_ALL('sh0f) +`TEST_ALL('hf) +`TEST_ALL('h6f) +`TEST_ALL('h7f) +`TEST_ALL('h8f) +`TEST_ALL('h9f) +`TEST_ALL('h0f) `TEST_ALL(1'ox) `TEST_ALL(1'oz) diff --git a/test/core/multipack.v b/test/core/multipack.v index 538d3e96..90dc91a9 100644 --- a/test/core/multipack.v +++ b/test/core/multipack.v @@ -1,12 +1,5 @@ module Example; - initial - $monitor("%b %b %b %b %b %b %b %b %b", - arr1, arr2, arr3, - arr4, arr5, arr6, - arr7, arr8, arr9 - ); - reg [14:0] arr1; reg [14:0] arr2; reg [14:0] arr3; @@ -43,4 +36,11 @@ module Example; #1; arr9[(4-1)*3+:3] = arr9[(4-2)*3+:3]; end + initial + $monitor("%b %b %b %b %b %b %b %b %b", + arr1, arr2, arr3, + arr4, arr5, arr6, + arr7, arr8, arr9 + ); + endmodule diff --git a/test/core/package_order.v b/test/core/package_order.v index 0c93664c..34b3ed19 100644 --- a/test/core/package_order.v +++ b/test/core/package_order.v @@ -1,9 +1,8 @@ -module evil_mdl ( - output reg [evil_pkg_B-1:0] foo -); +module evil_mdl (foo); localparam evil_pkg_Z = 1; localparam evil_pkg_A = evil_pkg_Z; localparam evil_pkg_B = evil_pkg_Z; + output reg [evil_pkg_B-1:0] foo; initial foo = evil_pkg_A; endmodule diff --git a/test/core/reorder.v b/test/core/reorder.v index a41b3152..815f7812 100644 --- 
a/test/core/reorder.v +++ b/test/core/reorder.v @@ -9,8 +9,8 @@ module top; assign brr[0] = 1; initial $display("%b", brr); if (YES) begin : blk2 - assign crr[0] = 1; wire [19:0] crr; + assign crr[0] = 1; initial $display("%b", crr); end end diff --git a/test/help/check_usage.py b/test/help/check_usage.py new file mode 100644 index 00000000..54716aab --- /dev/null +++ b/test/help/check_usage.py @@ -0,0 +1,10 @@ +import subprocess + +with open("../../README.md", "r") as file: + text_readme = file.read() + +text_help = subprocess.check_output(["../../bin/sv2v", "--help"]).decode() +text_usage = "```\n" + "\n".join(text_help.split("\n")[2:-5]) + "\n```" + +if text_usage not in text_readme: + raise RuntimeError(f"'{text_usage}' not found in '{text_readme}'") diff --git a/test/help/run.sh b/test/help/run.sh index ebb353d3..579e6c69 100755 --- a/test/help/run.sh +++ b/test/help/run.sh @@ -5,14 +5,7 @@ test_help() { assertTrue "getting help should succeed" $result assertNotNull "stdout should not be empty" "$stdout" assertNull "stderr should be empty" "$stderr" - lines=`echo "$stdout" | wc -l` - usage=`echo "$stdout" | tail -n +3 | head -n $(expr $lines - 5)` - usage="\`\`\` -$usage -\`\`\`" - if [[ ! $(<../../README.md) = *"$usage"* ]]; then - fail "Did not find matching usage in README!" - fi + python3 check_usage.py } source ../lib/functions.sh diff --git a/test/run-all.sh b/test/run-all.sh index ee651c9a..cdf35682 100755 --- a/test/run-all.sh +++ b/test/run-all.sh @@ -1,5 +1,7 @@ #!/bin/bash +trap exit INT + cd `dirname "${BASH_SOURCE[0]}"` failures=0 diff --git a/test/type_param/common/local/util/instr_tracer.sv b/test/type_param/common/local/util/instr_tracer.sv new file mode 100644 index 00000000..17c11e5f --- /dev/null +++ b/test/type_param/common/local/util/instr_tracer.sv @@ -0,0 +1,223 @@ +// Copyright 2018 ETH Zurich and University of Bologna. 
+// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 16.05.2017 +// Description: Instruction Tracer Main Class + +`ifndef VERILATOR +//pragma translate_off +`include "ex_trace_item.svh" +`include "instr_trace_item.svh" + +module instr_tracer ( + instr_tracer_if tracer_if, + input logic[riscv::XLEN-1:0] hart_id_i +); + + // keep the decoded instructions in a queue + logic [31:0] decode_queue [$]; + // keep the issued instructions in a queue + logic [31:0] issue_queue [$]; + // issue scoreboard entries + ariane_pkg::scoreboard_entry_t issue_sbe_queue [$]; + ariane_pkg::scoreboard_entry_t issue_sbe; + // store resolved branches, get (mis-)predictions + ariane_pkg::bp_resolve_t bp [$]; + // shadow copy of the register files + logic [63:0] gp_reg_file [32]; + logic [63:0] fp_reg_file [32]; + // 64 bit clock tick count + longint unsigned clk_ticks; + int f, commit_log; + // address mapping + // contains mappings of the form vaddr <-> paddr + logic [63:0] store_mapping[$], load_mapping[$], address_mapping; + + // static uvm_cmdline_processor uvcl = uvm_cmdline_processor::get_inst(); + + function void create_file(logic [63:0] hart_id); + string fn, fn_commit_log; + $sformat(fn, "trace_hart_%0.0f.log", hart_id); + $sformat(fn_commit_log, "trace_hart_%0.0f_commit.log", hart_id); + $display("[TRACER] Output filename is: %s", fn); + + f = $fopen(fn,"w"); + if 
(ariane_pkg::ENABLE_SPIKE_COMMIT_LOG) commit_log = $fopen(fn_commit_log, "w"); + endfunction : create_file + + task trace(); + automatic logic [31:0] decode_instruction, issue_instruction, issue_commit_instruction; + automatic ariane_pkg::scoreboard_entry_t commit_instruction; + // initialize register 0 + gp_reg_file = '{default:0}; + fp_reg_file = '{default:0}; + + forever begin + automatic ariane_pkg::bp_resolve_t bp_instruction = '0; + // new cycle, we are only interested if reset is de-asserted + @(tracer_if.pck) if (tracer_if.pck.rstn !== 1'b1) begin + flush(); + continue; + end + + // increment clock tick + clk_ticks++; + + // ------------------- + // Instruction Decode + // ------------------- + // we are decoding an instruction + if (tracer_if.pck.fetch_valid && tracer_if.pck.fetch_ack) begin + decode_instruction = tracer_if.pck.instruction; + decode_queue.push_back(decode_instruction); + end + // ------------------- + // Instruction Issue + // ------------------- + // we got a new issue ack, so put the element from the decode queue to + // the issue queue + if (tracer_if.pck.issue_ack && !tracer_if.pck.flush_unissued) begin + issue_instruction = decode_queue.pop_front(); + issue_queue.push_back(issue_instruction); + // also save the scoreboard entry to a separate issue queue + issue_sbe_queue.push_back(ariane_pkg::scoreboard_entry_t'(tracer_if.pck.issue_sbe)); + end + + // -------------------- + // Address Translation + // -------------------- + if (tracer_if.pck.st_valid) begin + store_mapping.push_back(tracer_if.pck.st_paddr); + end + + if (tracer_if.pck.ld_valid && !tracer_if.pck.ld_kill) begin + load_mapping.push_back(tracer_if.pck.ld_paddr); + end + // ---------------------- + // Store predictions + // ---------------------- + if (tracer_if.pck.resolve_branch.valid) begin + bp.push_back(tracer_if.pck.resolve_branch); + end + // -------------- + // Commit + // -------------- + // we are committing an instruction + for (int i = 0; i < 2; i++) begin + if 
(tracer_if.pck.commit_ack[i]) begin + commit_instruction = ariane_pkg::scoreboard_entry_t'(tracer_if.pck.commit_instr[i]); + issue_commit_instruction = issue_queue.pop_front(); + issue_sbe = issue_sbe_queue.pop_front(); + // check if the instruction retiring is a load or store, get the physical address accordingly + if (tracer_if.pck.commit_instr[i].fu == ariane_pkg::LOAD) + address_mapping = load_mapping.pop_front(); + else if (tracer_if.pck.commit_instr[i].fu == ariane_pkg::STORE) + address_mapping = store_mapping.pop_front(); + + if (tracer_if.pck.commit_instr[i].fu == ariane_pkg::CTRL_FLOW) + bp_instruction = bp.pop_front(); + // the scoreboards issue entry still contains the immediate value as a result + // check if the write back is valid, if not we need to source the result from the register file + // as the most recent version of this register will be there. + if (tracer_if.pck.we_gpr[i] || tracer_if.pck.we_fpr[i]) begin + printInstr(issue_sbe, issue_commit_instruction, tracer_if.pck.wdata[i], address_mapping, tracer_if.pck.priv_lvl, tracer_if.pck.debug_mode, bp_instruction); + end else if (ariane_pkg::is_rd_fpr(commit_instruction.op)) begin + printInstr(issue_sbe, issue_commit_instruction, fp_reg_file[commit_instruction.rd], address_mapping, tracer_if.pck.priv_lvl, tracer_if.pck.debug_mode, bp_instruction); + end else begin + printInstr(issue_sbe, issue_commit_instruction, gp_reg_file[commit_instruction.rd], address_mapping, tracer_if.pck.priv_lvl, tracer_if.pck.debug_mode, bp_instruction); + end + end + end + // -------------- + // Exceptions + // -------------- + if (tracer_if.pck.exception.valid && !(tracer_if.pck.debug_mode && tracer_if.pck.exception.cause == riscv::BREAKPOINT)) begin + // print exception + printException(tracer_if.pck.commit_instr[0].pc, tracer_if.pck.exception.cause, tracer_if.pck.exception.tval); + end + // ---------------------- + // Commit Registers + // ---------------------- + // update shadow reg files here + for (int i = 0; i 
< 2; i++) begin + if (tracer_if.pck.we_gpr[i] && tracer_if.pck.waddr[i] != 5'b0) begin + gp_reg_file[tracer_if.pck.waddr[i]] = tracer_if.pck.wdata[i]; + end else if (tracer_if.pck.we_fpr[i]) begin + fp_reg_file[tracer_if.pck.waddr[i]] = tracer_if.pck.wdata[i]; + end + end + // -------------- + // Flush Signals + // -------------- + // flush un-issued instructions + if (tracer_if.pck.flush_unissued) begin + flushDecode(); + end + // flush whole pipeline + if (tracer_if.pck.flush) begin + flush(); + end + end + + endtask + + // flush all decoded instructions + function void flushDecode (); + decode_queue = {}; + endfunction + + // flush everything, we took an exception/interrupt + function void flush (); + flushDecode(); + // clear all elements in the queue + issue_queue = {}; + issue_sbe_queue = {}; + // also clear mappings + store_mapping = {}; + load_mapping = {}; + bp = {}; + endfunction + + function void printInstr(ariane_pkg::scoreboard_entry_t sbe, logic [31:0] instr, logic [63:0] result, logic [riscv::PLEN-1:0] paddr, riscv::priv_lvl_t priv_lvl, logic debug_mode, ariane_pkg::bp_resolve_t bp); + automatic instr_trace_item iti = new ($time, clk_ticks, sbe, instr, gp_reg_file, fp_reg_file, result, paddr, priv_lvl, debug_mode, bp); + // print instruction to console + automatic string print_instr = iti.printInstr(); + if (ariane_pkg::ENABLE_SPIKE_COMMIT_LOG && !debug_mode) begin + $fwrite(commit_log, riscv::spikeCommitLog(sbe.pc, priv_lvl, instr, sbe.rd, result, ariane_pkg::is_rd_fpr(sbe.op))); + end + $fwrite(f, {print_instr, "\n"}); + endfunction + + function void printException(logic [riscv::VLEN-1:0] pc, logic [63:0] cause, logic [63:0] tval); + automatic ex_trace_item eti = new (pc, cause, tval); + automatic string print_ex = eti.printException(); + $fwrite(f, {print_ex, "\n"}); + endfunction + + function void close(); + if (f) $fclose(f); + if (ariane_pkg::ENABLE_SPIKE_COMMIT_LOG && commit_log) $fclose(commit_log); + endfunction + + + initial begin + #15ns; 
+ create_file(hart_id_i); + trace(); + end + + final begin + close(); + end + +endmodule : instr_tracer +//pragma translate_on +`endif diff --git a/test/type_param/common/local/util/instr_tracer_if.sv b/test/type_param/common/local/util/instr_tracer_if.sv new file mode 100644 index 00000000..5015cfd7 --- /dev/null +++ b/test/type_param/common/local/util/instr_tracer_if.sv @@ -0,0 +1,67 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// Author: Florian Zaruba, ETH Zurich +// Date: 16.05.2017 +// Description: Instruction Tracer Interface + +`ifndef VERILATOR +`ifndef INSTR_TRACER_IF_SV +`define INSTR_TRACER_IF_SV +interface instr_tracer_if ( + input clk + ); + + logic rstn; + logic flush_unissued; + logic flush; + // Decode + logic [31:0] instruction; + logic fetch_valid; + logic fetch_ack; + // Issue stage + logic issue_ack; // issue acknowledged + ariane_pkg::scoreboard_entry_t issue_sbe; // issue scoreboard entry + // WB stage + logic [1:0][4:0] waddr; + logic [1:0][63:0] wdata; + logic [1:0] we_gpr; + logic [1:0] we_fpr; + // commit stage + ariane_pkg::scoreboard_entry_t [1:0] commit_instr; // commit instruction + logic [1:0] commit_ack; + // address translation + // stores + logic st_valid; + logic [riscv::PLEN-1:0] st_paddr; + // loads + logic ld_valid; + logic ld_kill; + logic [riscv::PLEN-1:0] ld_paddr; + // misprediction + ariane_pkg::bp_resolve_t resolve_branch; + // exceptions + ariane_pkg::exception_t exception; + // current privilege level + riscv::priv_lvl_t priv_lvl; + logic debug_mode; + // the tracer just has a passive interface we do not drive anything with it + + //pragma translate_off + clocking pck @(posedge clk); + input rstn, flush_unissued, flush, instruction, fetch_valid, fetch_ack, issue_ack, issue_sbe, waddr, + st_valid, st_paddr, ld_valid, ld_kill, ld_paddr, resolve_branch, + wdata, we_gpr, we_fpr, commit_instr, commit_ack, exception, priv_lvl, debug_mode; + endclocking + //pragma translate_on + +endinterface +`endif +`endif diff --git a/test/type_param/common/local/util/sram.sv b/test/type_param/common/local/util/sram.sv new file mode 100644 index 00000000..4c0f2d25 --- /dev/null +++ b/test/type_param/common/local/util/sram.sv @@ -0,0 +1,107 @@ +// Copyright 2018 ETH Zurich and University of Bologna. 
+// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba , ETH Zurich +// Michael Schaffner , ETH Zurich +// Date: 15.08.2018 +// Description: SRAM wrapper for FPGA (requires the fpga-support submodule) +// +// Note: the wrapped module contains two different implementations for +// ALTERA and XILINX tools, since these follow different coding styles for +// inferrable RAMS with byte enable. define `FPGA_TARGET_XILINX or +// `FPGA_TARGET_ALTERA in your build environment (default is ALTERA) + +module sram #( + parameter DATA_WIDTH = 64, + parameter USER_WIDTH = 1, + parameter USER_EN = 0, + parameter NUM_WORDS = 1024, + parameter SIM_INIT = "none", + parameter OUT_REGS = 0 // enables output registers in FPGA macro (read lat = 2) +)( + input logic clk_i, + input logic rst_ni, + input logic req_i, + input logic we_i, + input logic [$clog2(NUM_WORDS)-1:0] addr_i, + input logic [USER_WIDTH-1:0] wuser_i, + input logic [DATA_WIDTH-1:0] wdata_i, + input logic [(DATA_WIDTH+7)/8-1:0] be_i, + output logic [USER_WIDTH-1:0] ruser_o, + output logic [DATA_WIDTH-1:0] rdata_o +); + +localparam DATA_WIDTH_ALIGNED = ((DATA_WIDTH+63)/64)*64; +localparam USER_WIDTH_ALIGNED = DATA_WIDTH_ALIGNED; // To be fine tuned to reduce memory size +localparam BE_WIDTH_ALIGNED = (((DATA_WIDTH+7)/8+7)/8)*8; + +logic [DATA_WIDTH_ALIGNED-1:0] wdata_aligned; +logic [USER_WIDTH_ALIGNED-1:0] wuser_aligned; +logic 
[BE_WIDTH_ALIGNED-1:0] be_aligned; +logic [DATA_WIDTH_ALIGNED-1:0] rdata_aligned; +logic [USER_WIDTH_ALIGNED-1:0] ruser_aligned; + +// align to 64 bits for inferrable macro below +always_comb begin : p_align + wdata_aligned ='0; + wuser_aligned ='0; + be_aligned ='0; + wdata_aligned[DATA_WIDTH-1:0] = wdata_i; + wuser_aligned[USER_WIDTH-1:0] = wuser_i; + be_aligned[BE_WIDTH_ALIGNED-1:0] = be_i; + + rdata_o = rdata_aligned[DATA_WIDTH-1:0]; + ruser_o = ruser_aligned[USER_WIDTH-1:0]; +end + + for (genvar k = 0; k<(DATA_WIDTH+63)/64; k++) begin : gen_cut + // unused byte-enable segments (8bits) are culled by the tool + tc_sram_wrapper #( + .NumWords(NUM_WORDS), // Number of Words in data array + .DataWidth(64), // Data signal width + .ByteWidth(32'd8), // Width of a data byte + .NumPorts(32'd1), // Number of read and write ports + .Latency(32'd1), // Latency when the read data is available + .SimInit(SIM_INIT), // Simulation initialization + .PrintSimCfg(1'b0) // Print configuration + ) i_tc_sram_wrapper ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .req_i ( req_i ), + .we_i ( we_i ), + .be_i ( be_aligned[k*8 +: 8] ), + .wdata_i ( wdata_aligned[k*64 +: 64] ), + .addr_i ( addr_i ), + .rdata_o ( rdata_aligned[k*64 +: 64] ) + ); + if (USER_EN > 0) begin : gen_mem_user + tc_sram_wrapper #( + .NumWords(NUM_WORDS), // Number of Words in data array + .DataWidth(64), // Data signal width + .ByteWidth(32'd8), // Width of a data byte + .NumPorts(32'd1), // Number of read and write ports + .Latency(32'd1), // Latency when the read data is available + .SimInit(SIM_INIT), // Simulation initialization + .PrintSimCfg(1'b0) // Print configuration + ) i_tc_sram_wrapper_user ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .req_i ( req_i ), + .we_i ( we_i ), + .be_i ( be_aligned[k*8 +: 8] ), + .wdata_i ( wuser_aligned[k*64 +: 64] ), + .addr_i ( addr_i ), + .rdata_o ( ruser_aligned[k*64 +: 64] ) + ); + end else begin + assign ruser_aligned[k*64 +: 64] = '0; + end + end +endmodule : sram diff 
--git a/test/type_param/common/local/util/tc_sram_wrapper.sv b/test/type_param/common/local/util/tc_sram_wrapper.sv new file mode 100644 index 00000000..ae3287d9 --- /dev/null +++ b/test/type_param/common/local/util/tc_sram_wrapper.sv @@ -0,0 +1,60 @@ +// Copyright 2022 Thales DIS design services SAS +// +// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 +// You may obtain a copy of the License at https://solderpad.org/licenses/ +// +// Original Author: Jean-Roch COULON - Thales + +module tc_sram_wrapper #( + parameter int unsigned NumWords = 32'd1024, // Number of Words in data array + parameter int unsigned DataWidth = 32'd128, // Data signal width + parameter int unsigned ByteWidth = 32'd8, // Width of a data byte + parameter int unsigned NumPorts = 32'd2, // Number of read and write ports + parameter int unsigned Latency = 32'd1, // Latency when the read data is available + parameter SimInit = "none", // Simulation initialization + parameter bit PrintSimCfg = 1'b0, // Print configuration + // DEPENDENT PARAMETERS, DO NOT OVERWRITE! + parameter int unsigned AddrWidth = (NumWords > 32'd1) ? 
$clog2(NumWords) : 32'd1, + parameter int unsigned BeWidth = (DataWidth + ByteWidth - 32'd1) / ByteWidth, // ceil_div + parameter type addr_t = logic [AddrWidth-1:0], + parameter type data_t = logic [DataWidth-1:0], + parameter type be_t = logic [BeWidth-1:0] +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + // input ports + input logic [NumPorts-1:0] req_i, // request + input logic [NumPorts-1:0] we_i, // write enable + input addr_t [NumPorts-1:0] addr_i, // request address + input data_t [NumPorts-1:0] wdata_i, // write data + input be_t [NumPorts-1:0] be_i, // write byte enable + // output ports + output data_t [NumPorts-1:0] rdata_o // read data +); + +// synthesis translate_off + + tc_sram #( + .NumWords(NumWords), + .DataWidth(DataWidth), + .ByteWidth(ByteWidth), + .NumPorts(NumPorts), + .Latency(Latency), + .SimInit(SimInit), + .PrintSimCfg(PrintSimCfg) + ) i_tc_sram ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .req_i ( req_i ), + .we_i ( we_i ), + .be_i ( be_i ), + .wdata_i ( wdata_i ), + .addr_i ( addr_i ), + .rdata_o ( rdata_o ) + ); + +// synthesis translate_on + +endmodule diff --git a/test/type_param/core/acc_dispatcher.sv b/test/type_param/core/acc_dispatcher.sv new file mode 100644 index 00000000..8b5998ae --- /dev/null +++ b/test/type_param/core/acc_dispatcher.sv @@ -0,0 +1,423 @@ +// Copyright 2020 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations under the License. +// +// Authors: Matheus Cavalcante, ETH Zurich +// Nils Wistoff, ETH Zurich +// Date: 20.11.2020 +// Description: Functional unit that dispatches CVA6 instructions to accelerators. + +module acc_dispatcher + import ariane_pkg::*; + import riscv::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type acc_req_t = acc_pkg::accelerator_req_t, + parameter type acc_resp_t = acc_pkg::accelerator_resp_t, + parameter type acc_cfg_t = logic, + parameter acc_cfg_t AccCfg = '0 +) ( + input logic clk_i, + input logic rst_ni, + // Interface with the CSR regfile + input logic acc_cons_en_i, // Accelerator memory consistent mode + output logic acc_fflags_valid_o, + output logic [4:0] acc_fflags_o, + // Interface with the CSRs + input priv_lvl_t ld_st_priv_lvl_i, + input logic sum_i, + input pmpcfg_t [15:0] pmpcfg_i, + input logic [15:0][PLEN-3:0] pmpaddr_i, + input logic [2:0] fcsr_frm_i, + output logic dirty_v_state_o, + // Interface with the issue stage + input scoreboard_entry_t issue_instr_i, + input logic issue_instr_hs_i, + output logic issue_stall_o, + input fu_data_t fu_data_i, + input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i, + output logic [TRANS_ID_BITS-1:0] acc_trans_id_o, + output xlen_t acc_result_o, + output logic acc_valid_o, + output exception_t acc_exception_o, + // Interface with the execute stage + output logic acc_valid_ex_o, // FU executed + // Interface with the commit stage + input logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i, + input logic commit_st_barrier_i, // A store barrier was commited + // Interface with the load/store unit + output logic acc_stall_st_pending_o, + input logic acc_no_st_pending_i, + input dcache_req_i_t [2:0] dcache_req_ports_i, + // Interface with the controller + output logic ctrl_halt_o, + input logic flush_unissued_instr_i, + input logic flush_ex_i, + output logic 
flush_pipeline_o, + // Interface with cache subsystem + output dcache_req_i_t [1:0] acc_dcache_req_ports_o, + input dcache_req_o_t [1:0] acc_dcache_req_ports_i, + input logic inval_ready_i, + output logic inval_valid_o, + output logic [63:0] inval_addr_o, + // Accelerator interface + output acc_req_t acc_req_o, + input acc_resp_t acc_resp_i +); + + `include "common_cells/registers.svh" + + import cf_math_pkg::idx_width; + + /*********************** + * Common signals * + ***********************/ + + logic acc_ready; + logic acc_valid_d, acc_valid_q; + + /************************** + * Accelerator issue * + **************************/ + + // Issue accelerator instructions + `FF(acc_valid_q, acc_valid_d, '0) + + assign acc_valid_ex_o = acc_valid_q; + assign acc_valid_d = ~issue_instr_i.ex.valid & + issue_instr_hs_i & + (issue_instr_i.fu == ACCEL) & + ~flush_unissued_instr_i; + + // Accelerator load/store pending signals + logic acc_no_ld_pending; + logic acc_no_st_pending; + + // Stall issue stage in three cases: + always_comb begin : stall_issue + unique case (issue_instr_i.fu) + ACCEL: + // 1. We're issuing an accelerator instruction but the dispatcher isn't ready yet + issue_stall_o = ~acc_ready; + LOAD: + // 2. We're issuing a scalar load but there is an inflight accelerator store. + issue_stall_o = acc_cons_en_i & ~acc_no_st_pending; + STORE: + // 3. We're issuing a scalar store but there is an inflight accelerator load or store. + issue_stall_o = acc_cons_en_i & (~acc_no_st_pending | ~acc_no_ld_pending); + default: issue_stall_o = 1'b0; + endcase + end + + /*********************** + * Instruction queue * + ***********************/ + + localparam InstructionQueueDepth = 3; + + fu_data_t acc_data; + fu_data_t acc_insn_queue_o; + logic acc_insn_queue_pop; + logic acc_insn_queue_empty; + logic [idx_width(InstructionQueueDepth)-1:0] acc_insn_queue_usage; + logic acc_commit; + logic [ TRANS_ID_BITS-1:0] acc_commit_trans_id; + + assign acc_data = acc_valid_ex_o ? 
fu_data_i : '0; + + fifo_v3 #( + .DEPTH (InstructionQueueDepth), + .FALL_THROUGH(1'b1), + .dtype (fu_data_t) + ) i_acc_insn_queue ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (flush_ex_i), + .testmode_i(1'b0), + .data_i (fu_data_i), + .push_i (acc_valid_q), + .full_o ( /* Unused */), + .data_o (acc_insn_queue_o), + .pop_i (acc_insn_queue_pop), + .empty_o (acc_insn_queue_empty), + .usage_o (acc_insn_queue_usage) + ); + + // We are ready if the instruction queue is able to accept at least one more entry. + assign acc_ready = acc_insn_queue_usage < (InstructionQueueDepth - 1); + + /********************************** + * Non-speculative instructions * + **********************************/ + + // Keep track of the instructions that were received by the dispatcher. + logic [NR_SB_ENTRIES-1:0] insn_pending_d, insn_pending_q; + `FF(insn_pending_q, insn_pending_d, '0) + + // Only non-speculative instructions can be issued to the accelerators. + // The following block keeps track of which transaction IDs reached the + // top of the scoreboard, and are therefore no longer speculative. + logic [NR_SB_ENTRIES-1:0] insn_ready_d, insn_ready_q; + `FF(insn_ready_q, insn_ready_d, '0) + + always_comb begin : p_non_speculative_ff + // Maintain state + insn_pending_d = insn_pending_q; + insn_ready_d = insn_ready_q; + + // We received a new instruction + if (acc_valid_q) insn_pending_d[acc_data.trans_id] = 1'b1; + // Flush all received instructions + if (flush_ex_i) insn_pending_d = '0; + + // An accelerator instruction is no longer speculative. + if (acc_commit && insn_pending_q[acc_commit_trans_id]) begin + insn_ready_d[acc_commit_trans_id] = 1'b1; + insn_pending_d[acc_commit_trans_id] = 1'b0; + end + + // An accelerator instruction was issued. 
+ if (acc_req_o.req_valid) insn_ready_d[acc_req_o.trans_id] = 1'b0; + end : p_non_speculative_ff + + /************************* + * Accelerator request * + *************************/ + + acc_pkg::accelerator_req_t acc_req; + logic acc_req_valid; + logic acc_req_ready; + + acc_pkg::accelerator_req_t acc_req_int; + fall_through_register #( + .T(acc_pkg::accelerator_req_t) + ) i_accelerator_req_register ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .clr_i (1'b0), + .testmode_i(1'b0), + .data_i (acc_req), + .valid_i (acc_req_valid), + .ready_o (acc_req_ready), + .data_o (acc_req_int), + .valid_o (acc_req_o.req_valid), + .ready_i (acc_resp_i.req_ready) + ); + + assign acc_req_o.insn = acc_req_int.insn; + assign acc_req_o.rs1 = acc_req_int.rs1; + assign acc_req_o.rs2 = acc_req_int.rs2; + assign acc_req_o.frm = acc_req_int.frm; + assign acc_req_o.trans_id = acc_req_int.trans_id; + assign acc_req_o.store_pending = !acc_no_st_pending_i && acc_cons_en_i; + assign acc_req_o.acc_cons_en = acc_cons_en_i; + assign acc_req_o.inval_ready = inval_ready_i; + + always_comb begin : accelerator_req_dispatcher + // Do not fetch from the instruction queue + acc_insn_queue_pop = 1'b0; + + // Default values + acc_req = '0; + acc_req_valid = 1'b0; + + // Unpack fu_data_t into accelerator_req_t + if (!acc_insn_queue_empty) begin + acc_req = '{ + // Instruction is forwarded from the decoder as an immediate + // - + // frm rounding information is up to date during a valid request to the accelerator + // The scoreboard synchronizes it with previous fcsr writes, and future fcsr writes + // do not take place until the accelerator answers (Ariane commits in-order) + insn : + acc_insn_queue_o.imm[ + 31 + : + 0 + ], + rs1 : acc_insn_queue_o.operand_a, + rs2 : acc_insn_queue_o.operand_b, + frm : fpnew_pkg::roundmode_e'(fcsr_frm_i), + trans_id: acc_insn_queue_o.trans_id, + default: '0 + }; + // Wait until the instruction is no longer speculative. 
+ acc_req_valid = insn_ready_q[acc_insn_queue_o.trans_id] || + (acc_commit && insn_pending_q[acc_commit_trans_id]); + acc_insn_queue_pop = acc_req_valid && acc_req_ready; + end + end + + /************************** + * Accelerator response * + **************************/ + + logic acc_ld_disp; + logic acc_st_disp; + + // Unpack the accelerator response + assign acc_trans_id_o = acc_resp_i.trans_id; + assign acc_result_o = acc_resp_i.result; + assign acc_valid_o = acc_resp_i.resp_valid; + assign acc_exception_o = '{cause: riscv::ILLEGAL_INSTR, tval : '0, valid: acc_resp_i.error}; + assign acc_fflags_valid_o = acc_resp_i.fflags_valid; + assign acc_fflags_o = acc_resp_i.fflags; + // Always ready to receive responses + assign acc_req_o.resp_ready = 1'b1; + + // Signal dispatched load/store to issue stage + assign acc_ld_disp = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_LOAD); + assign acc_st_disp = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_STORE); + + // Cache invalidation + assign inval_valid_o = acc_resp_i.inval_valid; + assign inval_addr_o = acc_resp_i.inval_addr; + + /************************** + * Accelerator commit * + **************************/ + + // Instruction can be issued to the (in-order) back-end if + // it reached the top of the scoreboard and it hasn't been + // issued yet + always_comb begin : accelerator_commit + acc_commit = 1'b0; + if (!commit_instr_i[0].valid && commit_instr_i[0].fu == ACCEL) acc_commit = 1'b1; + if (commit_instr_i[0].valid && !commit_instr_i[1].valid && commit_instr_i[1].fu == ACCEL) + acc_commit = 1'b1; + end + + // Dirty the V state if we are committing anything related to the vector accelerator + always_comb begin : dirty_v_state + dirty_v_state_o = 1'b0; + for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin + dirty_v_state_o |= commit_ack_i[i] & (commit_instr_i[i].fu == ACCEL); + end + end + + assign acc_commit_trans_id = !commit_instr_i[0].valid ? 
commit_instr_i[0].trans_id + : commit_instr_i[1].trans_id; + + /************************** + * Accelerator barriers * + **************************/ + + // On a store barrier (i.e. any barrier that requires preceeding stores to complete + // before continuing execution), halt execution while there are pending stores in + // the accelerator pipeline. + logic wait_acc_store_d, wait_acc_store_q; + `FF(wait_acc_store_q, wait_acc_store_d, '0) + + // Set on store barrier. Clear when no store is pending. + assign wait_acc_store_d = (wait_acc_store_q | commit_st_barrier_i) & acc_resp_i.store_pending; + assign ctrl_halt_o = wait_acc_store_q; + + /************************** + * Load/Store tracking * + **************************/ + + // Loads + logic acc_spec_loads_overflow; + logic [2:0] acc_spec_loads_pending; + logic acc_disp_loads_overflow; + logic [2:0] acc_disp_loads_pending; + + assign acc_no_ld_pending = (acc_spec_loads_pending == 3'b0) && (acc_disp_loads_pending == 3'b0); + + // Count speculative loads. These can still be flushed. + counter #( + .WIDTH (3), + .STICKY_OVERFLOW(0) + ) i_acc_spec_loads ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .clear_i (flush_ex_i), + .en_i ((acc_valid_d && issue_instr_i.op == ACCEL_OP_LOAD) ^ acc_ld_disp), + .load_i (1'b0), + .down_i (acc_ld_disp), + .d_i ('0), + .q_o (acc_spec_loads_pending), + .overflow_o(acc_spec_loads_overflow) + ); + + // Count dispatched loads. These cannot be flushed anymore. 
+ counter #( + .WIDTH (3), + .STICKY_OVERFLOW(0) + ) i_acc_disp_loads ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .clear_i (1'b0), + .en_i (acc_ld_disp ^ acc_resp_i.load_complete), + .load_i (1'b0), + .down_i (acc_resp_i.load_complete), + .d_i ('0), + .q_o (acc_disp_loads_pending), + .overflow_o(acc_disp_loads_overflow) + ); + + acc_dispatcher_no_load_overflow : + assert property ( + @(posedge clk_i) disable iff (~rst_ni) (acc_spec_loads_overflow == 1'b0) && (acc_disp_loads_overflow == 1'b0) ) + else $error("[acc_dispatcher] Too many pending loads."); + + // Stores + logic acc_spec_stores_overflow; + logic [2:0] acc_spec_stores_pending; + logic acc_disp_stores_overflow; + logic [2:0] acc_disp_stores_pending; + + assign acc_no_st_pending = (acc_spec_stores_pending == 3'b0) && (acc_disp_stores_pending == 3'b0); + + // Count speculative stores. These can still be flushed. + counter #( + .WIDTH (3), + .STICKY_OVERFLOW(0) + ) i_acc_spec_stores ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .clear_i (flush_ex_i), + .en_i ((acc_valid_d && issue_instr_i.op == ACCEL_OP_STORE) ^ acc_st_disp), + .load_i (1'b0), + .down_i (acc_st_disp), + .d_i ('0), + .q_o (acc_spec_stores_pending), + .overflow_o(acc_spec_stores_overflow) + ); + + // Count dispatched stores. These cannot be flushed anymore. 
+ counter #( + .WIDTH (3), + .STICKY_OVERFLOW(0) + ) i_acc_disp_stores ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .clear_i (1'b0), + .en_i (acc_st_disp ^ acc_resp_i.store_complete), + .load_i (1'b0), + .down_i (acc_resp_i.store_complete), + .d_i ('0), + .q_o (acc_disp_stores_pending), + .overflow_o(acc_disp_stores_overflow) + ); + + acc_dispatcher_no_store_overflow : + assert property ( + @(posedge clk_i) disable iff (~rst_ni) (acc_spec_stores_overflow == 1'b0) && (acc_disp_stores_overflow == 1'b0) ) + else $error("[acc_dispatcher] Too many pending stores."); + + /************************** + * Tie Off Unused Signals * + **************************/ + + assign acc_stall_st_pending_o = 1'b0; + assign flush_pipeline_o = 1'b0; + assign acc_dcache_req_ports_o = '0; + +endmodule : acc_dispatcher diff --git a/test/type_param/core/acc_dispatcher_corrected.sv b/test/type_param/core/acc_dispatcher_corrected.sv new file mode 100644 index 00000000..6f9c8fc2 --- /dev/null +++ b/test/type_param/core/acc_dispatcher_corrected.sv @@ -0,0 +1,423 @@ +// Copyright 2020 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Authors: Matheus Cavalcante, ETH Zurich +// Nils Wistoff, ETH Zurich +// Date: 20.11.2020 +// Description: Functional unit that dispatches CVA6 instructions to accelerators. 
+ +module acc_dispatcher + import ariane_pkg::*; + import riscv::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type acc_req_t = acc_pkg::accelerator_req_t, + parameter type acc_resp_t = acc_pkg::accelerator_resp_t, + parameter type acc_cfg_t = logic, + parameter acc_cfg_t AccCfg = '0 +) ( + input logic clk_i, + input logic rst_ni, + // Interface with the CSR regfile + input logic acc_cons_en_i, // Accelerator memory consistent mode + output logic acc_fflags_valid_o, + output logic [4:0] acc_fflags_o, + // Interface with the CSRs + input priv_lvl_t ld_st_priv_lvl_i, + input logic sum_i, + input pmpcfg_t [15:0] pmpcfg_i, + input logic [15:0][PLEN-3:0] pmpaddr_i, + input logic [2:0] fcsr_frm_i, + output logic dirty_v_state_o, + // Interface with the issue stage + input scoreboard_entry_t issue_instr_i, + input logic issue_instr_hs_i, + output logic issue_stall_o, + input fu_data_t fu_data_i, + input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i, + output logic [TRANS_ID_BITS-1:0] acc_trans_id_o, + output xlen_t acc_result_o, + output logic acc_valid_o, + output exception_t acc_exception_o, + // Interface with the execute stage + output logic acc_valid_ex_o, // FU executed + // Interface with the commit stage + input logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i, + input logic commit_st_barrier_i, // A store barrier was commited + // Interface with the load/store unit + output logic acc_stall_st_pending_o, + input logic acc_no_st_pending_i, + input dcache_req_i_t [2:0] dcache_req_ports_i, + // Interface with the controller + output logic ctrl_halt_o, + input logic flush_unissued_instr_i, + input logic flush_ex_i, + output logic flush_pipeline_o, + // Interface with cache subsystem + output dcache_req_i_t [1:0] acc_dcache_req_ports_o, + input dcache_req_o_t [1:0] acc_dcache_req_ports_i, + input logic inval_ready_i, + output logic inval_valid_o, + output logic [63:0] inval_addr_o, + // Accelerator interface + 
output acc_pkg::accelerator_req_t acc_req_o, + input acc_pkg::accelerator_resp_t acc_resp_i +); + + `include "common_cells/registers.svh" + + import cf_math_pkg::idx_width; + + /*********************** + * Common signals * + ***********************/ + + logic acc_ready; + logic acc_valid_d, acc_valid_q; + + /************************** + * Accelerator issue * + **************************/ + + // Issue accelerator instructions + `FF(acc_valid_q, acc_valid_d, '0) + + assign acc_valid_ex_o = acc_valid_q; + assign acc_valid_d = ~issue_instr_i.ex.valid & + issue_instr_hs_i & + (issue_instr_i.fu == ACCEL) & + ~flush_unissued_instr_i; + + // Accelerator load/store pending signals + logic acc_no_ld_pending; + logic acc_no_st_pending; + + // Stall issue stage in three cases: + always_comb begin : stall_issue + unique case (issue_instr_i.fu) + ACCEL: + // 1. We're issuing an accelerator instruction but the dispatcher isn't ready yet + issue_stall_o = ~acc_ready; + LOAD: + // 2. We're issuing a scalar load but there is an inflight accelerator store. + issue_stall_o = acc_cons_en_i & ~acc_no_st_pending; + STORE: + // 3. We're issuing a scalar store but there is an inflight accelerator load or store. + issue_stall_o = acc_cons_en_i & (~acc_no_st_pending | ~acc_no_ld_pending); + default: issue_stall_o = 1'b0; + endcase + end + + /*********************** + * Instruction queue * + ***********************/ + + localparam InstructionQueueDepth = 3; + + fu_data_t acc_data; + fu_data_t acc_insn_queue_o; + logic acc_insn_queue_pop; + logic acc_insn_queue_empty; + logic [idx_width(InstructionQueueDepth)-1:0] acc_insn_queue_usage; + logic acc_commit; + logic [ TRANS_ID_BITS-1:0] acc_commit_trans_id; + + assign acc_data = acc_valid_ex_o ? 
fu_data_i : '0; + + fifo_v3 #( + .DEPTH (InstructionQueueDepth), + .FALL_THROUGH(1'b1), + .dtype (fu_data_t) + ) i_acc_insn_queue ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (flush_ex_i), + .testmode_i(1'b0), + .data_i (fu_data_i), + .push_i (acc_valid_q), + .full_o ( /* Unused */), + .data_o (acc_insn_queue_o), + .pop_i (acc_insn_queue_pop), + .empty_o (acc_insn_queue_empty), + .usage_o (acc_insn_queue_usage) + ); + + // We are ready if the instruction queue is able to accept at least one more entry. + assign acc_ready = acc_insn_queue_usage < (InstructionQueueDepth - 1); + + /********************************** + * Non-speculative instructions * + **********************************/ + + // Keep track of the instructions that were received by the dispatcher. + logic [NR_SB_ENTRIES-1:0] insn_pending_d, insn_pending_q; + `FF(insn_pending_q, insn_pending_d, '0) + + // Only non-speculative instructions can be issued to the accelerators. + // The following block keeps track of which transaction IDs reached the + // top of the scoreboard, and are therefore no longer speculative. + logic [NR_SB_ENTRIES-1:0] insn_ready_d, insn_ready_q; + `FF(insn_ready_q, insn_ready_d, '0) + + always_comb begin : p_non_speculative_ff + // Maintain state + insn_pending_d = insn_pending_q; + insn_ready_d = insn_ready_q; + + // We received a new instruction + if (acc_valid_q) insn_pending_d[acc_data.trans_id] = 1'b1; + // Flush all received instructions + if (flush_ex_i) insn_pending_d = '0; + + // An accelerator instruction is no longer speculative. + if (acc_commit && insn_pending_q[acc_commit_trans_id]) begin + insn_ready_d[acc_commit_trans_id] = 1'b1; + insn_pending_d[acc_commit_trans_id] = 1'b0; + end + + // An accelerator instruction was issued. 
+ if (acc_req_o.req_valid) insn_ready_d[acc_req_o.trans_id] = 1'b0; + end : p_non_speculative_ff + + /************************* + * Accelerator request * + *************************/ + + acc_pkg::accelerator_req_t acc_req; + logic acc_req_valid; + logic acc_req_ready; + + acc_pkg::accelerator_req_t acc_req_int; + fall_through_register #( + .T(acc_pkg::accelerator_req_t) + ) i_accelerator_req_register ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .clr_i (1'b0), + .testmode_i(1'b0), + .data_i (acc_req), + .valid_i (acc_req_valid), + .ready_o (acc_req_ready), + .data_o (acc_req_int), + .valid_o (acc_req_o.req_valid), + .ready_i (acc_resp_i.req_ready) + ); + + assign acc_req_o.insn = acc_req_int.insn; + assign acc_req_o.rs1 = acc_req_int.rs1; + assign acc_req_o.rs2 = acc_req_int.rs2; + assign acc_req_o.frm = acc_req_int.frm; + assign acc_req_o.trans_id = acc_req_int.trans_id; + assign acc_req_o.store_pending = !acc_no_st_pending_i && acc_cons_en_i; + assign acc_req_o.acc_cons_en = acc_cons_en_i; + assign acc_req_o.inval_ready = inval_ready_i; + + always_comb begin : accelerator_req_dispatcher + // Do not fetch from the instruction queue + acc_insn_queue_pop = 1'b0; + + // Default values + acc_req = '0; + acc_req_valid = 1'b0; + + // Unpack fu_data_t into accelerator_req_t + if (!acc_insn_queue_empty) begin + acc_req = '{ + // Instruction is forwarded from the decoder as an immediate + // - + // frm rounding information is up to date during a valid request to the accelerator + // The scoreboard synchronizes it with previous fcsr writes, and future fcsr writes + // do not take place until the accelerator answers (Ariane commits in-order) + insn : + acc_insn_queue_o.imm[ + 31 + : + 0 + ], + rs1 : acc_insn_queue_o.operand_a, + rs2 : acc_insn_queue_o.operand_b, + frm : fpnew_pkg::roundmode_e'(fcsr_frm_i), + trans_id: acc_insn_queue_o.trans_id, + default: '0 + }; + // Wait until the instruction is no longer speculative. 
+ acc_req_valid = insn_ready_q[acc_insn_queue_o.trans_id] || + (acc_commit && insn_pending_q[acc_commit_trans_id]); + acc_insn_queue_pop = acc_req_valid && acc_req_ready; + end + end + + /************************** + * Accelerator response * + **************************/ + + logic acc_ld_disp; + logic acc_st_disp; + + // Unpack the accelerator response + assign acc_trans_id_o = acc_resp_i.trans_id; + assign acc_result_o = acc_resp_i.result; + assign acc_valid_o = acc_resp_i.resp_valid; + assign acc_exception_o = '{cause: riscv::ILLEGAL_INSTR, tval : '0, valid: acc_resp_i.error}; + assign acc_fflags_valid_o = acc_resp_i.fflags_valid; + assign acc_fflags_o = acc_resp_i.fflags; + // Always ready to receive responses + assign acc_req_o.resp_ready = 1'b1; + + // Signal dispatched load/store to issue stage + assign acc_ld_disp = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_LOAD); + assign acc_st_disp = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_STORE); + + // Cache invalidation + assign inval_valid_o = acc_resp_i.inval_valid; + assign inval_addr_o = acc_resp_i.inval_addr; + + /************************** + * Accelerator commit * + **************************/ + + // Instruction can be issued to the (in-order) back-end if + // it reached the top of the scoreboard and it hasn't been + // issued yet + always_comb begin : accelerator_commit + acc_commit = 1'b0; + if (!commit_instr_i[0].valid && commit_instr_i[0].fu == ACCEL) acc_commit = 1'b1; + if (commit_instr_i[0].valid && !commit_instr_i[1].valid && commit_instr_i[1].fu == ACCEL) + acc_commit = 1'b1; + end + + // Dirty the V state if we are committing anything related to the vector accelerator + always_comb begin : dirty_v_state + dirty_v_state_o = 1'b0; + for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin + dirty_v_state_o |= commit_ack_i[i] & (commit_instr_i[i].fu == ACCEL); + end + end + + assign acc_commit_trans_id = !commit_instr_i[0].valid ? 
commit_instr_i[0].trans_id + : commit_instr_i[1].trans_id; + + /************************** + * Accelerator barriers * + **************************/ + + // On a store barrier (i.e. any barrier that requires preceeding stores to complete + // before continuing execution), halt execution while there are pending stores in + // the accelerator pipeline. + logic wait_acc_store_d, wait_acc_store_q; + `FF(wait_acc_store_q, wait_acc_store_d, '0) + + // Set on store barrier. Clear when no store is pending. + assign wait_acc_store_d = (wait_acc_store_q | commit_st_barrier_i) & acc_resp_i.store_pending; + assign ctrl_halt_o = wait_acc_store_q; + + /************************** + * Load/Store tracking * + **************************/ + + // Loads + logic acc_spec_loads_overflow; + logic [2:0] acc_spec_loads_pending; + logic acc_disp_loads_overflow; + logic [2:0] acc_disp_loads_pending; + + assign acc_no_ld_pending = (acc_spec_loads_pending == 3'b0) && (acc_disp_loads_pending == 3'b0); + + // Count speculative loads. These can still be flushed. + counter #( + .WIDTH (3), + .STICKY_OVERFLOW(0) + ) i_acc_spec_loads ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .clear_i (flush_ex_i), + .en_i ((acc_valid_d && issue_instr_i.op == ACCEL_OP_LOAD) ^ acc_ld_disp), + .load_i (1'b0), + .down_i (acc_ld_disp), + .d_i ('0), + .q_o (acc_spec_loads_pending), + .overflow_o(acc_spec_loads_overflow) + ); + + // Count dispatched loads. These cannot be flushed anymore. 
+ counter #( + .WIDTH (3), + .STICKY_OVERFLOW(0) + ) i_acc_disp_loads ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .clear_i (1'b0), + .en_i (acc_ld_disp ^ acc_resp_i.load_complete), + .load_i (1'b0), + .down_i (acc_resp_i.load_complete), + .d_i ('0), + .q_o (acc_disp_loads_pending), + .overflow_o(acc_disp_loads_overflow) + ); + + acc_dispatcher_no_load_overflow : + assert property ( + @(posedge clk_i) disable iff (~rst_ni) (acc_spec_loads_overflow == 1'b0) && (acc_disp_loads_overflow == 1'b0) ) + else $error("[acc_dispatcher] Too many pending loads."); + + // Stores + logic acc_spec_stores_overflow; + logic [2:0] acc_spec_stores_pending; + logic acc_disp_stores_overflow; + logic [2:0] acc_disp_stores_pending; + + assign acc_no_st_pending = (acc_spec_stores_pending == 3'b0) && (acc_disp_stores_pending == 3'b0); + + // Count speculative stores. These can still be flushed. + counter #( + .WIDTH (3), + .STICKY_OVERFLOW(0) + ) i_acc_spec_stores ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .clear_i (flush_ex_i), + .en_i ((acc_valid_d && issue_instr_i.op == ACCEL_OP_STORE) ^ acc_st_disp), + .load_i (1'b0), + .down_i (acc_st_disp), + .d_i ('0), + .q_o (acc_spec_stores_pending), + .overflow_o(acc_spec_stores_overflow) + ); + + // Count dispatched stores. These cannot be flushed anymore. 
+ counter #( + .WIDTH (3), + .STICKY_OVERFLOW(0) + ) i_acc_disp_stores ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .clear_i (1'b0), + .en_i (acc_st_disp ^ acc_resp_i.store_complete), + .load_i (1'b0), + .down_i (acc_resp_i.store_complete), + .d_i ('0), + .q_o (acc_disp_stores_pending), + .overflow_o(acc_disp_stores_overflow) + ); + + acc_dispatcher_no_store_overflow : + assert property ( + @(posedge clk_i) disable iff (~rst_ni) (acc_spec_stores_overflow == 1'b0) && (acc_disp_stores_overflow == 1'b0) ) + else $error("[acc_dispatcher] Too many pending stores."); + + /************************** + * Tie Off Unused Signals * + **************************/ + + assign acc_stall_st_pending_o = 1'b0; + assign flush_pipeline_o = 1'b0; + assign acc_dcache_req_ports_o = '0; + +endmodule : acc_dispatcher diff --git a/test/type_param/core/alu.sv b/test/type_param/core/alu.sv new file mode 100644 index 00000000..a928725e --- /dev/null +++ b/test/type_param/core/alu.sv @@ -0,0 +1,359 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// Author: Matthias Baer +// Author: Igor Loi +// Author: Andreas Traber +// Author: Lukas Mueller +// Author: Florian Zaruba +// +// Date: 19.03.2017 +// Description: Ariane ALU based on RI5CY's ALU. All logic visible in this module is combinational (assign / always_comb); clk_i and rst_ni are not referenced in the body. + + +module alu + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input fu_data_t fu_data_i, + output riscv::xlen_t result_o, + output logic alu_branch_res_o +); + + riscv::xlen_t operand_a_rev; + logic [ 31:0] operand_a_rev32; + logic [ riscv::XLEN:0] operand_b_neg; + logic [riscv::XLEN+1:0] adder_result_ext_o; + logic less; // handles both signed and unsigned forms + logic [ 31:0] rolw; // Rotate Left Word + logic [ 31:0] rorw; // Rotate Right Word + logic [31:0] orcbw, rev8w; + logic [ $clog2(riscv::XLEN) : 0] cpop; // Count Population + logic [$clog2(riscv::XLEN)-1 : 0] lz_tz_count; // Count Leading/Trailing Zeros (CTZ feeds the bit-reversed operand) + logic [ 4:0] lz_tz_wcount; // Count Leading/Trailing Zeros Word + logic lz_tz_empty, lz_tz_wempty; + riscv::xlen_t orcbw_result, rev8w_result; + + // bit reverse operand_a for left shifts and bit counting + generate + genvar k; + for (k = 0; k < riscv::XLEN; k++) + assign operand_a_rev[k] = fu_data_i.operand_a[riscv::XLEN-1-k]; + + for (k = 0; k < 32; k++) assign operand_a_rev32[k] = fu_data_i.operand_a[31-k]; + endgenerate + + // ------ + // Adder + // ------ + logic adder_op_b_negate; + logic adder_z_flag; + logic [riscv::XLEN:0] adder_in_a, adder_in_b; + riscv::xlen_t adder_result; + logic [riscv::XLEN-1:0] operand_a_bitmanip, bit_indx; + + always_comb begin + adder_op_b_negate = 1'b0; + + unique case (fu_data_i.operation) + // ADDER OPS + EQ, NE, SUB, SUBW, ANDN, ORN, XNOR: adder_op_b_negate = 1'b1; + default: ; + endcase + end + + always_comb begin + operand_a_bitmanip = fu_data_i.operand_a; + + if (CVA6Cfg.RVB) begin + if (riscv::IS_XLEN64) begin + unique case (fu_data_i.operation) + SH1ADDUW: operand_a_bitmanip =
fu_data_i.operand_a[31:0] << 1; + SH2ADDUW: operand_a_bitmanip = fu_data_i.operand_a[31:0] << 2; + SH3ADDUW: operand_a_bitmanip = fu_data_i.operand_a[31:0] << 3; + CTZW: operand_a_bitmanip = operand_a_rev32; + ADDUW, CPOPW, CLZW: operand_a_bitmanip = fu_data_i.operand_a[31:0]; + default: ; + endcase + end + unique case (fu_data_i.operation) + SH1ADD: operand_a_bitmanip = fu_data_i.operand_a << 1; + SH2ADD: operand_a_bitmanip = fu_data_i.operand_a << 2; + SH3ADD: operand_a_bitmanip = fu_data_i.operand_a << 3; + CTZ: operand_a_bitmanip = operand_a_rev; + default: ; + endcase + end + end + + // prepare operand a: append a constant 1 as LSB; when operand b is inverted its LSB is 1 as well, so the carry out of bit 0 supplies the "plus one" of the two's complement + assign adder_in_a = {operand_a_bitmanip, 1'b1}; + + // prepare operand b: append a 0 LSB and invert every bit when adder_op_b_negate is set + assign operand_b_neg = {fu_data_i.operand_b, 1'b0} ^ {riscv::XLEN + 1{adder_op_b_negate}}; + assign adder_in_b = operand_b_neg; + + // actual adder; the helper LSB is dropped again when adder_result is extracted + assign adder_result_ext_o = $unsigned(adder_in_a) + $unsigned(adder_in_b); + assign adder_result = adder_result_ext_o[riscv::XLEN:1]; + assign adder_z_flag = ~|adder_result; + + // get the right branch comparison result + always_comb begin : branch_resolve + // set comparison by default + alu_branch_res_o = 1'b1; + case (fu_data_i.operation) + EQ: alu_branch_res_o = adder_z_flag; + NE: alu_branch_res_o = ~adder_z_flag; + LTS, LTU: alu_branch_res_o = less; + GES, GEU: alu_branch_res_o = ~less; + default: alu_branch_res_o = 1'b1; + endcase + end + + // --------- + // Shifts + // --------- + + // TODO: this can probably be optimized significantly + logic shift_left; // should we shift left + logic shift_arithmetic; + + riscv::xlen_t shift_amt; // amount of shift, to the right + riscv::xlen_t shift_op_a; // input of the shifter + logic [ 31:0] shift_op_a32; // input to the 32 bit shift operation + + riscv::xlen_t shift_result; + logic [ 31:0] shift_result32; + + logic [riscv::XLEN:0] shift_right_result; + logic [ 32:0] shift_right_result32; + + riscv::xlen_t shift_left_result; + logic [ 31:0] shift_left_result32; + + assign shift_amt =
fu_data_i.operand_b; + + assign shift_left = (fu_data_i.operation == SLL) | (fu_data_i.operation == SLLW); + + assign shift_arithmetic = (fu_data_i.operation == SRA) | (fu_data_i.operation == SRAW); + + // right shifts, we let the synthesizer optimize this; the operand gets one extra MSB that copies the sign only for arithmetic shifts, so a single signed right shift serves both the logical and the arithmetic case + logic [riscv::XLEN:0] shift_op_a_64; + logic [32:0] shift_op_a_32; + + // choose the bit reversed or the normal input for shift operand a + assign shift_op_a = shift_left ? operand_a_rev : fu_data_i.operand_a; + assign shift_op_a32 = shift_left ? operand_a_rev32 : fu_data_i.operand_a[31:0]; + + assign shift_op_a_64 = {shift_arithmetic & shift_op_a[riscv::XLEN-1], shift_op_a}; + assign shift_op_a_32 = {shift_arithmetic & shift_op_a[31], shift_op_a32}; + + assign shift_right_result = $unsigned($signed(shift_op_a_64) >>> shift_amt[5:0]); + + assign shift_right_result32 = $unsigned($signed(shift_op_a_32) >>> shift_amt[4:0]); + // bit reverse the shift_right_result for left shifts + genvar j; + generate + for (j = 0; j < riscv::XLEN; j++) + assign shift_left_result[j] = shift_right_result[riscv::XLEN-1-j]; + + for (j = 0; j < 32; j++) assign shift_left_result32[j] = shift_right_result32[31-j]; + + endgenerate + + assign shift_result = shift_left ? shift_left_result : shift_right_result[riscv::XLEN-1:0]; + assign shift_result32 = shift_left ?
shift_left_result32 : shift_right_result32[31:0]; + + // ------------ + // Comparisons: both operands are extended by one bit (their sign bit when sgn is set, 0 otherwise) so one signed compare covers signed and unsigned operations + // ------------ + + always_comb begin + logic sgn; + sgn = 1'b0; + + if ((fu_data_i.operation == SLTS) || + (fu_data_i.operation == LTS) || + (fu_data_i.operation == GES) || + (fu_data_i.operation == MAX) || + (fu_data_i.operation == MIN)) + sgn = 1'b1; + + less = ($signed({sgn & fu_data_i.operand_a[riscv::XLEN-1], fu_data_i.operand_a}) < + $signed({sgn & fu_data_i.operand_b[riscv::XLEN-1], fu_data_i.operand_b})); + end + + if (CVA6Cfg.RVB) begin : gen_bitmanip + // Count Population + Count population Word + + popcount #( + .INPUT_WIDTH(riscv::XLEN) + ) i_cpop_count ( + .data_i (operand_a_bitmanip), + .popcount_o(cpop) + ); + + // Count Leading/Trailing Zeros + // full width (riscv::XLEN bits) + lzc #( + .WIDTH(riscv::XLEN), + .MODE (1) + ) i_clz_64b ( + .in_i(operand_a_bitmanip), + .cnt_o(lz_tz_count), + .empty_o(lz_tz_empty) + ); + if (riscv::IS_XLEN64) begin + //32b + lzc #( + .WIDTH(32), + .MODE (1) + ) i_clz_32b ( + .in_i(operand_a_bitmanip[31:0]), + .cnt_o(lz_tz_wcount), + .empty_o(lz_tz_wempty) + ); + end + end + + if (CVA6Cfg.RVB) begin : gen_orcbw_rev8w_results + assign orcbw = { + {8{|fu_data_i.operand_a[31:24]}}, + {8{|fu_data_i.operand_a[23:16]}}, + {8{|fu_data_i.operand_a[15:8]}}, + {8{|fu_data_i.operand_a[7:0]}} + }; + assign rev8w = { + {fu_data_i.operand_a[7:0]}, + {fu_data_i.operand_a[15:8]}, + {fu_data_i.operand_a[23:16]}, + {fu_data_i.operand_a[31:24]} + }; + if (riscv::IS_XLEN64) begin : gen_64b + assign orcbw_result = { + {8{|fu_data_i.operand_a[63:56]}}, + {8{|fu_data_i.operand_a[55:48]}}, + {8{|fu_data_i.operand_a[47:40]}}, + {8{|fu_data_i.operand_a[39:32]}}, + orcbw + }; + assign rev8w_result = { + rev8w, + {fu_data_i.operand_a[39:32]}, + {fu_data_i.operand_a[47:40]}, + {fu_data_i.operand_a[55:48]}, + {fu_data_i.operand_a[63:56]} + }; + end else begin : gen_32b + assign orcbw_result = orcbw; + assign rev8w_result = rev8w; + end + end + + // ----------- + // Result MUX + //
----------- + always_comb begin + result_o = '0; + if (riscv::IS_XLEN64) begin + unique case (fu_data_i.operation) + // Add word: Ignore the upper bits and sign extend to 64 bit + ADDW, SUBW: result_o = {{riscv::XLEN - 32{adder_result[31]}}, adder_result[31:0]}; + SH1ADDUW, SH2ADDUW, SH3ADDUW: result_o = adder_result; + // Shifts 32 bit + SLLW, SRLW, SRAW: result_o = {{riscv::XLEN - 32{shift_result32[31]}}, shift_result32[31:0]}; + default: ; + endcase + end + unique case (fu_data_i.operation) + // Standard Operations (ANDN/ORN/XNOR reuse operand_b_neg, which holds the inverted operand b for them) + ANDL, ANDN: result_o = fu_data_i.operand_a & operand_b_neg[riscv::XLEN:1]; + ORL, ORN: result_o = fu_data_i.operand_a | operand_b_neg[riscv::XLEN:1]; + XORL, XNOR: result_o = fu_data_i.operand_a ^ operand_b_neg[riscv::XLEN:1]; + // Adder Operations + ADD, SUB, ADDUW, SH1ADD, SH2ADD, SH3ADD: result_o = adder_result; + // Shift Operations + SLL, SRL, SRA: result_o = (riscv::IS_XLEN64) ? shift_result : shift_result32; + // Comparison Operations + SLTS, SLTU: result_o = {{riscv::XLEN - 1{1'b0}}, less}; + default: ; // default case to suppress unique warning + endcase + + if (CVA6Cfg.RVB) begin + // One-hot mask for bit number (operand_b & (XLEN - 1)); used by the single-bit instructions below + bit_indx = 1 << (fu_data_i.operand_b & (riscv::XLEN - 1)); + // rolw, roriw, rorw + rolw = ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} << fu_data_i.operand_b[4:0]) | ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} >> (riscv::XLEN-32-fu_data_i.operand_b[4:0])); + rorw = ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} >> fu_data_i.operand_b[4:0]) | ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} << (riscv::XLEN-32-fu_data_i.operand_b[4:0])); + if (riscv::IS_XLEN64) begin + unique case (fu_data_i.operation) + CLZW, CTZW: + result_o = (lz_tz_wempty) ?
32 : {{riscv::XLEN - 5{1'b0}}, lz_tz_wcount}; // an all-zero input word yields 32 + ROLW: result_o = {{riscv::XLEN - 32{rolw[31]}}, rolw}; + RORW, RORIW: result_o = {{riscv::XLEN - 32{rorw[31]}}, rorw}; + default: ; + endcase + end + unique case (fu_data_i.operation) + // Integer minimum/maximum + MAX: result_o = less ? fu_data_i.operand_b : fu_data_i.operand_a; + MAXU: result_o = less ? fu_data_i.operand_b : fu_data_i.operand_a; + MIN: result_o = ~less ? fu_data_i.operand_b : fu_data_i.operand_a; + MINU: result_o = ~less ? fu_data_i.operand_b : fu_data_i.operand_a; + + // Single-bit instructions + BCLR, BCLRI: result_o = fu_data_i.operand_a & ~bit_indx; + BEXT, BEXTI: result_o = {{riscv::XLEN - 1{1'b0}}, |(fu_data_i.operand_a & bit_indx)}; + BINV, BINVI: result_o = fu_data_i.operand_a ^ bit_indx; + BSET, BSETI: result_o = fu_data_i.operand_a | bit_indx; + + // Count Leading/Trailing Zeros (when lz_tz_empty is set, 1 is added to the reported count) + CLZ, CTZ: + result_o = (lz_tz_empty) ? ({{riscv::XLEN - $clog2(riscv::XLEN) {1'b0}}, lz_tz_count} + 1) : + {{riscv::XLEN - $clog2(riscv::XLEN) {1'b0}}, lz_tz_count}; + + // Count population + CPOP, CPOPW: result_o = {{(riscv::XLEN - ($clog2(riscv::XLEN) + 1)) {1'b0}}, cpop}; + + // Sign and Zero Extend + SEXTB: result_o = {{riscv::XLEN - 8{fu_data_i.operand_a[7]}}, fu_data_i.operand_a[7:0]}; + SEXTH: result_o = {{riscv::XLEN - 16{fu_data_i.operand_a[15]}}, fu_data_i.operand_a[15:0]}; + ZEXTH: result_o = {{riscv::XLEN - 16{1'b0}}, fu_data_i.operand_a[15:0]}; + + // Bitwise Rotation + ROL: + result_o = (riscv::IS_XLEN64) ? ((fu_data_i.operand_a << fu_data_i.operand_b[5:0]) | (fu_data_i.operand_a >> (riscv::XLEN-fu_data_i.operand_b[5:0]))) : ((fu_data_i.operand_a << fu_data_i.operand_b[4:0]) | (fu_data_i.operand_a >> (riscv::XLEN-fu_data_i.operand_b[4:0]))); + + ROR, RORI: + result_o = (riscv::IS_XLEN64) ?
((fu_data_i.operand_a >> fu_data_i.operand_b[5:0]) | (fu_data_i.operand_a << (riscv::XLEN-fu_data_i.operand_b[5:0]))) : ((fu_data_i.operand_a >> fu_data_i.operand_b[4:0]) | (fu_data_i.operand_a << (riscv::XLEN-fu_data_i.operand_b[4:0]))); + + ORCB: result_o = orcbw_result; + REV8: result_o = rev8w_result; + + default: + if (fu_data_i.operation == SLLIUW && riscv::IS_XLEN64) + result_o = {{riscv::XLEN-32{1'b0}}, fu_data_i.operand_a[31:0]} << fu_data_i.operand_b[5:0]; // SLLIUW: zero-extend the low 32 bits of operand a, then shift left + endcase + end + if (CVA6Cfg.ZiCondExtEn) begin + unique case (fu_data_i.operation) + CZERO_EQZ: + result_o = (|fu_data_i.operand_b) ? fu_data_i.operand_a : '0; // move zero to rd if rs2 is equal to zero else rs1 + CZERO_NEZ: + result_o = (|fu_data_i.operand_b) ? '0 : fu_data_i.operand_a; // move zero to rd if rs2 is nonzero else rs1 + default: ; // default case to suppress unique warning + endcase + end + end +endmodule diff --git a/test/type_param/core/amo_buffer.sv b/test/type_param/core/amo_buffer.sv new file mode 100644 index 00000000..24a98ddb --- /dev/null +++ b/test/type_param/core/amo_buffer.sv @@ -0,0 +1,82 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 20.09.2018 +// Description: Buffers AMO requests +// This unit buffers atomic memory operations for the cache subsystem.
+// Furthermore it handles interfacing with the commit stage. The buffer is one fifo_v3 instance with DEPTH 1: ready_o is wired to its empty_o port and the internal amo_valid to its full_o port. + module amo_buffer #( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, // pipeline flush + + input logic valid_i, // AMO is valid + output logic ready_o, // AMO unit is ready + input ariane_pkg::amo_t amo_op_i, // AMO Operation + input logic [riscv::PLEN-1:0] paddr_i, // physical address of store which needs to be placed in the queue + input riscv::xlen_t data_i, // data which is placed in the queue + input logic [1:0] data_size_i, // type of request we are making (e.g.: bytes to write) + // D$ + output ariane_pkg::amo_req_t amo_req_o, // request to cache subsystem + input ariane_pkg::amo_resp_t amo_resp_i, // response from cache subsystem + // Auxiliary signals + input logic amo_valid_commit_i, // We have a valid AMO in the commit stage + input logic no_st_pending_i // there is currently no store pending anymore +); + logic flush_amo_buffer; + logic amo_valid; + + typedef struct packed { + ariane_pkg::amo_t op; + logic [riscv::PLEN-1:0] paddr; + riscv::xlen_t data; + logic [1:0] size; + } amo_op_t; + + amo_op_t amo_data_in, amo_data_out; + + // validate this request as soon as all stores have drained and the AMO is in the commit stage + assign amo_req_o.req = no_st_pending_i & amo_valid_commit_i & amo_valid; + assign amo_req_o.amo_op = amo_data_out.op; + assign amo_req_o.size = amo_data_out.size; + assign amo_req_o.operand_a = {{64 - riscv::PLEN{1'b0}}, amo_data_out.paddr}; + assign amo_req_o.operand_b = {{64 - riscv::XLEN{1'b0}}, amo_data_out.data}; + + assign amo_data_in.op = amo_op_i; + assign amo_data_in.data = data_i; + assign amo_data_in.paddr = paddr_i; + assign amo_data_in.size = data_size_i; + + // only flush if we are currently not committing the AMO + // i.e.: it is not speculative anymore + assign flush_amo_buffer = flush_i & !amo_valid_commit_i; + + fifo_v3
#( + .DEPTH(1), + .dtype(amo_op_t) + ) i_amo_fifo ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (flush_amo_buffer), + .testmode_i(1'b0), + .full_o (amo_valid), + .empty_o (ready_o), + .usage_o (), // usage output intentionally left unconnected + .data_i (amo_data_in), + .push_i (valid_i), + .data_o (amo_data_out), + .pop_i (amo_resp_i.ack) + ); + +endmodule diff --git a/test/type_param/core/ariane_regfile_ff.sv b/test/type_param/core/ariane_regfile_ff.sv new file mode 100644 index 00000000..ae5cbeb0 --- /dev/null +++ b/test/type_param/core/ariane_regfile_ff.sv @@ -0,0 +1,83 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Engineer: Francesco Conti - f.conti@unibo.it +// +// Additional contributions by: +// Markus Wegmann - markus.wegmann@technokrat.ch +// +// Design Name: RISC-V register file +// Project Name: zero-riscy +// Language: SystemVerilog +// +// Description: Register file with 31 or 15x 32 bit wide registers. +// Register 0 is fixed to 0. This register file is based on +// flip flops.
+// + module ariane_regfile #( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int unsigned DATA_WIDTH = 32, + parameter int unsigned NR_READ_PORTS = 2, + parameter bit ZERO_REG_ZERO = 0 +) ( + // clock and reset + input logic clk_i, + input logic rst_ni, + // disable clock gates for testing (not referenced in this module's body) + input logic test_en_i, + // read port + input logic [ NR_READ_PORTS-1:0][ 4:0] raddr_i, + output logic [ NR_READ_PORTS-1:0][DATA_WIDTH-1:0] rdata_o, + // write port + input logic [CVA6Cfg.NrCommitPorts-1:0][ 4:0] waddr_i, + input logic [CVA6Cfg.NrCommitPorts-1:0][DATA_WIDTH-1:0] wdata_i, + input logic [CVA6Cfg.NrCommitPorts-1:0] we_i +); + + localparam ADDR_WIDTH = 5; + localparam NUM_WORDS = 2 ** ADDR_WIDTH; + + logic [ NUM_WORDS-1:0][DATA_WIDTH-1:0] mem; + logic [CVA6Cfg.NrCommitPorts-1:0][ NUM_WORDS-1:0] we_dec; + + + always_comb begin : we_decoder + for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++) begin + for (int unsigned i = 0; i < NUM_WORDS; i++) begin + if (waddr_i[j] == i) we_dec[j][i] = we_i[j]; + else we_dec[j][i] = 1'b0; + end + end + end + + // the write loop covers all NUM_WORDS entries (it starts at 0); entry 0 is forced back to zero afterwards only when ZERO_REG_ZERO is set, and ports are visited in ascending order so a later port's non-blocking assignment to the same entry overrides an earlier one + always_ff @(posedge clk_i, negedge rst_ni) begin : register_write_behavioral + if (~rst_ni) begin + mem <= '{default: '0}; + end else begin + for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++) begin + for (int unsigned i = 0; i < NUM_WORDS; i++) begin + if (we_dec[j][i]) begin + mem[i] <= wdata_i[j]; + end + end + if (ZERO_REG_ZERO) begin + mem[0] <= '0; + end + end + end + end + + for (genvar i = 0; i < NR_READ_PORTS; i++) begin + assign rdata_o[i] = mem[raddr_i[i]]; + end + +endmodule diff --git a/test/type_param/core/ariane_regfile_fpga.sv b/test/type_param/core/ariane_regfile_fpga.sv new file mode 100644 index 00000000..22d5aaa3 --- /dev/null +++ b/test/type_param/core/ariane_regfile_fpga.sv @@ -0,0 +1,125 @@ +// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Engineer: Francesco Conti - f.conti@unibo.it +// +// Additional contributions by: +// Markus Wegmann - markus.wegmann@technokrat.ch +// Noam Gallmann - gnoam@live.com +// Felipe Lisboa Malaquias +// Henry Suzukawa +// +// +// Description: This register file is optimized for implementation on +// FPGAs. The register file features one distributed RAM block per implemented +// sync-write port, each with a parametrized number of async-read ports. +// Read-accesses are multiplexed from the relevant block depending on which block +// was last written to. For that purpose an additional array of registers is +// maintained keeping track of write accesses.
+// + module ariane_regfile_fpga #( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int unsigned DATA_WIDTH = 32, + parameter int unsigned NR_READ_PORTS = 2, + parameter bit ZERO_REG_ZERO = 0 +) ( + // clock and reset + input logic clk_i, + input logic rst_ni, + // disable clock gates for testing (not referenced in this module's body) + input logic test_en_i, + // read port + input logic [ NR_READ_PORTS-1:0][ 4:0] raddr_i, + output logic [ NR_READ_PORTS-1:0][DATA_WIDTH-1:0] rdata_o, + // write port + input logic [CVA6Cfg.NrCommitPorts-1:0][ 4:0] waddr_i, + input logic [CVA6Cfg.NrCommitPorts-1:0][DATA_WIDTH-1:0] wdata_i, + input logic [CVA6Cfg.NrCommitPorts-1:0] we_i +); + + localparam ADDR_WIDTH = 5; + localparam NUM_WORDS = 2 ** ADDR_WIDTH; + localparam LOG_NR_WRITE_PORTS = CVA6Cfg.NrCommitPorts == 1 ? 1 : $clog2(CVA6Cfg.NrCommitPorts); + + // Distributed RAM usually supports one write port per block - duplicate for each write port. + logic [ NUM_WORDS-1:0][ DATA_WIDTH-1:0] mem [CVA6Cfg.NrCommitPorts]; + + logic [CVA6Cfg.NrCommitPorts-1:0][ NUM_WORDS-1:0] we_dec; + logic [ NUM_WORDS-1:0][LOG_NR_WRITE_PORTS-1:0] mem_block_sel; + logic [ NUM_WORDS-1:0][LOG_NR_WRITE_PORTS-1:0] mem_block_sel_q; + + // write address decoder (for block selector) + always_comb begin + for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++) begin + for (int unsigned i = 0; i < NUM_WORDS; i++) begin + if (waddr_i[j] == i) begin + we_dec[j][i] = we_i[j]; + end else begin + we_dec[j][i] = 1'b0; + end + end + end + end + + // update block selector: + // signal mem_block_sel records where the current valid value is stored. + // if multiple ports try to write to the same address simultaneously, the port with the highest + // index has priority.
+ always_comb begin + mem_block_sel = mem_block_sel_q; + for (int i = 0; i < NUM_WORDS; i++) begin + for (int j = 0; j < CVA6Cfg.NrCommitPorts; j++) begin + if (we_dec[j][i] == 1'b1) begin + mem_block_sel[i] = LOG_NR_WRITE_PORTS'(j); + end + end + end + end + + // block selector flops + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + mem_block_sel_q <= '0; + end else begin + mem_block_sel_q <= mem_block_sel; + end + end + + // distributed RAM blocks, written on clk_i without a reset branch. NOTE(review): the write guard below parses as (~waddr_i[j]) != 0, i.e. the address is inverted before the comparison; presumably a plain waddr_i[j] != 0 was intended - confirm against upstream before changing it + logic [NR_READ_PORTS-1:0][DATA_WIDTH-1:0] mem_read[CVA6Cfg.NrCommitPorts]; + for (genvar j = 0; j < CVA6Cfg.NrCommitPorts; j++) begin : regfile_ram_block + always_ff @(posedge clk_i) begin + if (we_i[j] && ~waddr_i[j] != 0) begin + mem[j][waddr_i[j]] <= wdata_i[j]; + end + end + for (genvar k = 0; k < NR_READ_PORTS; k++) begin : block_read + assign mem_read[j][k] = mem[j][raddr_i[k]]; + end + end + + // output MUX: picks the block recorded in mem_block_sel_q for the read address; address 0 reads as zero only when ZERO_REG_ZERO is set + logic [NR_READ_PORTS-1:0][LOG_NR_WRITE_PORTS-1:0] block_addr; + for (genvar k = 0; k < NR_READ_PORTS; k++) begin : regfile_read_port + assign block_addr[k] = mem_block_sel_q[raddr_i[k]]; + assign rdata_o[k] = (ZERO_REG_ZERO && raddr_i[k] == '0) ? '0 : mem_read[block_addr[k]][k]; + end + + // random initialization of the memory to suppress assert warnings on Questa. + initial begin + for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin + for (int j = 0; j < NUM_WORDS; j++) begin + mem[i][j] = $random(); + end + end + end + +endmodule diff --git a/test/type_param/core/axi_shim.sv b/test/type_param/core/axi_shim.sv new file mode 100644 index 00000000..8e1cfa88 --- /dev/null +++ b/test/type_param/core/axi_shim.sv @@ -0,0 +1,310 @@ +/* Copyright 2018 ETH Zurich and University of Bologna. + * Copyright and related rights are licensed under the Solderpad Hardware + * License, Version 0.51 (the “License”); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law + * or agreed to in writing, software, hardware and materials distributed under + * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * + * File: axi_shim.sv + * Author: Michael Schaffner + * Florian Zaruba + * Date: 1.8.2018 + * + * Description: Manages communication with the AXI Bus. Note that this unit does not + * buffer requests and register the signals. + * + */ + + +module axi_shim #( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int unsigned AxiNumWords = 4, // data width in dwords, this is also the maximum burst length, must be >=2 + parameter type axi_req_t = logic, + parameter type axi_rsp_t = logic +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + // read channel + // request + input logic rd_req_i, + output logic rd_gnt_o, + input logic [CVA6Cfg.AxiAddrWidth-1:0] rd_addr_i, + input logic [$clog2(AxiNumWords)-1:0] rd_blen_i, // axi convention: LEN-1 + input logic [2:0] rd_size_i, + input logic [CVA6Cfg.AxiIdWidth-1:0] rd_id_i, // use same ID for reads, or make sure you only have one outstanding read tx + input logic rd_lock_i, + // read response (we have to unconditionally sink the response) + input logic rd_rdy_i, + output logic rd_last_o, + output logic rd_valid_o, + output logic [CVA6Cfg.AxiDataWidth-1:0] rd_data_o, + output logic [CVA6Cfg.AxiUserWidth-1:0] rd_user_o, + output logic [CVA6Cfg.AxiIdWidth-1:0] rd_id_o, + output logic rd_exokay_o, // indicates whether exclusive tx succeeded + // write channel + input logic wr_req_i, + output logic wr_gnt_o, + input logic [CVA6Cfg.AxiAddrWidth-1:0] wr_addr_i, + input logic [AxiNumWords-1:0][CVA6Cfg.AxiDataWidth-1:0] wr_data_i, + input logic [AxiNumWords-1:0][CVA6Cfg.AxiUserWidth-1:0] wr_user_i, + input logic 
[AxiNumWords-1:0][(CVA6Cfg.AxiDataWidth/8)-1:0] wr_be_i, + input logic [$clog2(AxiNumWords)-1:0] wr_blen_i, // axi convention: LEN-1 + input logic [2:0] wr_size_i, + input logic [CVA6Cfg.AxiIdWidth-1:0] wr_id_i, + input logic wr_lock_i, + input logic [5:0] wr_atop_i, + // write response + input logic wr_rdy_i, + output logic wr_valid_o, + output logic [CVA6Cfg.AxiIdWidth-1:0] wr_id_o, + output logic wr_exokay_o, // indicates whether exclusive tx succeeded + // AXI port + output axi_req_t axi_req_o, + input axi_rsp_t axi_resp_i +); + localparam AddrIndex = ($clog2(AxiNumWords) > 0) ? $clog2(AxiNumWords) : 1; + + /////////////////////////////////////////////////////// + // write channel + /////////////////////////////////////////////////////// + + enum logic [3:0] { + IDLE, + WAIT_AW_READY, + WAIT_LAST_W_READY, + WAIT_LAST_W_READY_AW_READY, + WAIT_AW_READY_BURST + } + wr_state_q, wr_state_d; + + // AXI tx counter + logic [AddrIndex-1:0] wr_cnt_d, wr_cnt_q; + logic wr_single_req, wr_cnt_done, wr_cnt_clr, wr_cnt_en; + + assign wr_single_req = (wr_blen_i == 0); + + // address + assign axi_req_o.aw.burst = axi_pkg::BURST_INCR; // Use BURST_INCR for AXI regular transaction + assign axi_req_o.aw.addr = wr_addr_i[CVA6Cfg.AxiAddrWidth-1:0]; + assign axi_req_o.aw.size = wr_size_i; + assign axi_req_o.aw.len = wr_blen_i; + assign axi_req_o.aw.id = wr_id_i; + assign axi_req_o.aw.prot = 3'b0; + assign axi_req_o.aw.region = 4'b0; + assign axi_req_o.aw.lock = wr_lock_i; + assign axi_req_o.aw.cache = axi_pkg::CACHE_MODIFIABLE; + assign axi_req_o.aw.qos = 4'b0; + assign axi_req_o.aw.atop = wr_atop_i; + assign axi_req_o.aw.user = '0; + + // data + assign axi_req_o.w.data = wr_data_i[wr_cnt_q]; + assign axi_req_o.w.user = wr_user_i[wr_cnt_q]; + assign axi_req_o.w.strb = wr_be_i[wr_cnt_q]; + assign axi_req_o.w.last = wr_cnt_done; + + // write response + assign wr_exokay_o = (axi_resp_i.b.resp == axi_pkg::RESP_EXOKAY); + assign axi_req_o.b_ready = wr_rdy_i; + assign wr_valid_o = 
axi_resp_i.b_valid; + assign wr_id_o = axi_resp_i.b.id; + + // tx counter + assign wr_cnt_done = (wr_cnt_q == wr_blen_i); + assign wr_cnt_d = (wr_cnt_clr) ? '0 : (wr_cnt_en && CVA6Cfg.AxiBurstWriteEn) ? wr_cnt_q + 1 : wr_cnt_q; + + always_comb begin : p_axi_write_fsm + // default + wr_state_d = wr_state_q; + + axi_req_o.aw_valid = 1'b0; + axi_req_o.w_valid = 1'b0; + wr_gnt_o = 1'b0; + + wr_cnt_en = 1'b0; + wr_cnt_clr = 1'b0; + + case (wr_state_q) + /////////////////////////////////// + IDLE: begin + // we have an incoming request + if (wr_req_i) begin + // is this a read or write? + axi_req_o.aw_valid = 1'b1; + axi_req_o.w_valid = 1'b1; + + if (CVA6Cfg.AxiBurstWriteEn && !wr_single_req) begin + wr_cnt_en = axi_resp_i.w_ready; + + case ({ + axi_resp_i.aw_ready, axi_resp_i.w_ready + }) + 2'b11: wr_state_d = WAIT_LAST_W_READY; + 2'b01: wr_state_d = WAIT_LAST_W_READY_AW_READY; + 2'b10: wr_state_d = WAIT_LAST_W_READY; + default: ; + endcase + end else if (wr_single_req) begin // its a single write + wr_cnt_clr = 1'b1; + // single req can be granted here + wr_gnt_o = axi_resp_i.aw_ready & axi_resp_i.w_ready; + case ({ + axi_resp_i.aw_ready, axi_resp_i.w_ready + }) + 2'b01: wr_state_d = WAIT_AW_READY; + 2'b10: wr_state_d = WAIT_LAST_W_READY; + default: wr_state_d = IDLE; + endcase + // its a request for the whole cache line + end + end + end + /////////////////////////////////// + // ~> from single write + WAIT_AW_READY: begin + axi_req_o.aw_valid = 1'b1; + + if (axi_resp_i.aw_ready) begin + wr_state_d = IDLE; + wr_gnt_o = 1'b1; + end + end + /////////////////////////////////// + // ~> from write, there is an outstanding write + WAIT_LAST_W_READY: begin + axi_req_o.w_valid = 1'b1; + + if (CVA6Cfg.AxiBurstWriteEn && axi_resp_i.w_ready && !wr_cnt_done) begin + wr_cnt_en = 1'b1; + end else if (wr_cnt_done) begin // this is the last write + if (axi_resp_i.w_ready) begin + wr_state_d = IDLE; + wr_cnt_clr = 1'b1; + wr_gnt_o = 1'b1; + end + end + end + 
/////////////////////////////////// + default: begin + /////////////////////////////////// + // ~> we need to wait for an aw_ready and there is at least one outstanding write + if (CVA6Cfg.AxiBurstWriteEn) begin + if (wr_state_q == WAIT_LAST_W_READY_AW_READY) begin + axi_req_o.w_valid = 1'b1; + axi_req_o.aw_valid = 1'b1; + // we got an aw_ready + case ({ + axi_resp_i.aw_ready, axi_resp_i.w_ready + }) + // we got an aw ready + 2'b01: begin + // are there any outstanding transactions? + if (wr_cnt_done) begin + wr_state_d = WAIT_AW_READY_BURST; + wr_cnt_clr = 1'b1; + end else begin + // yes, so reduce the count and stay here + wr_cnt_en = 1'b1; + end + end + 2'b10: wr_state_d = WAIT_LAST_W_READY; + 2'b11: begin + // we are finished + if (wr_cnt_done) begin + wr_state_d = IDLE; + wr_gnt_o = 1'b1; + wr_cnt_clr = 1'b1; + // there are outstanding transactions + end else begin + wr_state_d = WAIT_LAST_W_READY; + wr_cnt_en = 1'b1; + end + end + default: ; + endcase + end /////////////////////////////////// + // ~> all data has already been sent, we are only waiting for the aw_ready + else if (wr_state_q == WAIT_AW_READY_BURST) begin + axi_req_o.aw_valid = 1'b1; + + if (axi_resp_i.aw_ready) begin + wr_state_d = IDLE; + wr_gnt_o = 1'b1; + end + end + end else begin + wr_state_d = IDLE; + end + end + endcase + end + + + /////////////////////////////////////////////////////// + // read channel + /////////////////////////////////////////////////////// + + // address + // in case of a wrapping transfer we can simply begin at the address, if we want to request a cache-line + // with an incremental transfer we need to output the corresponding base address of the cache line + assign axi_req_o.ar.burst = axi_pkg::BURST_INCR; // Use BURST_INCR for AXI regular transaction + assign axi_req_o.ar.addr = rd_addr_i[CVA6Cfg.AxiAddrWidth-1:0]; + assign axi_req_o.ar.size = rd_size_i; + assign axi_req_o.ar.len = rd_blen_i; + assign axi_req_o.ar.id = rd_id_i; + assign axi_req_o.ar.prot = 3'b0; 
+ assign axi_req_o.ar.region = 4'b0; + assign axi_req_o.ar.lock = rd_lock_i; + assign axi_req_o.ar.cache = axi_pkg::CACHE_MODIFIABLE; + assign axi_req_o.ar.qos = 4'b0; + assign axi_req_o.ar.user = '0; + + // make the read request + assign axi_req_o.ar_valid = rd_req_i; + assign rd_gnt_o = rd_req_i & axi_resp_i.ar_ready; + + // return path + assign axi_req_o.r_ready = rd_rdy_i; + assign rd_data_o = axi_resp_i.r.data; + if (ariane_pkg::AXI_USER_EN) begin + assign rd_user_o = axi_resp_i.r.user; + end else begin + assign rd_user_o = '0; + end + assign rd_last_o = axi_resp_i.r.last; + assign rd_valid_o = axi_resp_i.r_valid; + assign rd_id_o = axi_resp_i.r.id; + assign rd_exokay_o = (axi_resp_i.r.resp == axi_pkg::RESP_EXOKAY); + + + // ---------------- + // Registers + // ---------------- + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + // start in flushing state and initialize the memory + wr_state_q <= IDLE; + wr_cnt_q <= '0; + end else begin + wr_state_q <= wr_state_d; + wr_cnt_q <= wr_cnt_d; + end + end + + // ---------------- + // Assertions + // ---------------- + + //pragma translate_off + initial begin + assert (AxiNumWords >= 1) + else $fatal(1, "[axi adapter] AxiNumWords must be >= 1"); + assert (CVA6Cfg.AxiIdWidth >= 2) + else $fatal(1, "[axi adapter] AXI id width must be at least 2 bit wide"); + end + //pragma translate_on + +endmodule // axi_adapter2 diff --git a/test/type_param/core/branch_unit.sv b/test/type_param/core/branch_unit.sv new file mode 100644 index 00000000..dfcb1c76 --- /dev/null +++ b/test/type_param/core/branch_unit.sv @@ -0,0 +1,106 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 09.05.2017 +// Description: Branch target calculation and comparison + +module branch_unit #( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +) ( + input logic clk_i, + input logic rst_ni, + input logic debug_mode_i, + input ariane_pkg::fu_data_t fu_data_i, + input logic [riscv::VLEN-1:0] pc_i, // PC of instruction + input logic is_compressed_instr_i, + input logic fu_valid_i, // any functional unit is valid, check that there is no accidental mis-predict + input logic branch_valid_i, + input logic branch_comp_res_i, // branch comparison result from ALU + output logic [riscv::VLEN-1:0] branch_result_o, + + input ariane_pkg::branchpredict_sbe_t branch_predict_i, // this is the address we predicted + output ariane_pkg::bp_resolve_t resolved_branch_o, // this is the actual address we are targeting + output logic resolve_branch_o, // to ID to clear that we resolved the branch and we can + // accept new entries to the scoreboard + output ariane_pkg::exception_t branch_exception_o // branch exception out +); + logic [riscv::VLEN-1:0] target_address; + logic [riscv::VLEN-1:0] next_pc; + + // here we handle the various possibilities of mis-predicts + always_comb begin : mispredict_handler + // set the jump base, for JALR we need to look at the register, for all other control flow instructions we can take the current PC + automatic logic [riscv::VLEN-1:0] jump_base; + // TODO(zarubaf): The ALU can be used to calculate the branch target + jump_base = (fu_data_i.operation == ariane_pkg::JALR) ? 
fu_data_i.operand_a[riscv::VLEN-1:0] : pc_i; + + target_address = {riscv::VLEN{1'b0}}; + resolve_branch_o = 1'b0; + resolved_branch_o.target_address = {riscv::VLEN{1'b0}}; + resolved_branch_o.is_taken = 1'b0; + resolved_branch_o.valid = branch_valid_i; + resolved_branch_o.is_mispredict = 1'b0; + resolved_branch_o.cf_type = branch_predict_i.cf; + // calculate next PC, depending on whether the instruction is compressed or not this may be different + // TODO(zarubaf): We already calculate this a couple of times, maybe re-use? + next_pc = pc_i + ((is_compressed_instr_i) ? {{riscv::VLEN-2{1'b0}}, 2'h2} : {{riscv::VLEN-3{1'b0}}, 3'h4}); + // calculate target address simple 64 bit addition + target_address = $unsigned($signed(jump_base) + $signed(fu_data_i.imm[riscv::VLEN-1:0])); + // on a JALR we are supposed to reset the LSB to 0 (according to the specification) + if (fu_data_i.operation == ariane_pkg::JALR) target_address[0] = 1'b0; + // we need to put the branch target address into rd, this is the result of this unit + branch_result_o = next_pc; + resolved_branch_o.pc = pc_i; + // There are only two sources of mispredicts: + // 1. Branches + // 2. Jumps to register addresses + if (branch_valid_i) begin + // write target address which goes to PC Gen + resolved_branch_o.target_address = (branch_comp_res_i) ? target_address : next_pc; + resolved_branch_o.is_taken = branch_comp_res_i; + // check the outcome of the branch speculation + if (ariane_pkg::op_is_branch(fu_data_i.operation)) begin + // Set the `cf_type` of the output as `branch`, this will update the BHT. + resolved_branch_o.cf_type = ariane_pkg::Branch; + // If the ALU comparison does not agree with the BHT prediction set the resolution as mispredicted. 
+ resolved_branch_o.is_mispredict = branch_comp_res_i != (branch_predict_i.cf == ariane_pkg::Branch); + end + if (fu_data_i.operation == ariane_pkg::JALR + // check if the address of the jump register is correct and that we actually predicted + && (branch_predict_i.cf == ariane_pkg::NoCF || target_address != branch_predict_i.predict_address)) begin + resolved_branch_o.is_mispredict = 1'b1; + // update BTB only if this wasn't a return + if (branch_predict_i.cf != ariane_pkg::Return) + resolved_branch_o.cf_type = ariane_pkg::JumpR; + end + // to resolve the branch in ID + resolve_branch_o = 1'b1; + end + end + // use ALU exception signal for storing instruction fetch exceptions if + // the target address is not aligned to a 2 byte boundary + // + logic jump_taken; + always_comb begin : exception_handling + + // Do a jump if it is either unconditional jump (JAL | JALR) or `taken` conditional jump + jump_taken = !(ariane_pkg::op_is_branch(fu_data_i.operation)) || + ((ariane_pkg::op_is_branch(fu_data_i.operation)) && branch_comp_res_i); + branch_exception_o.cause = riscv::INSTR_ADDR_MISALIGNED; + branch_exception_o.valid = 1'b0; + branch_exception_o.tval = {{riscv::XLEN - riscv::VLEN{pc_i[riscv::VLEN-1]}}, pc_i}; + // Only throw instruction address misaligned exception if this is indeed a `taken` conditional branch or + // an unconditional jump + if (branch_valid_i && (target_address[0] || (!CVA6Cfg.RVC && target_address[1])) && jump_taken) begin + branch_exception_o.valid = 1'b1; + end + end +endmodule diff --git a/test/type_param/core/cache_subsystem/axi_adapter.sv b/test/type_param/core/cache_subsystem/axi_adapter.sv new file mode 100644 index 00000000..0b8f9eb1 --- /dev/null +++ b/test/type_param/core/cache_subsystem/axi_adapter.sv @@ -0,0 +1,520 @@ +/* Copyright 2018 ETH Zurich and University of Bologna. 
+ * Copyright and related rights are licensed under the Solderpad Hardware + * License, Version 0.51 (the “License”); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law + * or agreed to in writing, software, hardware and materials distributed under + * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * + * File: axi_adapter.sv + * Author: Florian Zaruba + * Date: 1.8.2018 + * + * Description: Manages communication with the AXI Bus + */ +//import std_cache_pkg::*; + +module axi_adapter #( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int unsigned DATA_WIDTH = 256, + parameter logic CRITICAL_WORD_FIRST = 0, // the AXI subsystem needs to support wrapping reads for this feature + parameter int unsigned CACHELINE_BYTE_OFFSET = 8, + parameter type axi_req_t = logic, + parameter type axi_rsp_t = logic +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + + input logic req_i, + input ariane_pkg::ad_req_t type_i, + input ariane_pkg::amo_t amo_i, + output logic gnt_o, + input logic [riscv::XLEN-1:0] addr_i, + input logic we_i, + input logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0][CVA6Cfg.AxiDataWidth-1:0] wdata_i, + input logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0][(CVA6Cfg.AxiDataWidth/8)-1:0] be_i, + input logic [1:0] size_i, + input logic [CVA6Cfg.AxiIdWidth-1:0] id_i, + // read port + output logic valid_o, + output logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0][CVA6Cfg.AxiDataWidth-1:0] rdata_o, + output logic [CVA6Cfg.AxiIdWidth-1:0] id_o, + // critical word - read port + output logic [CVA6Cfg.AxiDataWidth-1:0] critical_word_o, + output logic critical_word_valid_o, + // AXI port + 
output axi_req_t axi_req_o, + input axi_rsp_t axi_resp_i +); + localparam BURST_SIZE = (DATA_WIDTH / CVA6Cfg.AxiDataWidth) - 1; + localparam ADDR_INDEX = ($clog2( + DATA_WIDTH / CVA6Cfg.AxiDataWidth + ) > 0) ? $clog2( + DATA_WIDTH / CVA6Cfg.AxiDataWidth + ) : 1; + localparam MAX_OUTSTANDING_AW = CVA6Cfg.MaxOutstandingStores; + localparam MAX_OUTSTANDING_AW_CNT_WIDTH = $clog2( + MAX_OUTSTANDING_AW + 1 + ) > 0 ? $clog2( + MAX_OUTSTANDING_AW + 1 + ) : 1; + + typedef logic [MAX_OUTSTANDING_AW_CNT_WIDTH-1:0] outstanding_aw_cnt_t; + + enum logic [3:0] { + IDLE, + WAIT_B_VALID, + WAIT_AW_READY, + WAIT_LAST_W_READY, + WAIT_LAST_W_READY_AW_READY, + WAIT_AW_READY_BURST, + WAIT_R_VALID, + WAIT_R_VALID_MULTIPLE, + COMPLETE_READ, + WAIT_AMO_R_VALID + } + state_q, state_d; + + // counter for AXI transfers + logic [ADDR_INDEX-1:0] cnt_d, cnt_q; + logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0][CVA6Cfg.AxiDataWidth-1:0] + cache_line_d, cache_line_q; + // save the address for a read, as we allow for non-cacheline aligned accesses + logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0] addr_offset_d, addr_offset_q; + logic [CVA6Cfg.AxiIdWidth-1:0] id_d, id_q; + logic [ADDR_INDEX-1:0] index; + // save the atomic operation and size + ariane_pkg::amo_t amo_d, amo_q; + logic [1:0] size_d, size_q; + // outstanding write transactions counter + outstanding_aw_cnt_t outstanding_aw_cnt_q, outstanding_aw_cnt_d; + logic any_outstanding_aw; + + assign any_outstanding_aw = outstanding_aw_cnt_q != '0; + + always_comb begin : axi_fsm + // Default assignments + axi_req_o.aw_valid = 1'b0; + // Cast to AXI address width + axi_req_o.aw.addr = addr_i; + axi_req_o.aw.prot = 3'b0; + axi_req_o.aw.region = 4'b0; + axi_req_o.aw.len = 8'b0; + axi_req_o.aw.size = {1'b0, size_i}; // 1, 2, 4 or 8 bytes + axi_req_o.aw.burst = axi_pkg::BURST_INCR; // Use BURST_INCR for AXI regular transaction + axi_req_o.aw.lock = 1'b0; + axi_req_o.aw.cache = axi_pkg::CACHE_MODIFIABLE; + axi_req_o.aw.qos = 4'b0; + axi_req_o.aw.id = id_i; + 
axi_req_o.aw.atop = atop_from_amo(amo_i); + axi_req_o.aw.user = '0; + + axi_req_o.ar_valid = 1'b0; + // Cast to AXI address width + axi_req_o.ar.addr = addr_i; + // in case of a single request or wrapping transfer we can simply begin at the address, if we want to request a cache-line + // with an incremental transfer we need to output the corresponding base address of the cache line + if (!CRITICAL_WORD_FIRST && type_i != ariane_pkg::SINGLE_REQ) begin + axi_req_o.ar.addr[CACHELINE_BYTE_OFFSET-1:0] = '0; + end + axi_req_o.ar.prot = 3'b0; + axi_req_o.ar.region = 4'b0; + axi_req_o.ar.len = 8'b0; + axi_req_o.ar.size = {1'b0, size_i}; // 1, 2, 4 or 8 bytes + axi_req_o.ar.burst = (CRITICAL_WORD_FIRST ? axi_pkg::BURST_WRAP : axi_pkg::BURST_INCR); // wrapping transfer in case of a critical word first strategy + axi_req_o.ar.lock = 1'b0; + axi_req_o.ar.cache = axi_pkg::CACHE_MODIFIABLE; + axi_req_o.ar.qos = 4'b0; + axi_req_o.ar.id = id_i; + axi_req_o.ar.user = '0; + + axi_req_o.w_valid = 1'b0; + axi_req_o.w.data = wdata_i[0]; + axi_req_o.w.strb = be_i[0]; + axi_req_o.w.last = 1'b0; + axi_req_o.w.user = '0; + + axi_req_o.b_ready = 1'b0; + axi_req_o.r_ready = 1'b0; + + gnt_o = 1'b0; + valid_o = 1'b0; + id_o = axi_resp_i.r.id; + + critical_word_o = axi_resp_i.r.data; + critical_word_valid_o = 1'b0; + rdata_o = cache_line_q; + + state_d = state_q; + cnt_d = cnt_q; + cache_line_d = cache_line_q; + addr_offset_d = addr_offset_q; + id_d = id_q; + amo_d = amo_q; + size_d = size_q; + index = '0; + + outstanding_aw_cnt_d = outstanding_aw_cnt_q; + + case (state_q) + + IDLE: begin + cnt_d = '0; + // we have an incoming request + if (req_i) begin + // is this a read or write? + // write + if (we_i) begin + // multiple outstanding write transactions are only + // allowed if they are guaranteed not to be reordered + // i.e. 
same ID + if (!any_outstanding_aw || ((id_i == id_q) && (amo_i == ariane_pkg::AMO_NONE))) begin + // the data is valid + axi_req_o.aw_valid = 1'b1; + axi_req_o.w_valid = 1'b1; + // store-conditional requires exclusive access + axi_req_o.aw.lock = amo_i == ariane_pkg::AMO_SC; + // its a single write + if (type_i == ariane_pkg::SINGLE_REQ) begin + // only a single write so the data is already the last one + axi_req_o.w.last = 1'b1; + // single req can be granted here + gnt_o = axi_resp_i.aw_ready & axi_resp_i.w_ready; + case ({ + axi_resp_i.aw_ready, axi_resp_i.w_ready + }) + 2'b11: state_d = WAIT_B_VALID; + 2'b01: state_d = WAIT_AW_READY; + 2'b10: state_d = WAIT_LAST_W_READY; + default: state_d = IDLE; + endcase + + if (axi_resp_i.aw_ready) begin + id_d = id_i; + amo_d = amo_i; + size_d = size_i; + end + + // its a request for the whole cache line + end else begin + // bursts of AMOs unsupported + assert (amo_i == ariane_pkg::AMO_NONE) + else $fatal("Bursts of atomic operations are not supported"); + + axi_req_o.aw.len = BURST_SIZE[7:0]; // number of bursts to do + axi_req_o.w.data = wdata_i[0]; + axi_req_o.w.strb = be_i[0]; + + if (axi_resp_i.w_ready) cnt_d = BURST_SIZE[ADDR_INDEX-1:0] - 1; + else cnt_d = BURST_SIZE[ADDR_INDEX-1:0]; + + case ({ + axi_resp_i.aw_ready, axi_resp_i.w_ready + }) + 2'b11: state_d = WAIT_LAST_W_READY; + 2'b01: state_d = WAIT_LAST_W_READY_AW_READY; + 2'b10: state_d = WAIT_LAST_W_READY; + default: ; + endcase + end + end + // read + end else begin + // only multiple outstanding write transactions are allowed + if (!any_outstanding_aw) begin + + axi_req_o.ar_valid = 1'b1; + // load-reserved requires exclusive access + axi_req_o.ar.lock = amo_i == ariane_pkg::AMO_LR; + + gnt_o = axi_resp_i.ar_ready; + if (type_i != ariane_pkg::SINGLE_REQ) begin + assert (amo_i == ariane_pkg::AMO_NONE) + else $fatal("Bursts of atomic operations are not supported"); + + axi_req_o.ar.len = BURST_SIZE[7:0]; + cnt_d = BURST_SIZE[ADDR_INDEX-1:0]; + end + + if 
(axi_resp_i.ar_ready) begin + state_d = (type_i == ariane_pkg::SINGLE_REQ) ? WAIT_R_VALID : WAIT_R_VALID_MULTIPLE; + addr_offset_d = addr_i[ADDR_INDEX-1+3:3]; + end + end + end + end + end + + // ~> from single write + WAIT_AW_READY: begin + axi_req_o.aw_valid = 1'b1; + + if (axi_resp_i.aw_ready) begin + gnt_o = 1'b1; + state_d = WAIT_B_VALID; + id_d = id_i; + amo_d = amo_i; + size_d = size_i; + end + end + + // ~> we need to wait for an aw_ready and there is at least one outstanding write + WAIT_LAST_W_READY_AW_READY: begin + axi_req_o.w_valid = 1'b1; + axi_req_o.w.last = (cnt_q == '0); + if (type_i == ariane_pkg::SINGLE_REQ) begin + axi_req_o.w.data = wdata_i[0]; + axi_req_o.w.strb = be_i[0]; + end else begin + axi_req_o.w.data = wdata_i[BURST_SIZE[ADDR_INDEX-1:0]-cnt_q]; + axi_req_o.w.strb = be_i[BURST_SIZE[ADDR_INDEX-1:0]-cnt_q]; + end + axi_req_o.aw_valid = 1'b1; + // we are here because we want to write a cache line + axi_req_o.aw.len = BURST_SIZE[7:0]; + // we got an aw_ready + case ({ + axi_resp_i.aw_ready, axi_resp_i.w_ready + }) + // we got an aw ready + 2'b01: begin + // are there any outstanding transactions? 
+ if (cnt_q == 0) state_d = WAIT_AW_READY_BURST; + else // yes, so reduce the count and stay here + cnt_d = cnt_q - 1; + end + 2'b10: state_d = WAIT_LAST_W_READY; + 2'b11: begin + // we are finished + if (cnt_q == 0) begin + state_d = WAIT_B_VALID; + gnt_o = 1'b1; + // there are outstanding transactions + end else begin + state_d = WAIT_LAST_W_READY; + cnt_d = cnt_q - 1; + end + end + default: ; + endcase + + end + + // ~> all data has already been sent, we are only waiting for the aw_ready + WAIT_AW_READY_BURST: begin + axi_req_o.aw_valid = 1'b1; + axi_req_o.aw.len = BURST_SIZE[7:0]; + + if (axi_resp_i.aw_ready) begin + state_d = WAIT_B_VALID; + gnt_o = 1'b1; + end + end + + // ~> from write, there is an outstanding write + WAIT_LAST_W_READY: begin + axi_req_o.w_valid = 1'b1; + + if (type_i != ariane_pkg::SINGLE_REQ) begin + axi_req_o.w.data = wdata_i[BURST_SIZE[ADDR_INDEX-1:0]-cnt_q]; + axi_req_o.w.strb = be_i[BURST_SIZE[ADDR_INDEX-1:0]-cnt_q]; + end + + // this is the last write + if (cnt_q == '0) begin + axi_req_o.w.last = 1'b1; + if (axi_resp_i.w_ready) begin + state_d = WAIT_B_VALID; + gnt_o = 1'b1; + end + end else if (axi_resp_i.w_ready) begin + cnt_d = cnt_q - 1; + end + end + + // ~> finish write transaction + WAIT_B_VALID: begin + id_o = axi_resp_i.b.id; + + // Write is valid + if (axi_resp_i.b_valid && !any_outstanding_aw) begin + axi_req_o.b_ready = 1'b1; + + // some atomics must wait for read data + // we only accept it after accepting bvalid + if (amo_returns_data(amo_q)) begin + if (axi_resp_i.r_valid) begin + // return read data if valid + valid_o = 1'b1; + axi_req_o.r_ready = 1'b1; + state_d = IDLE; + rdata_o = axi_resp_i.r.data; + end else begin + // wait otherwise + state_d = WAIT_AMO_R_VALID; + end + end else begin + valid_o = 1'b1; + state_d = IDLE; + + // store-conditional response + if (amo_q == ariane_pkg::AMO_SC) begin + if (axi_resp_i.b.resp == axi_pkg::RESP_EXOKAY) begin + // success -> return 0 + rdata_o = 'b0; + end else begin + // 
failure -> when request is 64-bit, return 1; + // when request is 32-bit place a 1 in both upper + // and lower half words. The right word will be + // realigned/masked externally + rdata_o = size_q == 2'b10 ? (1'b1 << 32) | 64'b1 : 64'b1; + end + end + end + // if the request was not an atomic we can possibly issue + // other requests while waiting for the response + end else begin + if ((amo_q == ariane_pkg::AMO_NONE) && (outstanding_aw_cnt_q != MAX_OUTSTANDING_AW)) begin + state_d = IDLE; + outstanding_aw_cnt_d = outstanding_aw_cnt_q + 1; + end + end + end + + // ~> some atomics wait for read data + WAIT_AMO_R_VALID: begin + // acknowledge data and terminate atomic + if (axi_resp_i.r_valid) begin + axi_req_o.r_ready = 1'b1; + state_d = IDLE; + valid_o = 1'b1; + rdata_o = axi_resp_i.r.data; + end + end + + // ~> cacheline read, single read + WAIT_R_VALID_MULTIPLE, WAIT_R_VALID: begin + if (CRITICAL_WORD_FIRST) index = addr_offset_q + (BURST_SIZE[ADDR_INDEX-1:0] - cnt_q); + else index = BURST_SIZE[ADDR_INDEX-1:0] - cnt_q; + + // reads are always wrapping here + axi_req_o.r_ready = 1'b1; + // this is the first read a.k.a the critical word + if (axi_resp_i.r_valid) begin + if (CRITICAL_WORD_FIRST) begin + // this is the first word of a cacheline read, e.g.: the word which was causing the miss + if (state_q == WAIT_R_VALID_MULTIPLE && cnt_q == BURST_SIZE) begin + critical_word_valid_o = 1'b1; + critical_word_o = axi_resp_i.r.data; + end + end else begin + // check if the address offset matches - then we are getting the critical word + if (index == addr_offset_q) begin + critical_word_valid_o = 1'b1; + critical_word_o = axi_resp_i.r.data; + end + end + + // this is the last read + if (axi_resp_i.r.last) begin + id_d = axi_resp_i.r.id; + state_d = COMPLETE_READ; + end + + // save the word + if (state_q == WAIT_R_VALID_MULTIPLE) begin + cache_line_d[index] = axi_resp_i.r.data; + + end else cache_line_d[0] = axi_resp_i.r.data; + + // Decrease the counter + cnt_d = cnt_q 
- 1; + end + end + // ~> read is complete + COMPLETE_READ: begin + valid_o = 1'b1; + state_d = IDLE; + id_o = id_q; + end + + default: state_d = IDLE; + endcase + + // This process handles B responses when accepting + // multiple outstanding write transactions + if (any_outstanding_aw && axi_resp_i.b_valid) begin + axi_req_o.b_ready = 1'b1; + valid_o = 1'b1; + // Right hand side contains non-registered signal as we want + // to preserve a possible increment from the WAIT_B_VALID state + outstanding_aw_cnt_d = outstanding_aw_cnt_d - 1; + end + end + + // ---------------- + // Registers + // ---------------- + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + // start in flushing state and initialize the memory + state_q <= IDLE; + cnt_q <= '0; + cache_line_q <= '0; + addr_offset_q <= '0; + id_q <= '0; + amo_q <= ariane_pkg::AMO_NONE; + size_q <= '0; + outstanding_aw_cnt_q <= '0; + end else begin + state_q <= state_d; + cnt_q <= cnt_d; + cache_line_q <= cache_line_d; + addr_offset_q <= addr_offset_d; + id_q <= id_d; + amo_q <= amo_d; + size_q <= size_d; + outstanding_aw_cnt_q <= outstanding_aw_cnt_d; + end + end + + function automatic axi_pkg::atop_t atop_from_amo(ariane_pkg::amo_t amo); + axi_pkg::atop_t result = 6'b000000; + + unique case (amo) + ariane_pkg::AMO_NONE: result = {axi_pkg::ATOP_NONE, 4'b0000}; + ariane_pkg::AMO_SWAP: result = {axi_pkg::ATOP_ATOMICSWAP}; + ariane_pkg::AMO_ADD: + result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_ADD}; + ariane_pkg::AMO_AND: + result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_CLR}; + ariane_pkg::AMO_OR: + result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SET}; + ariane_pkg::AMO_XOR: + result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_EOR}; + ariane_pkg::AMO_MAX: + result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMAX}; + ariane_pkg::AMO_MAXU: + result = 
{axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMAX}; + ariane_pkg::AMO_MIN: + result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMIN}; + ariane_pkg::AMO_MINU: + result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMIN}; + ariane_pkg::AMO_CAS1: result = {axi_pkg::ATOP_NONE, 4'b0000}; // Unsupported + ariane_pkg::AMO_CAS2: result = {axi_pkg::ATOP_NONE, 4'b0000}; // Unsupported + default: result = 6'b000000; + endcase + + return result; + endfunction + + function automatic logic amo_returns_data(ariane_pkg::amo_t amo); + axi_pkg::atop_t atop = atop_from_amo(amo); + logic is_load = atop[5:4] == axi_pkg::ATOP_ATOMICLOAD; + logic is_swap_or_cmp = atop[5:4] == axi_pkg::ATOP_ATOMICSWAP[5:4]; + return is_load || is_swap_or_cmp; + endfunction + +endmodule diff --git a/test/type_param/core/cache_subsystem/cache_ctrl.sv b/test/type_param/core/cache_subsystem/cache_ctrl.sv new file mode 100644 index 00000000..e8770d20 --- /dev/null +++ b/test/type_param/core/cache_subsystem/cache_ctrl.sv @@ -0,0 +1,475 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// File: cache_ctrl.svh +// Author: Florian Zaruba +// Date: 14.10.2017 +// +// Copyright (C) 2017 ETH Zurich, University of Bologna +// All rights reserved. 
+// +// Description: Cache controller + + +module cache_ctrl + import ariane_pkg::*; + import std_cache_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, + input logic bypass_i, // enable cache + output logic busy_o, + // Core request ports + input dcache_req_i_t req_port_i, + output dcache_req_o_t req_port_o, + // SRAM interface + output logic [DCACHE_SET_ASSOC-1:0] req_o, // req is valid + output logic [DCACHE_INDEX_WIDTH-1:0] addr_o, // address into cache array + input logic gnt_i, + output cache_line_t data_o, + output cl_be_t be_o, + output logic [DCACHE_TAG_WIDTH-1:0] tag_o, //valid one cycle later + input cache_line_t [DCACHE_SET_ASSOC-1:0] data_i, + output logic we_o, + input logic [DCACHE_SET_ASSOC-1:0] hit_way_i, + // Miss handling + output miss_req_t miss_req_o, + // return + input logic miss_gnt_i, + input logic active_serving_i, // the miss unit is currently active for this unit, serving the miss + input logic [63:0] critical_word_i, + input logic critical_word_valid_i, + // bypass ports + input logic bypass_gnt_i, + input logic bypass_valid_i, + input logic [63:0] bypass_data_i, + // check MSHR for aliasing + output logic [55:0] mshr_addr_o, + input logic mshr_addr_matches_i, + input logic mshr_index_matches_i +); + + enum logic [3:0] { + IDLE, // 0 + WAIT_TAG, // 1 + WAIT_TAG_BYPASSED, // 2 + WAIT_GNT, // 3 + WAIT_GNT_SAVED, // 4 + STORE_REQ, // 5 + WAIT_REFILL_VALID, // 6 + WAIT_REFILL_GNT, // 7 + WAIT_TAG_SAVED, // 8 + WAIT_MSHR, // 9 + WAIT_CRITICAL_WORD // 10 + } + state_d, state_q; + + typedef struct packed { + logic [DCACHE_INDEX_WIDTH-1:0] index; + logic [DCACHE_TAG_WIDTH-1:0] tag; + logic [DCACHE_TID_WIDTH-1:0] id; + logic [7:0] be; + logic [1:0] size; + logic we; + logic [63:0] wdata; + logic bypass; + logic killed; + } mem_req_t; + + logic [DCACHE_SET_ASSOC-1:0] hit_way_d, hit_way_q; + + 
mem_req_t mem_req_d, mem_req_q; + + assign busy_o = (state_q != IDLE); + assign tag_o = mem_req_d.tag; + + logic [DCACHE_LINE_WIDTH-1:0] cl_i; + + always_comb begin : way_select + cl_i = '0; + for (int unsigned i = 0; i < DCACHE_SET_ASSOC; i++) if (hit_way_i[i]) cl_i = data_i[i].data; + + // cl_i = data_i[one_hot_to_bin(hit_way_i)].data; + end + + // -------------- + // Cache FSM + // -------------- + always_comb begin : cache_ctrl_fsm + automatic logic [$clog2(DCACHE_LINE_WIDTH)-1:0] cl_offset; + // incoming cache-line -> this is needed as synthesis is not supporting +: indexing in a multi-dimensional array + // cache-line offset -> multiple of 64 + cl_offset = mem_req_q.index[DCACHE_BYTE_OFFSET-1:3] << 6; // shift by 6 to the left + // default assignments + state_d = state_q; + mem_req_d = mem_req_q; + hit_way_d = hit_way_q; + // output assignments + req_port_o.data_gnt = 1'b0; + req_port_o.data_rvalid = 1'b0; + req_port_o.data_rdata = '0; + req_port_o.data_rid = mem_req_q.id; + miss_req_o = '0; + mshr_addr_o = '0; + // Memory array communication + req_o = '0; + addr_o = req_port_i.address_index; + data_o = '0; + be_o = '0; + we_o = '0; + + mem_req_d.killed |= req_port_i.kill_req; + + case (state_q) + + IDLE: begin + // a new request arrived + if (req_port_i.data_req && !flush_i) begin + // request the cache line - we can do this speculatively + req_o = '1; + + // save index, be and we + mem_req_d.index = req_port_i.address_index; + mem_req_d.id = req_port_i.data_id; + mem_req_d.be = req_port_i.data_be; + mem_req_d.size = req_port_i.data_size; + mem_req_d.we = req_port_i.data_we; + mem_req_d.wdata = req_port_i.data_wdata; + mem_req_d.killed = req_port_i.kill_req; + + // Bypass mode, check for uncacheable address here as well + if (bypass_i) begin + state_d = WAIT_TAG_BYPASSED; + // grant this access only if it was a load + req_port_o.data_gnt = (req_port_i.data_we) ? 
1'b0 : 1'b1; + mem_req_d.bypass = 1'b1; + // ------------------ + // Cache is enabled + // ------------------ + end else begin + // Wait that we have access on the memory array + if (gnt_i) begin + state_d = WAIT_TAG; + mem_req_d.bypass = 1'b0; + // only for a read + if (!req_port_i.data_we) req_port_o.data_gnt = 1'b1; + end + end + end + end + + // cache enabled and waiting for tag + WAIT_TAG, WAIT_TAG_SAVED: begin + // check that the client really wants to do the request and that we have a valid tag + if (!req_port_i.kill_req && (req_port_i.tag_valid || state_q == WAIT_TAG_SAVED || mem_req_q.we)) begin + // save tag if we didn't already save it + if (state_q != WAIT_TAG_SAVED) begin + mem_req_d.tag = req_port_i.address_tag; + end + // we speculatively request another transfer + if (req_port_i.data_req && !flush_i) begin + req_o = '1; + end + // ------------ + // HIT CASE + // ------------ + if (|hit_way_i) begin + // we can request another cache-line if this was a load + if (req_port_i.data_req && !mem_req_q.we && !flush_i) begin + state_d = WAIT_TAG; // switch back to WAIT_TAG + mem_req_d.index = req_port_i.address_index; + mem_req_d.id = req_port_i.data_id; + mem_req_d.be = req_port_i.data_be; + mem_req_d.size = req_port_i.data_size; + mem_req_d.we = req_port_i.data_we; + mem_req_d.wdata = req_port_i.data_wdata; + mem_req_d.killed = req_port_i.kill_req; + mem_req_d.bypass = 1'b0; + + req_port_o.data_gnt = gnt_i; + + if (!gnt_i) begin + state_d = IDLE; + end + end else begin + state_d = IDLE; + end + + // this is timing critical + req_port_o.data_rdata = cl_i[cl_offset+:64]; + + // report data for a read + if (!mem_req_q.we) begin + req_port_o.data_rvalid = ~mem_req_q.killed; + // else this was a store so we need an extra step to handle it + end else begin + state_d = STORE_REQ; + hit_way_d = hit_way_i; + end + // ------------ + // MISS CASE + // ------------ + end else begin + // make a miss request + state_d = WAIT_REFILL_GNT; + end + // 
---------------------------------------------- + // Check MSHR - Miss Status Handling Register + // ---------------------------------------------- + mshr_addr_o = {tag_o, mem_req_q.index}; + // 1. We've got a match on MSHR while we are going down the + // store path. This means that the miss controller is + // currently evicting our cache-line. As the store is + // non-atomic we need to constantly check whether we are + // matching the address the miss handler is serving. + // Furthermore we need to check for the whole index + // because a completely different memory line could alias + // with the cache-line we are evicting. + // 2. The second case is where we are currently loading and + // the address matches the exact CL the miss controller + // is currently serving. That means we need to wait for + // the miss controller to finish its request before we + // can continue to serve this CL. Otherwise we will fetch + // the cache-line again and potentially lose any + // content we've written so far. This, as a consequence, + // means we can't have a hit on the CL, which means that + // req_port_o.data_rvalid will be de-asserted.
+ if ((mshr_index_matches_i && mem_req_q.we) || mshr_addr_matches_i) begin + state_d = WAIT_MSHR; + end + + // ------------------------- + // Check for cache-ability + // ------------------------- + if (!config_pkg::is_inside_cacheable_regions( + CVA6Cfg, {{{64 - riscv::PLEN} {1'b0}}, tag_o, {DCACHE_INDEX_WIDTH{1'b0}}} + )) begin + mem_req_d.bypass = 1'b1; + state_d = WAIT_REFILL_GNT; + end + + // we are still waiting for a valid tag + end else begin + // request cache line for saved index + addr_o = mem_req_q.index; + req_o = '1; + + // check that we still have a memory grant + if (!gnt_i) begin + state_d = WAIT_GNT; + end + end + end + + // ~> we already granted the request but lost the memory grant while waiting for the tag + WAIT_GNT, WAIT_GNT_SAVED: begin + // request cache line for saved index + addr_o = mem_req_q.index; + req_o = '1; + + // if we get a valid tag while waiting for the memory grant, save it + if (req_port_i.tag_valid) begin + mem_req_d.tag = req_port_i.address_tag; + state_d = WAIT_GNT_SAVED; + end + + // we have a memory grant again ~> go back to WAIT_TAG + if (gnt_i) begin + state_d = (state_d == WAIT_GNT) ? 
WAIT_TAG : WAIT_TAG_SAVED; + end + end + + // ~> we are here as we need a second round of memory access for a store + STORE_REQ: begin + // check if the MSHR still doesn't match + mshr_addr_o = {mem_req_q.tag, mem_req_q.index}; + + // We need to re-check for MSHR aliasing here as the store requires at least + // two memory look-ups on a single-ported SRAM and therefore is non-atomic + if (!mshr_index_matches_i) begin + // store data, write dirty bit + req_o = hit_way_q; + addr_o = mem_req_q.index; + we_o = 1'b1; + + be_o.vldrty = hit_way_q; + + // set the correct byte enable + be_o.data[cl_offset>>3+:8] = mem_req_q.be; + data_o.data[cl_offset+:64] = mem_req_q.wdata; + // ~> change the state + data_o.dirty = 1'b1; + data_o.valid = 1'b1; + + // got a grant ~> this is finished now + if (gnt_i) begin + req_port_o.data_gnt = 1'b1; + state_d = IDLE; + end + end else begin + state_d = WAIT_MSHR; + end + end // case: STORE_REQ + + // we've got a match on MSHR ~> miss unit is currently serving a request + WAIT_MSHR: begin + mshr_addr_o = {mem_req_q.tag, mem_req_q.index}; + // we can start a new request + if (!mshr_index_matches_i) begin + req_o = '1; + + addr_o = mem_req_q.index; + + if (gnt_i) state_d = WAIT_TAG_SAVED; + end + end + + // its for sure a miss + WAIT_TAG_BYPASSED: begin + // check that the client really wants to do the request and that we have a valid tag + if (!req_port_i.kill_req && (req_port_i.tag_valid || mem_req_q.we)) begin + // save tag + mem_req_d.tag = req_port_i.address_tag; + state_d = WAIT_REFILL_GNT; + end + end + + // ~> wait for grant from miss unit + WAIT_REFILL_GNT: begin + + mshr_addr_o = {mem_req_q.tag, mem_req_q.index}; + + miss_req_o.valid = 1'b1; + miss_req_o.bypass = mem_req_q.bypass; + miss_req_o.addr = {mem_req_q.tag, mem_req_q.index}; + miss_req_o.be = mem_req_q.be; + miss_req_o.size = mem_req_q.size; + miss_req_o.we = mem_req_q.we; + miss_req_o.wdata = mem_req_q.wdata; + + // got a grant so go to valid + if (bypass_gnt_i) begin + 
state_d = WAIT_REFILL_VALID; + // if this was a write we still need to give a grant to the store unit. + // We can also avoid waiting for the response valid, this signal is + // currently not used by the store unit + if (mem_req_q.we) begin + req_port_o.data_gnt = 1'b1; + state_d = IDLE; + end + end + + if (miss_gnt_i && !mem_req_q.we) state_d = WAIT_CRITICAL_WORD; + else if (miss_gnt_i) begin + state_d = IDLE; + req_port_o.data_gnt = 1'b1; + end + + // it can be the case that the miss unit is currently serving a + // request which matches ours + // so we need to check the MSHR for matching continuously + // if the MSHR matches we need to go to a different state -> we should never get a matching MSHR and a high miss_gnt_i + if (mshr_addr_matches_i && !active_serving_i) begin + state_d = WAIT_MSHR; + end + end + + // ~> wait for critical word to arrive + WAIT_CRITICAL_WORD: begin + // speculatively request another word + if (req_port_i.data_req) begin + // request the cache line + req_o = '1; + end + + if (critical_word_valid_i) begin + req_port_o.data_rvalid = ~mem_req_q.killed; + req_port_o.data_rdata = critical_word_i; + // we can make another request + if (req_port_i.data_req && !flush_i) begin + // save index, be and we + mem_req_d.index = req_port_i.address_index; + mem_req_d.id = req_port_i.data_id; + mem_req_d.be = req_port_i.data_be; + mem_req_d.size = req_port_i.data_size; + mem_req_d.we = req_port_i.data_we; + mem_req_d.wdata = req_port_i.data_wdata; + mem_req_d.killed = req_port_i.kill_req; + + state_d = IDLE; + + // Wait until we have access on the memory array + if (gnt_i) begin + state_d = WAIT_TAG; + mem_req_d.bypass = 1'b0; + req_port_o.data_gnt = 1'b1; + end + end else begin + state_d = IDLE; + end + end + end + // ~> wait until the bypass request is valid + WAIT_REFILL_VALID: begin + // got a valid answer + if (bypass_valid_i) begin + req_port_o.data_rdata = bypass_data_i; + req_port_o.data_rvalid = ~mem_req_q.killed; + state_d = IDLE; + end + end 
+ endcase + + if (req_port_i.kill_req) begin + req_port_o.data_rvalid = 1'b1; + if (!(state_q inside {WAIT_REFILL_GNT, WAIT_CRITICAL_WORD})) begin + state_d = IDLE; + end + end + end + + // -------------- + // Registers + // -------------- + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + state_q <= IDLE; + mem_req_q <= '0; + hit_way_q <= '0; + end else begin + state_q <= state_d; + mem_req_q <= mem_req_d; + hit_way_q <= hit_way_d; + end + end + + //pragma translate_off +`ifndef VERILATOR + initial begin + assert (DCACHE_LINE_WIDTH == 128) + else + $error( + "Cacheline width has to be 128 for the moment. But only small changes required in data select logic" + ); + end + // if the full MSHR address matches so should also match the partial one + partial_full_mshr_match : + assert property(@(posedge clk_i) disable iff (~rst_ni) mshr_addr_matches_i -> mshr_index_matches_i) + else $fatal(1, "partial mshr index doesn't match"); + // there should never be a valid answer when the MSHR matches and we are not being served + no_valid_on_mshr_match : + assert property(@(posedge clk_i) disable iff (~rst_ni) (mshr_addr_matches_i && !active_serving_i)-> !req_port_o.data_rvalid || req_port_i.kill_req) + else $fatal(1, "rvalid_o should not be set on MSHR match"); +`endif + //pragma translate_on +endmodule diff --git a/test/type_param/core/cache_subsystem/cva6_hpdcache_if_adapter.sv b/test/type_param/core/cache_subsystem/cva6_hpdcache_if_adapter.sv new file mode 100644 index 00000000..31154038 --- /dev/null +++ b/test/type_param/core/cache_subsystem/cva6_hpdcache_if_adapter.sv @@ -0,0 +1,200 @@ +// Copyright 2023 Commissariat a l'Energie Atomique et aux Energies +// Alternatives (CEA) +// +// Licensed under the Solderpad Hardware License, Version 2.1 (the “License”); +// you may not use this file except in compliance with the License. 
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// You may obtain a copy of the License at https://solderpad.org/licenses/ +// +// Authors: Cesar Fuguet +// Date: February, 2023 +// Description: Interface adapter for the CVA6 core +module cva6_hpdcache_if_adapter + import hpdcache_pkg::*; + +// Parameters +// {{{ +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter bit is_load_port = 1'b1 +) +// }}} + +// Ports +// {{{ +( + // Clock and active-low reset pins + input logic clk_i, + input logic rst_ni, + + // Port ID + input hpdcache_pkg::hpdcache_req_sid_t hpdcache_req_sid_i, + + // Request/response ports from/to the CVA6 core + input ariane_pkg::dcache_req_i_t cva6_req_i, + output ariane_pkg::dcache_req_o_t cva6_req_o, + input ariane_pkg::amo_req_t cva6_amo_req_i, + output ariane_pkg::amo_resp_t cva6_amo_resp_o, + + // Request port to the L1 Dcache + output logic hpdcache_req_valid_o, + input logic hpdcache_req_ready_i, + output hpdcache_pkg::hpdcache_req_t hpdcache_req_o, + output logic hpdcache_req_abort_o, + output hpdcache_pkg::hpdcache_tag_t hpdcache_req_tag_o, + output hpdcache_pkg::hpdcache_pma_t hpdcache_req_pma_o, + + // Response port from the L1 Dcache + input logic hpdcache_rsp_valid_i, + input hpdcache_pkg::hpdcache_rsp_t hpdcache_rsp_i +); + // }}} + + // Internal nets and registers + // {{{ + logic forward_store, forward_amo; + logic hpdcache_req_is_uncacheable; + // }}} + + // Request forwarding + // {{{ + generate + // LOAD request + // {{{ + if (is_load_port == 1'b1) begin : load_port_gen + assign hpdcache_req_is_uncacheable = !config_pkg::is_inside_cacheable_regions( + CVA6Cfg, + { + {64 - ariane_pkg::DCACHE_TAG_WIDTH{1'b0}} + , cva6_req_i.address_tag + , {ariane_pkg::DCACHE_INDEX_WIDTH{1'b0}} + } + ); + + // Request forwarding + assign hpdcache_req_valid_o = cva6_req_i.data_req, + hpdcache_req_o.addr_offset = cva6_req_i.address_index, + hpdcache_req_o.wdata = '0, + hpdcache_req_o.op = 
hpdcache_pkg::HPDCACHE_REQ_LOAD, + hpdcache_req_o.be = cva6_req_i.data_be, + hpdcache_req_o.size = cva6_req_i.data_size, + hpdcache_req_o.sid = hpdcache_req_sid_i, + hpdcache_req_o.tid = cva6_req_i.data_id, + hpdcache_req_o.need_rsp = 1'b1, + hpdcache_req_o.phys_indexed = 1'b0, + hpdcache_req_o.addr_tag = '0, // unused on virtually indexed request + hpdcache_req_o.pma = '0; // unused on virtually indexed request + + assign hpdcache_req_abort_o = cva6_req_i.kill_req, + hpdcache_req_tag_o = cva6_req_i.address_tag, + hpdcache_req_pma_o.uncacheable = hpdcache_req_is_uncacheable, + hpdcache_req_pma_o.io = 1'b0; + + // Response forwarding + assign cva6_req_o.data_rvalid = hpdcache_rsp_valid_i, + cva6_req_o.data_rdata = hpdcache_rsp_i.rdata, + cva6_req_o.data_rid = hpdcache_rsp_i.tid, + cva6_req_o.data_gnt = hpdcache_req_ready_i; + end // }}} + + // {{{ + else begin : store_amo_gen + // STORE/AMO request + hpdcache_req_addr_t amo_addr; + hpdcache_req_offset_t amo_addr_offset; + hpdcache_tag_t amo_tag; + logic amo_is_word, amo_is_word_hi; + hpdcache_req_data_t amo_data; + hpdcache_req_be_t amo_data_be; + hpdcache_req_op_t amo_op; + logic [31:0] amo_resp_word; + + // AMO logic + // {{{ + always_comb begin : amo_op_comb + amo_addr = cva6_amo_req_i.operand_a; + amo_addr_offset = amo_addr[0+:HPDCACHE_REQ_OFFSET_WIDTH]; + amo_tag = amo_addr[HPDCACHE_REQ_OFFSET_WIDTH+:HPDCACHE_TAG_WIDTH]; + amo_is_word = (cva6_amo_req_i.size == 2'b10); + amo_is_word_hi = cva6_amo_req_i.operand_a[2]; + + amo_data = amo_is_word ? {2{cva6_amo_req_i.operand_b[0+:32]}} : cva6_amo_req_i.operand_b; + + amo_data_be = amo_is_word_hi ? 8'hf0 : amo_is_word ? 
8'h0f : 8'hff; + + unique case (cva6_amo_req_i.amo_op) + ariane_pkg::AMO_LR: amo_op = HPDCACHE_REQ_AMO_LR; + ariane_pkg::AMO_SC: amo_op = HPDCACHE_REQ_AMO_SC; + ariane_pkg::AMO_SWAP: amo_op = HPDCACHE_REQ_AMO_SWAP; + ariane_pkg::AMO_ADD: amo_op = HPDCACHE_REQ_AMO_ADD; + ariane_pkg::AMO_AND: amo_op = HPDCACHE_REQ_AMO_AND; + ariane_pkg::AMO_OR: amo_op = HPDCACHE_REQ_AMO_OR; + ariane_pkg::AMO_XOR: amo_op = HPDCACHE_REQ_AMO_XOR; + ariane_pkg::AMO_MAX: amo_op = HPDCACHE_REQ_AMO_MAX; + ariane_pkg::AMO_MAXU: amo_op = HPDCACHE_REQ_AMO_MAXU; + ariane_pkg::AMO_MIN: amo_op = HPDCACHE_REQ_AMO_MIN; + ariane_pkg::AMO_MINU: amo_op = HPDCACHE_REQ_AMO_MINU; + default: amo_op = HPDCACHE_REQ_LOAD; + endcase + end + + assign amo_resp_word = amo_is_word_hi ? hpdcache_rsp_i.rdata[0][32 +: 32] + : hpdcache_rsp_i.rdata[0][0 +: 32]; + // }}} + + // Request forwarding + // {{{ + assign hpdcache_req_is_uncacheable = !config_pkg::is_inside_cacheable_regions( + CVA6Cfg, + { + {64 - ariane_pkg::DCACHE_TAG_WIDTH{1'b0}} + , hpdcache_req_o.addr_tag, + {ariane_pkg::DCACHE_INDEX_WIDTH{1'b0}} + } + ); + + assign forward_store = cva6_req_i.data_req, forward_amo = cva6_amo_req_i.req; + + assign hpdcache_req_valid_o = forward_store | forward_amo, + hpdcache_req_o.addr_offset = forward_amo ? amo_addr_offset : cva6_req_i.address_index, + hpdcache_req_o.wdata = forward_amo ? amo_data : cva6_req_i.data_wdata, + hpdcache_req_o.op = forward_amo ? amo_op : hpdcache_pkg::HPDCACHE_REQ_STORE, + hpdcache_req_o.be = forward_amo ? amo_data_be : cva6_req_i.data_be, + hpdcache_req_o.size = forward_amo ? cva6_amo_req_i.size : cva6_req_i.data_size, + hpdcache_req_o.sid = hpdcache_req_sid_i, + hpdcache_req_o.tid = forward_amo ? '1 : '0, + hpdcache_req_o.need_rsp = forward_amo, + hpdcache_req_o.phys_indexed = 1'b1, + hpdcache_req_o.addr_tag = forward_amo ? 
amo_tag : cva6_req_i.address_tag, + hpdcache_req_o.pma.uncacheable = hpdcache_req_is_uncacheable, + hpdcache_req_o.pma.io = 1'b0, + hpdcache_req_abort_o = 1'b0, // unused on physically indexed requests + hpdcache_req_tag_o = '0, // unused on physically indexed requests + hpdcache_req_pma_o = '0; // unused on physically indexed requests + // }}} + + // Response forwarding + // {{{ + assign cva6_req_o.data_rvalid = hpdcache_rsp_valid_i && (hpdcache_rsp_i.tid != '1), + cva6_req_o.data_rdata = hpdcache_rsp_i.rdata, + cva6_req_o.data_rid = hpdcache_rsp_i.tid, + cva6_req_o.data_gnt = hpdcache_req_ready_i; + + assign cva6_amo_resp_o.ack = hpdcache_rsp_valid_i && (hpdcache_rsp_i.tid == '1), + cva6_amo_resp_o.result = amo_is_word ? {{32{amo_resp_word[31]}}, amo_resp_word} + : hpdcache_rsp_i.rdata[0][63:0]; + // }}} + end + // }}} + endgenerate + // }}} + + // Assertions + // {{{ + // pragma translate_off + forward_one_request_assert : + assert property (@(posedge clk_i) ($onehot0({forward_store, forward_amo}))) + else $error("Only one request shall be forwarded"); + // pragma translate_on + // }}} +endmodule diff --git a/test/type_param/core/cache_subsystem/cva6_hpdcache_subsystem.sv b/test/type_param/core/cache_subsystem/cva6_hpdcache_subsystem.sv new file mode 100644 index 00000000..7e90b914 --- /dev/null +++ b/test/type_param/core/cache_subsystem/cva6_hpdcache_subsystem.sv @@ -0,0 +1,609 @@ +// Copyright 2023 Commissariat a l'Energie Atomique et aux Energies +// Alternatives (CEA) +// +// Licensed under the Solderpad Hardware License, Version 2.1 (the “License”); +// you may not use this file except in compliance with the License. +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// You may obtain a copy of the License at https://solderpad.org/licenses/ +// +// Authors: Cesar Fuguet +// Date: February, 2023 +// Description: CVA6 cache subsystem integrating standard CVA6's +// instruction cache and the Core-V High-Performance L1 +// data cache (CV-HPDcache). 
+ +module cva6_hpdcache_subsystem +// Parameters +// {{{ +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int NumPorts = 4, + parameter int NrHwPrefetchers = 4, + parameter type noc_req_t = logic, + parameter type noc_resp_t = logic, + parameter type cmo_req_t = logic, + parameter type cmo_rsp_t = logic +) +// }}} + +// Ports +// {{{ +( + input logic clk_i, + input logic rst_ni, + + // I$ + // {{{ + input logic icache_en_i, // enable icache (or bypass e.g: in debug mode) + input logic icache_flush_i, // flush the icache, flush and kill have to be asserted together + output logic icache_miss_o, // to performance counter + // address translation requests + input ariane_pkg::icache_areq_t icache_areq_i, // to/from frontend + output ariane_pkg::icache_arsp_t icache_areq_o, + // data requests + input ariane_pkg::icache_dreq_t icache_dreq_i, // to/from frontend + output ariane_pkg::icache_drsp_t icache_dreq_o, + // }}} + + // D$ + // {{{ + // Cache management + input logic dcache_enable_i, // from CSR + input logic dcache_flush_i, // high until acknowledged + output logic dcache_flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed + output logic dcache_miss_o, // we missed on a ld/st + + // AMO interface + input ariane_pkg::amo_req_t dcache_amo_req_i, // from LSU + output ariane_pkg::amo_resp_t dcache_amo_resp_o, // to LSU + // CMO interface + input cmo_req_t dcache_cmo_req_i, // from CMO FU + output cmo_rsp_t dcache_cmo_resp_o, // to CMO FU + // Request ports + input ariane_pkg::dcache_req_i_t [NumPorts-1:0] dcache_req_ports_i, // from LSU + output ariane_pkg::dcache_req_o_t [NumPorts-1:0] dcache_req_ports_o, // to LSU + // Write Buffer status + output logic wbuffer_empty_o, + output logic wbuffer_not_ni_o, + + // Hardware memory prefetcher configuration + input logic [NrHwPrefetchers-1:0] hwpf_base_set_i, + input logic [NrHwPrefetchers-1:0][63:0] hwpf_base_i, + output logic [NrHwPrefetchers-1:0][63:0] 
hwpf_base_o, + input logic [NrHwPrefetchers-1:0] hwpf_param_set_i, + input logic [NrHwPrefetchers-1:0][63:0] hwpf_param_i, + output logic [NrHwPrefetchers-1:0][63:0] hwpf_param_o, + input logic [NrHwPrefetchers-1:0] hwpf_throttle_set_i, + input logic [NrHwPrefetchers-1:0][63:0] hwpf_throttle_i, + output logic [NrHwPrefetchers-1:0][63:0] hwpf_throttle_o, + output logic [ 63:0] hwpf_status_o, + // }}} + + // AXI port to upstream memory/peripherals + // {{{ + output noc_req_t noc_req_o, + input noc_resp_t noc_resp_i + // }}} +); + // }}} + + `include "axi/typedef.svh" + + // I$ instantiation + // {{{ + logic icache_miss_valid, icache_miss_ready; + wt_cache_pkg::icache_req_t icache_miss; + + logic icache_miss_resp_valid; + wt_cache_pkg::icache_rtrn_t icache_miss_resp; + + localparam int ICACHE_RDTXID = 1 << (ariane_pkg::MEM_TID_WIDTH - 1); + + cva6_icache #( + .CVA6Cfg(CVA6Cfg), + .RdTxId (ICACHE_RDTXID) + ) i_cva6_icache ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (icache_flush_i), + .en_i (icache_en_i), + .miss_o (icache_miss_o), + .areq_i (icache_areq_i), + .areq_o (icache_areq_o), + .dreq_i (icache_dreq_i), + .dreq_o (icache_dreq_o), + .mem_rtrn_vld_i(icache_miss_resp_valid), + .mem_rtrn_i (icache_miss_resp), + .mem_data_req_o(icache_miss_valid), + .mem_data_ack_i(icache_miss_ready), + .mem_data_o (icache_miss) + ); + // }}} + + // D$ instantiation + // {{{ + `include "hpdcache_typedef.svh" + + // 0: Page-Table Walk (PTW) + // 1: Load unit + // 2: Accelerator load + // 3: Store/AMO + // . + // . + // . 
+ // NumPorts: CMO + // NumPorts + 1: Hardware Memory Prefetcher (hwpf) + localparam int HPDCACHE_NREQUESTERS = NumPorts + 2; + + typedef logic [CVA6Cfg.AxiAddrWidth-1:0] hpdcache_mem_addr_t; + typedef logic [ariane_pkg::MEM_TID_WIDTH-1:0] hpdcache_mem_id_t; + typedef logic [CVA6Cfg.AxiDataWidth-1:0] hpdcache_mem_data_t; + typedef logic [CVA6Cfg.AxiDataWidth/8-1:0] hpdcache_mem_be_t; + `HPDCACHE_TYPEDEF_MEM_REQ_T(hpdcache_mem_req_t, hpdcache_mem_addr_t, hpdcache_mem_id_t); + `HPDCACHE_TYPEDEF_MEM_RESP_R_T(hpdcache_mem_resp_r_t, hpdcache_mem_id_t, hpdcache_mem_data_t); + `HPDCACHE_TYPEDEF_MEM_REQ_W_T(hpdcache_mem_req_w_t, hpdcache_mem_data_t, hpdcache_mem_be_t); + `HPDCACHE_TYPEDEF_MEM_RESP_W_T(hpdcache_mem_resp_w_t, hpdcache_mem_id_t); + + typedef logic [63:0] hwpf_stride_param_t; + + logic dcache_req_valid[HPDCACHE_NREQUESTERS-1:0]; + logic dcache_req_ready[HPDCACHE_NREQUESTERS-1:0]; + hpdcache_pkg::hpdcache_req_t dcache_req [HPDCACHE_NREQUESTERS-1:0]; + logic dcache_req_abort[HPDCACHE_NREQUESTERS-1:0]; + hpdcache_pkg::hpdcache_tag_t dcache_req_tag [HPDCACHE_NREQUESTERS-1:0]; + hpdcache_pkg::hpdcache_pma_t dcache_req_pma [HPDCACHE_NREQUESTERS-1:0]; + logic dcache_rsp_valid[HPDCACHE_NREQUESTERS-1:0]; + hpdcache_pkg::hpdcache_rsp_t dcache_rsp [HPDCACHE_NREQUESTERS-1:0]; + logic dcache_read_miss, dcache_write_miss; + + logic [ 2:0] snoop_valid; + logic [ 2:0] snoop_abort; + hpdcache_pkg::hpdcache_req_offset_t [ 2:0] snoop_addr_offset; + hpdcache_pkg::hpdcache_tag_t [ 2:0] snoop_addr_tag; + logic [ 2:0] snoop_phys_indexed; + + logic dcache_cmo_req_is_prefetch; + + logic dcache_miss_ready; + logic dcache_miss_valid; + hpdcache_mem_req_t dcache_miss; + + logic dcache_miss_resp_ready; + logic dcache_miss_resp_valid; + hpdcache_mem_resp_r_t dcache_miss_resp; + + logic dcache_wbuf_ready; + logic dcache_wbuf_valid; + hpdcache_mem_req_t dcache_wbuf; + + logic dcache_wbuf_data_ready; + logic dcache_wbuf_data_valid; + hpdcache_mem_req_w_t dcache_wbuf_data; + + logic 
dcache_wbuf_resp_ready; + logic dcache_wbuf_resp_valid; + hpdcache_mem_resp_w_t dcache_wbuf_resp; + + logic dcache_uc_read_ready; + logic dcache_uc_read_valid; + hpdcache_mem_req_t dcache_uc_read; + + logic dcache_uc_read_resp_ready; + logic dcache_uc_read_resp_valid; + hpdcache_mem_resp_r_t dcache_uc_read_resp; + + logic dcache_uc_write_ready; + logic dcache_uc_write_valid; + hpdcache_mem_req_t dcache_uc_write; + + logic dcache_uc_write_data_ready; + logic dcache_uc_write_data_valid; + hpdcache_mem_req_w_t dcache_uc_write_data; + + logic dcache_uc_write_resp_ready; + logic dcache_uc_write_resp_valid; + hpdcache_mem_resp_w_t dcache_uc_write_resp; + + hwpf_stride_pkg::hwpf_stride_throttle_t [NrHwPrefetchers-1:0] hwpf_throttle_in; + hwpf_stride_pkg::hwpf_stride_throttle_t [NrHwPrefetchers-1:0] hwpf_throttle_out; + + generate + ariane_pkg::dcache_req_i_t dcache_req_ports[HPDCACHE_NREQUESTERS-1:0]; + + for (genvar r = 0; r < (NumPorts - 1); r++) begin : cva6_hpdcache_load_if_adapter_gen + assign dcache_req_ports[r] = dcache_req_ports_i[r]; + + cva6_hpdcache_if_adapter #( + .CVA6Cfg (CVA6Cfg), + .is_load_port(1'b1) + ) i_cva6_hpdcache_load_if_adapter ( + .clk_i, + .rst_ni, + + .hpdcache_req_sid_i(hpdcache_pkg::hpdcache_req_sid_t'(r)), + + .cva6_req_i (dcache_req_ports[r]), + .cva6_req_o (dcache_req_ports_o[r]), + .cva6_amo_req_i ('0), + .cva6_amo_resp_o( /* unused */), + + .hpdcache_req_valid_o(dcache_req_valid[r]), + .hpdcache_req_ready_i(dcache_req_ready[r]), + .hpdcache_req_o (dcache_req[r]), + .hpdcache_req_abort_o(dcache_req_abort[r]), + .hpdcache_req_tag_o (dcache_req_tag[r]), + .hpdcache_req_pma_o (dcache_req_pma[r]), + + .hpdcache_rsp_valid_i(dcache_rsp_valid[r]), + .hpdcache_rsp_i (dcache_rsp[r]) + ); + end + + cva6_hpdcache_if_adapter #( + .CVA6Cfg (CVA6Cfg), + .is_load_port(1'b0) + ) i_cva6_hpdcache_store_if_adapter ( + .clk_i, + .rst_ni, + + .hpdcache_req_sid_i(hpdcache_pkg::hpdcache_req_sid_t'(NumPorts - 1)), + + .cva6_req_i 
(dcache_req_ports_i[NumPorts-1]), + .cva6_req_o (dcache_req_ports_o[NumPorts-1]), + .cva6_amo_req_i (dcache_amo_req_i), + .cva6_amo_resp_o(dcache_amo_resp_o), + + .hpdcache_req_valid_o(dcache_req_valid[NumPorts-1]), + .hpdcache_req_ready_i(dcache_req_ready[NumPorts-1]), + .hpdcache_req_o (dcache_req[NumPorts-1]), + .hpdcache_req_abort_o(dcache_req_abort[NumPorts-1]), + .hpdcache_req_tag_o (dcache_req_tag[NumPorts-1]), + .hpdcache_req_pma_o (dcache_req_pma[NumPorts-1]), + + .hpdcache_rsp_valid_i(dcache_rsp_valid[NumPorts-1]), + .hpdcache_rsp_i (dcache_rsp[NumPorts-1]) + ); + +`ifdef HPDCACHE_ENABLE_CMO + cva6_hpdcache_cmo_if_adapter #( + .cmo_req_t(cmo_req_t), + .cmo_rsp_t(cmo_rsp_t) + ) i_cva6_hpdcache_cmo_if_adapter ( + .clk_i, + .rst_ni, + + .dcache_req_sid_i(hpdcache_pkg::hpdcache_req_sid_t'(NumPorts)), + + .cva6_cmo_req_i (dcache_cmo_req_i), + .cva6_cmo_resp_o(dcache_cmo_resp_o), + + .dcache_req_valid_o(dcache_req_valid[NumPorts]), + .dcache_req_ready_i(dcache_req_ready[NumPorts]), + .dcache_req_o (dcache_req[NumPorts]), + .dcache_req_abort_o(dcache_req_abort[NumPorts]), + .dcache_req_tag_o (dcache_req_tag[NumPorts]), + .dcache_req_pma_o (dcache_req_pma[NumPorts]), + + .dcache_rsp_valid_i(dcache_rsp_valid[NumPorts]), + .dcache_rsp_i (dcache_rsp[NumPorts]) + ); +`else + assign dcache_req_valid[NumPorts] = 1'b0, + dcache_req[NumPorts] = '0, + dcache_req_abort[NumPorts] = 1'b0, + dcache_req_tag[NumPorts] = '0, + dcache_req_pma[NumPorts] = '0; +`endif + endgenerate + + // Snoop load port + assign snoop_valid[0] = dcache_req_valid[1] & dcache_req_ready[1], + snoop_abort[0] = dcache_req_abort[1], + snoop_addr_offset[0] = dcache_req[1].addr_offset, + snoop_addr_tag[0] = dcache_req_tag[1], + snoop_phys_indexed[0] = dcache_req[1].phys_indexed; + + // Snoop Store/AMO port + assign snoop_valid[1] = dcache_req_valid[NumPorts-1] & dcache_req_ready[NumPorts-1], + snoop_abort[1] = dcache_req_abort[NumPorts-1], + snoop_addr_offset[1] = dcache_req[NumPorts-1].addr_offset, + 
snoop_addr_tag[1] = dcache_req_tag[NumPorts-1], + snoop_phys_indexed[1] = dcache_req[NumPorts-1].phys_indexed; + +`ifdef HPDCACHE_ENABLE_CMO + // Snoop CMO port (in case of read prefetch accesses) + assign dcache_cmo_req_is_prefetch = hpdcache_pkg::is_cmo_prefetch( + dcache_req[NumPorts].op, dcache_req[NumPorts].size + ); + assign snoop_valid[2] = dcache_req_valid[NumPorts] + & dcache_req_ready[NumPorts] + & dcache_cmo_req_is_prefetch, + snoop_abort[2] = dcache_req_abort[NumPorts], + snoop_addr_offset[2] = dcache_req[NumPorts].addr_offset, + snoop_addr_tag[2] = dcache_req_tag[NumPorts], + snoop_phys_indexed[2] = dcache_req[NumPorts].phys_indexed; +`else + assign snoop_valid[2] = 1'b0, + snoop_abort[2] = 1'b0, + snoop_addr_offset[2] = '0, + snoop_addr_tag[2] = '0, + snoop_phys_indexed[2] = 1'b0; +`endif + + generate + for (genvar h = 0; h < NrHwPrefetchers; h++) begin : hwpf_throttle_gen + assign hwpf_throttle_in[h] = hwpf_stride_pkg::hwpf_stride_throttle_t'(hwpf_throttle_i[h]), + hwpf_throttle_o[h] = hwpf_stride_pkg::hwpf_stride_param_t'(hwpf_throttle_out[h]); + end + endgenerate + + hwpf_stride_wrapper #( + .NUM_HW_PREFETCH(NrHwPrefetchers), + .NUM_SNOOP_PORTS(3) + ) i_hwpf_stride_wrapper ( + .clk_i, + .rst_ni, + + .hwpf_stride_base_set_i (hwpf_base_set_i), + .hwpf_stride_base_i (hwpf_base_i), + .hwpf_stride_base_o (hwpf_base_o), + .hwpf_stride_param_set_i (hwpf_param_set_i), + .hwpf_stride_param_i (hwpf_param_i), + .hwpf_stride_param_o (hwpf_param_o), + .hwpf_stride_throttle_set_i(hwpf_throttle_set_i), + .hwpf_stride_throttle_i (hwpf_throttle_in), + .hwpf_stride_throttle_o (hwpf_throttle_out), + .hwpf_stride_status_o (hwpf_status_o), + + .snoop_valid_i (snoop_valid), + .snoop_abort_i (snoop_abort), + .snoop_addr_offset_i (snoop_addr_offset), + .snoop_addr_tag_i (snoop_addr_tag), + .snoop_phys_indexed_i(snoop_phys_indexed), + + .hpdcache_req_sid_i(hpdcache_pkg::hpdcache_req_sid_t'(NumPorts + 1)), + + .hpdcache_req_valid_o(dcache_req_valid[NumPorts+1]), + 
.hpdcache_req_ready_i(dcache_req_ready[NumPorts+1]), + .hpdcache_req_o (dcache_req[NumPorts+1]), + .hpdcache_req_abort_o(dcache_req_abort[NumPorts+1]), + .hpdcache_req_tag_o (dcache_req_tag[NumPorts+1]), + .hpdcache_req_pma_o (dcache_req_pma[NumPorts+1]), + .hpdcache_rsp_valid_i(dcache_rsp_valid[NumPorts+1]), + .hpdcache_rsp_i (dcache_rsp[NumPorts+1]) + ); + + hpdcache #( + .NREQUESTERS (HPDCACHE_NREQUESTERS), + .HPDcacheMemIdWidth (ariane_pkg::MEM_TID_WIDTH), + .HPDcacheMemDataWidth (CVA6Cfg.AxiDataWidth), + .hpdcache_mem_req_t (hpdcache_mem_req_t), + .hpdcache_mem_req_w_t (hpdcache_mem_req_w_t), + .hpdcache_mem_resp_r_t(hpdcache_mem_resp_r_t), + .hpdcache_mem_resp_w_t(hpdcache_mem_resp_w_t) + ) i_hpdcache ( + .clk_i, + .rst_ni, + + .wbuf_flush_i(dcache_flush_i), + + .core_req_valid_i(dcache_req_valid), + .core_req_ready_o(dcache_req_ready), + .core_req_i (dcache_req), + .core_req_abort_i(dcache_req_abort), + .core_req_tag_i (dcache_req_tag), + .core_req_pma_i (dcache_req_pma), + + .core_rsp_valid_o(dcache_rsp_valid), + .core_rsp_o (dcache_rsp), + + .mem_req_miss_read_ready_i(dcache_miss_ready), + .mem_req_miss_read_valid_o(dcache_miss_valid), + .mem_req_miss_read_o (dcache_miss), + + .mem_resp_miss_read_ready_o(dcache_miss_resp_ready), + .mem_resp_miss_read_valid_i(dcache_miss_resp_valid), + .mem_resp_miss_read_i (dcache_miss_resp), + + .mem_req_wbuf_write_ready_i(dcache_wbuf_ready), + .mem_req_wbuf_write_valid_o(dcache_wbuf_valid), + .mem_req_wbuf_write_o (dcache_wbuf), + + .mem_req_wbuf_write_data_ready_i(dcache_wbuf_data_ready), + .mem_req_wbuf_write_data_valid_o(dcache_wbuf_data_valid), + .mem_req_wbuf_write_data_o (dcache_wbuf_data), + + .mem_resp_wbuf_write_ready_o(dcache_wbuf_resp_ready), + .mem_resp_wbuf_write_valid_i(dcache_wbuf_resp_valid), + .mem_resp_wbuf_write_i (dcache_wbuf_resp), + + .mem_req_uc_read_ready_i(dcache_uc_read_ready), + .mem_req_uc_read_valid_o(dcache_uc_read_valid), + .mem_req_uc_read_o (dcache_uc_read), + + 
.mem_resp_uc_read_ready_o(dcache_uc_read_resp_ready), + .mem_resp_uc_read_valid_i(dcache_uc_read_resp_valid), + .mem_resp_uc_read_i (dcache_uc_read_resp), + + .mem_req_uc_write_ready_i(dcache_uc_write_ready), + .mem_req_uc_write_valid_o(dcache_uc_write_valid), + .mem_req_uc_write_o (dcache_uc_write), + + .mem_req_uc_write_data_ready_i(dcache_uc_write_data_ready), + .mem_req_uc_write_data_valid_o(dcache_uc_write_data_valid), + .mem_req_uc_write_data_o (dcache_uc_write_data), + + .mem_resp_uc_write_ready_o(dcache_uc_write_resp_ready), + .mem_resp_uc_write_valid_i(dcache_uc_write_resp_valid), + .mem_resp_uc_write_i (dcache_uc_write_resp), + + .evt_cache_write_miss_o(dcache_write_miss), + .evt_cache_read_miss_o (dcache_read_miss), + .evt_uncached_req_o ( /* unused */), + .evt_cmo_req_o ( /* unused */), + .evt_write_req_o ( /* unused */), + .evt_read_req_o ( /* unused */), + .evt_prefetch_req_o ( /* unused */), + .evt_req_on_hold_o ( /* unused */), + .evt_rtab_rollback_o ( /* unused */), + .evt_stall_refill_o ( /* unused */), + .evt_stall_o ( /* unused */), + + .wbuf_empty_o(wbuffer_empty_o), + + .cfg_enable_i (dcache_enable_i), + .cfg_wbuf_threshold_i (4'd2), + .cfg_wbuf_reset_timecnt_on_write_i (1'b1), + .cfg_wbuf_sequential_waw_i (1'b0), + .cfg_wbuf_inhibit_write_coalescing_i(1'b0), + .cfg_prefetch_updt_plru_i (1'b1), + .cfg_error_on_cacheable_amo_i (1'b0), + .cfg_rtab_single_entry_i (1'b0) + ); + + assign dcache_miss_o = dcache_read_miss, wbuffer_not_ni_o = wbuffer_empty_o; + + always_ff @(posedge clk_i or negedge rst_ni) begin : dcache_flush_ff + if (!rst_ni) dcache_flush_ack_o <= 1'b0; + else dcache_flush_ack_o <= ~dcache_flush_ack_o & dcache_flush_i; + end + + // }}} + + // AXI arbiter instantiation + // {{{ + typedef logic [CVA6Cfg.AxiAddrWidth-1:0] axi_addr_t; + typedef logic [CVA6Cfg.AxiDataWidth-1:0] axi_data_t; + typedef logic [CVA6Cfg.AxiDataWidth/8-1:0] axi_strb_t; + typedef logic [CVA6Cfg.AxiIdWidth-1:0] axi_id_t; + typedef logic 
[CVA6Cfg.AxiUserWidth-1:0] axi_user_t; + `AXI_TYPEDEF_AW_CHAN_T(axi_aw_chan_t, axi_addr_t, axi_id_t, axi_user_t) + `AXI_TYPEDEF_W_CHAN_T(axi_w_chan_t, axi_data_t, axi_strb_t, axi_user_t) + `AXI_TYPEDEF_B_CHAN_T(axi_b_chan_t, axi_id_t, axi_user_t) + `AXI_TYPEDEF_AR_CHAN_T(axi_ar_chan_t, axi_addr_t, axi_id_t, axi_user_t) + `AXI_TYPEDEF_R_CHAN_T(axi_r_chan_t, axi_data_t, axi_id_t, axi_user_t) + + cva6_hpdcache_subsystem_axi_arbiter #( + .HPDcacheMemIdWidth (ariane_pkg::MEM_TID_WIDTH), + .HPDcacheMemDataWidth (CVA6Cfg.AxiDataWidth), + .hpdcache_mem_req_t (hpdcache_mem_req_t), + .hpdcache_mem_req_w_t (hpdcache_mem_req_w_t), + .hpdcache_mem_resp_r_t(hpdcache_mem_resp_r_t), + .hpdcache_mem_resp_w_t(hpdcache_mem_resp_w_t), + + .AxiAddrWidth (CVA6Cfg.AxiAddrWidth), + .AxiDataWidth (CVA6Cfg.AxiDataWidth), + .AxiIdWidth (CVA6Cfg.AxiIdWidth), + .AxiUserWidth (CVA6Cfg.AxiUserWidth), + .axi_ar_chan_t(axi_ar_chan_t), + .axi_aw_chan_t(axi_aw_chan_t), + .axi_w_chan_t (axi_w_chan_t), + .axi_req_t (noc_req_t), + .axi_rsp_t (noc_resp_t) + ) i_axi_arbiter ( + .clk_i, + .rst_ni, + + .icache_miss_valid_i(icache_miss_valid), + .icache_miss_ready_o(icache_miss_ready), + .icache_miss_i (icache_miss), + .icache_miss_id_i (hpdcache_mem_id_t'(ICACHE_RDTXID)), + + .icache_miss_resp_valid_o(icache_miss_resp_valid), + .icache_miss_resp_o (icache_miss_resp), + + .dcache_miss_ready_o(dcache_miss_ready), + .dcache_miss_valid_i(dcache_miss_valid), + .dcache_miss_i (dcache_miss), + + .dcache_miss_resp_ready_i(dcache_miss_resp_ready), + .dcache_miss_resp_valid_o(dcache_miss_resp_valid), + .dcache_miss_resp_o (dcache_miss_resp), + + .dcache_wbuf_ready_o(dcache_wbuf_ready), + .dcache_wbuf_valid_i(dcache_wbuf_valid), + .dcache_wbuf_i (dcache_wbuf), + + .dcache_wbuf_data_ready_o(dcache_wbuf_data_ready), + .dcache_wbuf_data_valid_i(dcache_wbuf_data_valid), + .dcache_wbuf_data_i (dcache_wbuf_data), + + .dcache_wbuf_resp_ready_i(dcache_wbuf_resp_ready), + .dcache_wbuf_resp_valid_o(dcache_wbuf_resp_valid), + 
.dcache_wbuf_resp_o (dcache_wbuf_resp), + + .dcache_uc_read_ready_o(dcache_uc_read_ready), + .dcache_uc_read_valid_i(dcache_uc_read_valid), + .dcache_uc_read_i (dcache_uc_read), + .dcache_uc_read_id_i ('1), + + .dcache_uc_read_resp_ready_i(dcache_uc_read_resp_ready), + .dcache_uc_read_resp_valid_o(dcache_uc_read_resp_valid), + .dcache_uc_read_resp_o (dcache_uc_read_resp), + + .dcache_uc_write_ready_o(dcache_uc_write_ready), + .dcache_uc_write_valid_i(dcache_uc_write_valid), + .dcache_uc_write_i (dcache_uc_write), + .dcache_uc_write_id_i ('1), + + .dcache_uc_write_data_ready_o(dcache_uc_write_data_ready), + .dcache_uc_write_data_valid_i(dcache_uc_write_data_valid), + .dcache_uc_write_data_i (dcache_uc_write_data), + + .dcache_uc_write_resp_ready_i(dcache_uc_write_resp_ready), + .dcache_uc_write_resp_valid_o(dcache_uc_write_resp_valid), + .dcache_uc_write_resp_o (dcache_uc_write_resp), + + .axi_req_o (noc_req_o), + .axi_resp_i(noc_resp_i) + ); + // }}} + + // Assertions + // {{{ + // pragma translate_off + initial + assert (hpdcache_pkg::HPDCACHE_REQ_SRC_ID_WIDTH >= $clog2(HPDCACHE_NREQUESTERS)) + else $fatal(1, "HPDCACHE_REQ_SRC_ID_WIDTH is not wide enough"); + + a_invalid_instruction_fetch : + assert property ( + @(posedge clk_i) disable iff (!rst_ni) icache_dreq_o.valid |-> (|icache_dreq_o.data) !== 1'hX) + else + $warning( + 1, + "[l1 dcache] reading invalid instructions: vaddr=%08X, data=%08X", + icache_dreq_o.vaddr, + icache_dreq_o.data + ); + + a_invalid_write_data : + assert property ( + @(posedge clk_i) disable iff (!rst_ni) dcache_req_ports_i[2].data_req |-> |dcache_req_ports_i[2].data_be |-> (|dcache_req_ports_i[2].data_wdata) !== 1'hX) + else + $warning( + 1, + "[l1 dcache] writing invalid data: paddr=%016X, be=%02X, data=%016X", + { + dcache_req_ports_i[2].address_tag, dcache_req_ports_i[2].address_index + }, + dcache_req_ports_i[2].data_be, + dcache_req_ports_i[2].data_wdata + ); + + for (genvar j = 0; j < 2; j++) begin : gen_assertion + 
a_invalid_read_data : + assert property ( + @(posedge clk_i) disable iff (!rst_ni) dcache_req_ports_o[j].data_rvalid && ~dcache_req_ports_i[j].kill_req |-> (|dcache_req_ports_o[j].data_rdata) !== 1'hX) + else + $warning( + 1, + "[l1 dcache] reading invalid data on port %01d: data=%016X", + j, + dcache_req_ports_o[j].data_rdata + ); + end + // pragma translate_on + // }}} + +endmodule : cva6_hpdcache_subsystem diff --git a/test/type_param/core/cache_subsystem/cva6_hpdcache_subsystem_axi_arbiter.sv b/test/type_param/core/cache_subsystem/cva6_hpdcache_subsystem_axi_arbiter.sv new file mode 100644 index 00000000..9eb0a8bc --- /dev/null +++ b/test/type_param/core/cache_subsystem/cva6_hpdcache_subsystem_axi_arbiter.sv @@ -0,0 +1,586 @@ +// Copyright 2023 Commissariat a l'Energie Atomique et aux Energies +// Alternatives (CEA) +// +// Licensed under the Solderpad Hardware License, Version 2.1 (the “License”); +// you may not use this file except in compliance with the License. +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// You may obtain a copy of the License at https://solderpad.org/licenses/ +// +// Authors: Cesar Fuguet +// Date: February, 2023 +// Description: AXI arbiter for the CVA6 cache subsystem integrating standard +// CVA6's instruction cache and the Core-V High-Performance +// L1 Dcache (CV-HPDcache). 
+
+module cva6_hpdcache_subsystem_axi_arbiter  // I$ miss port + HPDcache memory ports -> one AXI port
+// Parameters (HPDcache memory-interface widths/types first, then AXI widths/types)
+// {{{
+#(
+    parameter int  HPDcacheMemIdWidth    = 8,
+    parameter int  HPDcacheMemDataWidth  = 512,
+    parameter type hpdcache_mem_req_t    = logic,
+    parameter type hpdcache_mem_req_w_t  = logic,
+    parameter type hpdcache_mem_resp_r_t = logic,
+    parameter type hpdcache_mem_resp_w_t = logic,
+
+    parameter int unsigned AxiAddrWidth  = 1,
+    parameter int unsigned AxiDataWidth  = 1,
+    parameter int unsigned AxiIdWidth    = 1,
+    parameter int unsigned AxiUserWidth  = 1,
+    parameter type         axi_ar_chan_t = logic,
+    parameter type         axi_aw_chan_t = logic,
+    parameter type         axi_w_chan_t  = logic,
+    parameter type         axi_req_t     = logic,
+    parameter type         axi_rsp_t     = logic,
+
+    localparam type hpdcache_mem_id_t = logic [HPDcacheMemIdWidth-1:0]  // derived, not overridable
+)
+// }}}
+
+// Ports
+// {{{
+(
+    input logic clk_i,
+    input logic rst_ni,
+
+    // Interfaces from/to I$
+    // {{{
+    input  logic                      icache_miss_valid_i,
+    output logic                      icache_miss_ready_o,
+    input  wt_cache_pkg::icache_req_t icache_miss_i,
+    input  hpdcache_mem_id_t          icache_miss_id_i,  // ID whose read responses are routed to the I$
+
+    output logic                       icache_miss_resp_valid_o,
+    output wt_cache_pkg::icache_rtrn_t icache_miss_resp_o,
+    // }}}
+
+    // Interfaces from/to D$
+    // {{{
+    output logic              dcache_miss_ready_o,
+    input  logic              dcache_miss_valid_i,
+    input  hpdcache_mem_req_t dcache_miss_i,
+
+    input  logic                 dcache_miss_resp_ready_i,
+    output logic                 dcache_miss_resp_valid_o,
+    output hpdcache_mem_resp_r_t dcache_miss_resp_o,
+
+    // Write-buffer write interface
+    output logic              dcache_wbuf_ready_o,
+    input  logic              dcache_wbuf_valid_i,
+    input  hpdcache_mem_req_t dcache_wbuf_i,
+
+    output logic                dcache_wbuf_data_ready_o,
+    input  logic                dcache_wbuf_data_valid_i,
+    input  hpdcache_mem_req_w_t dcache_wbuf_data_i,
+
+    input  logic                 dcache_wbuf_resp_ready_i,
+    output logic                 dcache_wbuf_resp_valid_o,
+    output hpdcache_mem_resp_w_t dcache_wbuf_resp_o,
+
+    // Uncached read interface
+    output logic              dcache_uc_read_ready_o,
+    input  logic              dcache_uc_read_valid_i,
+    input  hpdcache_mem_req_t dcache_uc_read_i,
+    input  hpdcache_mem_id_t  dcache_uc_read_id_i,  // ID whose read responses go to the uncached port
+
+    input  logic                 dcache_uc_read_resp_ready_i,
+    output logic                 dcache_uc_read_resp_valid_o,
+    output hpdcache_mem_resp_r_t dcache_uc_read_resp_o,
+
+    // Uncached write interface
+    output logic              dcache_uc_write_ready_o,
+    input  logic              dcache_uc_write_valid_i,
+    input  hpdcache_mem_req_t dcache_uc_write_i,
+    input  hpdcache_mem_id_t  dcache_uc_write_id_i,  // ID whose write responses go to the uncached port
+
+    output logic                dcache_uc_write_data_ready_o,
+    input  logic                dcache_uc_write_data_valid_i,
+    input  hpdcache_mem_req_w_t dcache_uc_write_data_i,
+
+    input  logic                 dcache_uc_write_resp_ready_i,
+    output logic                 dcache_uc_write_resp_valid_o,
+    output hpdcache_mem_resp_w_t dcache_uc_write_resp_o,
+    // }}}
+
+    // AXI port to upstream memory/peripherals
+    // {{{
+    output axi_req_t axi_req_o,
+    input  axi_rsp_t axi_resp_i
+    // }}}
+);
+  // }}}
+
+  // Internal type definitions (R and B channel structs are built locally from the width parameters)
+  // {{{
+  typedef struct packed {
+    logic [AxiIdWidth-1:0]   id;
+    logic [AxiDataWidth-1:0] data;
+    axi_pkg::resp_t          resp;
+    logic                    last;
+    logic [AxiUserWidth-1:0] user;
+  } axi_r_chan_t;
+
+  typedef struct packed {
+    logic [AxiIdWidth-1:0]   id;
+    axi_pkg::resp_t          resp;
+    logic [AxiUserWidth-1:0] user;
+  } axi_b_chan_t;
+
+  localparam int MEM_RESP_RT_DEPTH = (1 << HPDcacheMemIdWidth);  // one routing entry per possible ID
+  typedef hpdcache_mem_id_t [MEM_RESP_RT_DEPTH-1:0] mem_resp_rt_t;
+  typedef logic [ariane_pkg::ICACHE_LINE_WIDTH-1:0] icache_resp_data_t;
+  // }}}
+
+  // Adapt the I$ interface to the HPDcache memory interface
+  // {{{
+  localparam int ICACHE_CL_WORDS = ariane_pkg::ICACHE_LINE_WIDTH / 64;
+  localparam int ICACHE_CL_WORD_INDEX = $clog2(ICACHE_CL_WORDS);
+  localparam int ICACHE_CL_SIZE = $clog2(ariane_pkg::ICACHE_LINE_WIDTH / 8);
+  localparam int ICACHE_WORD_SIZE = 3;  // request size used for non-cacheable (nc) fetches
+  localparam int ICACHE_MEM_REQ_CL_LEN =  // ceil(ICACHE_LINE_WIDTH / HPDcacheMemDataWidth)
+  (ariane_pkg::ICACHE_LINE_WIDTH + HPDcacheMemDataWidth - 1) / HPDcacheMemDataWidth;
+  localparam int ICACHE_MEM_REQ_CL_SIZE =
+      (HPDcacheMemDataWidth <= ariane_pkg::ICACHE_LINE_WIDTH) ?
+      $clog2(
+      HPDcacheMemDataWidth / 8
+  ) : ICACHE_CL_SIZE;
+
+  // I$ request
+  hpdcache_mem_req_t icache_miss_req_wdata;
+  logic icache_miss_req_w, icache_miss_req_wok;
+
+  hpdcache_mem_req_t icache_miss_req_rdata;
+  logic icache_miss_req_r, icache_miss_req_rok;
+
+  logic icache_miss_pending_q;
+
+  // This FIFO has two functionalities:
+  // - Stabilize the ready-valid protocol. The ICACHE can abort a valid
+  //   transaction without receiving the corresponding ready signal. This
+  //   behavior is not supported by AXI.
+  // - Cut a possible long timing path.
+  hpdcache_fifo_reg #(
+      .FIFO_DEPTH (1),
+      .fifo_data_t(hpdcache_mem_req_t)
+  ) i_icache_miss_req_fifo (
+      .clk_i,
+      .rst_ni,
+
+      .w_i    (icache_miss_req_w),
+      .wok_o  (icache_miss_req_wok),
+      .wdata_i(icache_miss_req_wdata),
+
+      .r_i    (icache_miss_req_r),
+      .rok_o  (icache_miss_req_rok),
+      .rdata_o(icache_miss_req_rdata)
+  );
+
+  assign icache_miss_req_w = icache_miss_valid_i, icache_miss_ready_o = icache_miss_req_wok;
+
+  assign icache_miss_req_wdata.mem_req_addr = icache_miss_i.paddr,
+      icache_miss_req_wdata.mem_req_len = icache_miss_i.nc ? 0 : ICACHE_MEM_REQ_CL_LEN - 1,
+      icache_miss_req_wdata.mem_req_size = icache_miss_i.nc ? ICACHE_WORD_SIZE : ICACHE_MEM_REQ_CL_SIZE,
+      icache_miss_req_wdata.mem_req_id = icache_miss_i.tid,
+      icache_miss_req_wdata.mem_req_command = hpdcache_pkg::HPDCACHE_MEM_READ,
+      icache_miss_req_wdata.mem_req_atomic = hpdcache_pkg::hpdcache_mem_atomic_e'(0),
+      icache_miss_req_wdata.mem_req_cacheable = ~icache_miss_i.nc;
+
+
+  // I$ response
+  logic icache_miss_resp_w, icache_miss_resp_wok;
+  hpdcache_mem_resp_r_t icache_miss_resp_wdata;
+
+  logic icache_miss_resp_data_w, icache_miss_resp_data_wok;
+  logic icache_miss_resp_data_r, icache_miss_resp_data_rok;
+  icache_resp_data_t icache_miss_resp_data_rdata;
+
+  logic icache_miss_resp_meta_w, icache_miss_resp_meta_wok;
+  logic icache_miss_resp_meta_r, icache_miss_resp_meta_rok;
+  hpdcache_mem_id_t icache_miss_resp_meta_id;
+
+  icache_resp_data_t icache_miss_rdata;
+
+  generate
+    if (HPDcacheMemDataWidth < ariane_pkg::ICACHE_LINE_WIDTH) begin  // narrow bus: reassemble the line
+      hpdcache_fifo_reg #(
+          .FIFO_DEPTH (1),
+          .fifo_data_t(hpdcache_mem_id_t)
+      ) i_icache_refill_meta_fifo (
+          .clk_i,
+          .rst_ni,
+
+          .w_i    (icache_miss_resp_meta_w),
+          .wok_o  (icache_miss_resp_meta_wok),
+          .wdata_i(icache_miss_resp_wdata.mem_resp_r_id),
+
+          .r_i    (icache_miss_resp_meta_r),
+          .rok_o  (icache_miss_resp_meta_rok),
+          .rdata_o(icache_miss_resp_meta_id)
+      );
+
+      hpdcache_data_upsize #(
+          .WR_WIDTH(HPDcacheMemDataWidth),
+          .RD_WIDTH(ariane_pkg::ICACHE_LINE_WIDTH),
+          .DEPTH   (1)
+      ) i_icache_hpdcache_data_upsize (
+          .clk_i,
+          .rst_ni,
+
+          .w_i    (icache_miss_resp_data_w),
+          .wlast_i(icache_miss_resp_wdata.mem_resp_r_last),
+          .wok_o  (icache_miss_resp_data_wok),
+          .wdata_i(icache_miss_resp_wdata.mem_resp_r_data),
+
+          .r_i    (icache_miss_resp_data_r),
+          .rok_o  (icache_miss_resp_data_rok),
+          .rdata_o(icache_miss_resp_data_rdata)
+      );
+
+      assign icache_miss_resp_meta_r = 1'b1, icache_miss_resp_data_r = 1'b1;
+
+      assign icache_miss_resp_meta_w = icache_miss_resp_w & icache_miss_resp_wdata.mem_resp_r_last;
+
+      assign icache_miss_resp_data_w = icache_miss_resp_w;
+
+      assign icache_miss_resp_wok = icache_miss_resp_data_wok & (
+          icache_miss_resp_meta_wok | ~icache_miss_resp_wdata.mem_resp_r_last);
+
+      assign icache_miss_rdata = icache_miss_resp_data_rdata;
+
+    end else begin  // bus at least one line wide: response is passed through combinationally
+      assign icache_miss_resp_data_rok = icache_miss_resp_w;
+      assign icache_miss_resp_meta_rok = icache_miss_resp_w;
+      assign icache_miss_resp_wok = 1'b1;
+      assign icache_miss_resp_meta_id = icache_miss_resp_wdata.mem_resp_r_id;
+      assign icache_miss_resp_data_rdata = icache_miss_resp_wdata.mem_resp_r_data;
+
+      // In the case of uncacheable accesses, the Icache expects the data to be right-aligned
+      always_comb begin : icache_miss_resp_data_comb
+        if (!icache_miss_req_rdata.mem_req_cacheable) begin
+          automatic logic [ICACHE_CL_WORD_INDEX - 1:0] icache_miss_word_index;
+          automatic logic [63:0] icache_miss_word;
+          icache_miss_word_index = icache_miss_req_rdata.mem_req_addr[3+:ICACHE_CL_WORD_INDEX];
+          icache_miss_word = icache_miss_resp_data_rdata[icache_miss_word_index*64+:64];
+          icache_miss_rdata = {{ariane_pkg::ICACHE_LINE_WIDTH - 64{1'b0}}, icache_miss_word};
+        end else begin
+          icache_miss_rdata = icache_miss_resp_data_rdata;
+        end
+      end
+    end
+  endgenerate
+
+  assign icache_miss_resp_valid_o = icache_miss_resp_meta_rok,
+      icache_miss_resp_o.rtype = wt_cache_pkg::ICACHE_IFILL_ACK,
+      icache_miss_resp_o.user = '0,
+      icache_miss_resp_o.inv = '0,
+      icache_miss_resp_o.tid = icache_miss_resp_meta_id,
+      icache_miss_resp_o.data = icache_miss_rdata;
+
+  // consume the Icache miss on the arrival of the response. The request
+  // metadata is decoded to forward the correct word in case of uncacheable
+  // Icache access
+  assign icache_miss_req_r = icache_miss_resp_meta_rok;
+  // }}}
+
+  // Read request arbiter (requester 0: I$ miss, 1: D$ miss, 2: D$ uncached read)
+  // {{{
+  logic              mem_req_read_ready     [2:0];
+  logic              mem_req_read_valid     [2:0];
+  hpdcache_mem_req_t mem_req_read           [2:0];
+
+  logic              mem_req_read_ready_arb;
+  logic              mem_req_read_valid_arb;
+  hpdcache_mem_req_t mem_req_read_arb;
+
+  assign mem_req_read_valid[0] = icache_miss_req_rok & ~icache_miss_pending_q,  // masked while pending
+      mem_req_read[0] = icache_miss_req_rdata;
+
+  assign dcache_miss_ready_o = mem_req_read_ready[1],
+      mem_req_read_valid[1] = dcache_miss_valid_i,
+      mem_req_read[1] = dcache_miss_i;
+
+  assign dcache_uc_read_ready_o = mem_req_read_ready[2],
+      mem_req_read_valid[2] = dcache_uc_read_valid_i,
+      mem_req_read[2] = dcache_uc_read_i;
+
+  hpdcache_mem_req_read_arbiter #(
+      .N                 (3),
+      .hpdcache_mem_req_t(hpdcache_mem_req_t)
+  ) i_mem_req_read_arbiter (
+      .clk_i,
+      .rst_ni,
+
+      .mem_req_read_ready_o(mem_req_read_ready),
+      .mem_req_read_valid_i(mem_req_read_valid),
+      .mem_req_read_i      (mem_req_read),
+
+      .mem_req_read_ready_i(mem_req_read_ready_arb),
+      .mem_req_read_valid_o(mem_req_read_valid_arb),
+      .mem_req_read_o      (mem_req_read_arb)
+  );
+  // }}}
+
+  // Read response demultiplexor (output 0: I$, 1: D$ miss, 2: D$ uncached read)
+  // {{{
+  logic                 mem_resp_read_ready;
+  logic                 mem_resp_read_valid;
+  hpdcache_mem_resp_r_t mem_resp_read;
+
+  logic                 mem_resp_read_ready_arb[2:0];
+  logic                 mem_resp_read_valid_arb[2:0];
+  hpdcache_mem_resp_r_t mem_resp_read_arb      [2:0];
+
+  mem_resp_rt_t         mem_resp_read_rt;
+
+  always_comb begin  // routing table: I$ ID -> 0, uncached-read ID -> 2, any other ID -> 1
+    for (int i = 0; i < MEM_RESP_RT_DEPTH; i++) begin
+      mem_resp_read_rt[i] = (i == int'(    icache_miss_id_i)) ? 0 :
+                            (i == int'(dcache_uc_read_id_i)) ? 2 : 1;
+    end
+  end
+
+  hpdcache_mem_resp_demux #(
+      .N        (3),
+      .resp_t   (hpdcache_mem_resp_r_t),
+      .resp_id_t(hpdcache_mem_id_t)
+  ) i_mem_resp_read_demux (
+      .clk_i,
+      .rst_ni,
+
+      .mem_resp_ready_o(mem_resp_read_ready),
+      .mem_resp_valid_i(mem_resp_read_valid),
+      .mem_resp_id_i   (mem_resp_read.mem_resp_r_id),
+      .mem_resp_i      (mem_resp_read),
+
+      .mem_resp_ready_i(mem_resp_read_ready_arb),
+      .mem_resp_valid_o(mem_resp_read_valid_arb),
+      .mem_resp_o      (mem_resp_read_arb),
+
+      .mem_resp_rt_i(mem_resp_read_rt)
+  );
+
+  assign icache_miss_resp_w = mem_resp_read_valid_arb[0],
+      icache_miss_resp_wdata = mem_resp_read_arb[0],
+      mem_resp_read_ready_arb[0] = icache_miss_resp_wok;
+
+  assign dcache_miss_resp_valid_o = mem_resp_read_valid_arb[1],
+      dcache_miss_resp_o = mem_resp_read_arb[1],
+      mem_resp_read_ready_arb[1] = dcache_miss_resp_ready_i;
+
+  assign dcache_uc_read_resp_valid_o = mem_resp_read_valid_arb[2],
+      dcache_uc_read_resp_o = mem_resp_read_arb[2],
+      mem_resp_read_ready_arb[2] = dcache_uc_read_resp_ready_i;
+  // }}}
+
+  // Write request arbiter (requester 0: D$ write buffer, 1: D$ uncached write)
+  // {{{
+  logic                mem_req_write_ready          [1:0];
+  logic                mem_req_write_valid          [1:0];
+  hpdcache_mem_req_t   mem_req_write                [1:0];
+
+  logic                mem_req_write_data_ready     [1:0];
+  logic                mem_req_write_data_valid     [1:0];
+  hpdcache_mem_req_w_t mem_req_write_data           [1:0];
+
+  logic                mem_req_write_ready_arb;
+  logic                mem_req_write_valid_arb;
+  hpdcache_mem_req_t   mem_req_write_arb;
+
+  logic                mem_req_write_data_ready_arb;
+  logic                mem_req_write_data_valid_arb;
+  hpdcache_mem_req_w_t mem_req_write_data_arb;
+
+  assign dcache_wbuf_ready_o = mem_req_write_ready[0],
+      mem_req_write_valid[0] = dcache_wbuf_valid_i,
+      mem_req_write[0] = dcache_wbuf_i;
+
+  assign dcache_wbuf_data_ready_o = mem_req_write_data_ready[0],
+      mem_req_write_data_valid[0] = dcache_wbuf_data_valid_i,
+      mem_req_write_data[0] = dcache_wbuf_data_i;
+
+  assign dcache_uc_write_ready_o = mem_req_write_ready[1],
+      mem_req_write_valid[1] = dcache_uc_write_valid_i,
+      mem_req_write[1] = dcache_uc_write_i;
+
+  assign dcache_uc_write_data_ready_o = mem_req_write_data_ready[1],
+      mem_req_write_data_valid[1] = dcache_uc_write_data_valid_i,
+      mem_req_write_data[1] = dcache_uc_write_data_i;
+
+  hpdcache_mem_req_write_arbiter #(
+      .N                   (2),
+      .hpdcache_mem_req_t  (hpdcache_mem_req_t),
+      .hpdcache_mem_req_w_t(hpdcache_mem_req_w_t)
+  ) i_mem_req_write_arbiter (
+      .clk_i,
+      .rst_ni,
+
+      .mem_req_write_ready_o(mem_req_write_ready),
+      .mem_req_write_valid_i(mem_req_write_valid),
+      .mem_req_write_i      (mem_req_write),
+
+      .mem_req_write_data_ready_o(mem_req_write_data_ready),
+      .mem_req_write_data_valid_i(mem_req_write_data_valid),
+      .mem_req_write_data_i      (mem_req_write_data),
+
+      .mem_req_write_ready_i(mem_req_write_ready_arb),
+      .mem_req_write_valid_o(mem_req_write_valid_arb),
+      .mem_req_write_o      (mem_req_write_arb),
+
+      .mem_req_write_data_ready_i(mem_req_write_data_ready_arb),
+      .mem_req_write_data_valid_o(mem_req_write_data_valid_arb),
+      .mem_req_write_data_o      (mem_req_write_data_arb)
+  );
+  // }}}
+
+  // Write response demultiplexor (output 0: D$ write buffer, 1: D$ uncached write)
+  // {{{
+  logic                 mem_resp_write_ready;
+  logic                 mem_resp_write_valid;
+  hpdcache_mem_resp_w_t mem_resp_write;
+
+  logic                 mem_resp_write_ready_arb[1:0];
+  logic                 mem_resp_write_valid_arb[1:0];
+  hpdcache_mem_resp_w_t mem_resp_write_arb      [1:0];
+
+  mem_resp_rt_t         mem_resp_write_rt;
+
+  always_comb begin  // routing table: uncached-write ID -> 1, any other ID -> 0
+    for (int i = 0; i < MEM_RESP_RT_DEPTH; i++) begin
+      mem_resp_write_rt[i] = (i == int'(dcache_uc_write_id_i)) ? 1 : 0;
+    end
+  end
+
+  hpdcache_mem_resp_demux #(
+      .N        (2),
+      .resp_t   (hpdcache_mem_resp_w_t),
+      .resp_id_t(hpdcache_mem_id_t)
+  ) i_hpdcache_mem_resp_write_demux (
+      .clk_i,
+      .rst_ni,
+
+      .mem_resp_ready_o(mem_resp_write_ready),
+      .mem_resp_valid_i(mem_resp_write_valid),
+      .mem_resp_id_i   (mem_resp_write.mem_resp_w_id),
+      .mem_resp_i      (mem_resp_write),
+
+      .mem_resp_ready_i(mem_resp_write_ready_arb),
+      .mem_resp_valid_o(mem_resp_write_valid_arb),
+      .mem_resp_o      (mem_resp_write_arb),
+
+      .mem_resp_rt_i(mem_resp_write_rt)
+  );
+
+  assign dcache_wbuf_resp_valid_o = mem_resp_write_valid_arb[0],
+      dcache_wbuf_resp_o = mem_resp_write_arb[0],
+      mem_resp_write_ready_arb[0] = dcache_wbuf_resp_ready_i;
+
+  assign dcache_uc_write_resp_valid_o = mem_resp_write_valid_arb[1],
+      dcache_uc_write_resp_o = mem_resp_write_arb[1],
+      mem_resp_write_ready_arb[1] = dcache_uc_write_resp_ready_i;
+  // }}}
+
+  // I$ miss pending (set when requester 0 is granted, cleared when the request FIFO is popped)
+  // {{{
+  always_ff @(posedge clk_i or negedge rst_ni) begin : icache_miss_pending_ff
+    if (!rst_ni) begin
+      icache_miss_pending_q <= 1'b0;
+    end else begin
+      icache_miss_pending_q <= ( (icache_miss_req_rok & mem_req_read_ready[0]) & ~icache_miss_pending_q) |
+                               (~(icache_miss_req_r & icache_miss_req_rok) & icache_miss_pending_q);
+    end
+  end
+  // }}}
+
+  // AXI adapters
+  // {{{
+  axi_req_t axi_req;
+  axi_rsp_t axi_resp;
+
+  hpdcache_mem_to_axi_write #(
+      .hpdcache_mem_req_t   (hpdcache_mem_req_t),
+      .hpdcache_mem_req_w_t (hpdcache_mem_req_w_t),
+      .hpdcache_mem_resp_w_t(hpdcache_mem_resp_w_t),
+      .aw_chan_t            (axi_aw_chan_t),
+      .w_chan_t             (axi_w_chan_t),
+      .b_chan_t             (axi_b_chan_t)
+  ) i_hpdcache_mem_to_axi_write (
+      .req_ready_o(mem_req_write_ready_arb),
+      .req_valid_i(mem_req_write_valid_arb),
+      .req_i      (mem_req_write_arb),
+
+      .req_data_ready_o(mem_req_write_data_ready_arb),
+      .req_data_valid_i(mem_req_write_data_valid_arb),
+      .req_data_i      (mem_req_write_data_arb),
+
+      .resp_ready_i(mem_resp_write_ready),
+      .resp_valid_o(mem_resp_write_valid),
+      .resp_o      (mem_resp_write),
+
+      .axi_aw_valid_o(axi_req.aw_valid),
+      .axi_aw_o      (axi_req.aw),
+      .axi_aw_ready_i(axi_resp.aw_ready),
+
+      .axi_w_valid_o(axi_req.w_valid),
+      .axi_w_o      (axi_req.w),
+      .axi_w_ready_i(axi_resp.w_ready),
+
+      .axi_b_valid_i(axi_resp.b_valid),
+      .axi_b_i      (axi_resp.b),
+      .axi_b_ready_o(axi_req.b_ready)
+  );
+
+  hpdcache_mem_to_axi_read #(
+      .hpdcache_mem_req_t   (hpdcache_mem_req_t),
+      .hpdcache_mem_resp_r_t(hpdcache_mem_resp_r_t),
+      .ar_chan_t            (axi_ar_chan_t),
+      .r_chan_t             (axi_r_chan_t)
+  ) i_hpdcache_mem_to_axi_read (
+      .req_ready_o(mem_req_read_ready_arb),
+      .req_valid_i(mem_req_read_valid_arb),
+      .req_i      (mem_req_read_arb),
+
+      .resp_ready_i(mem_resp_read_ready),
+      .resp_valid_o(mem_resp_read_valid),
+      .resp_o      (mem_resp_read),
+
+      .axi_ar_valid_o(axi_req.ar_valid),
+      .axi_ar_o      (axi_req.ar),
+      .axi_ar_ready_i(axi_resp.ar_ready),
+
+      .axi_r_valid_i(axi_resp.r_valid),
+      .axi_r_i      (axi_resp.r),
+      .axi_r_ready_o(axi_req.r_ready)
+  );
+
+  assign axi_req_o = axi_req;
+  assign axi_resp  = axi_resp_i;
+  // }}}
+
+  // Assertions (parameter sanity checks; $fatal takes the finish_number as its first argument)
+  // {{{
+  // pragma translate_off
+  initial
+    assert (HPDcacheMemIdWidth <= AxiIdWidth)
+    else $fatal(1, "HPDcacheMemIdWidth shall be less or equal to AxiIdWidth");
+  initial
+    assert (HPDcacheMemIdWidth >= (hpdcache_pkg::HPDCACHE_MSHR_SET_WIDTH + hpdcache_pkg::HPDCACHE_MSHR_WAY_WIDTH + 1))
+    else
+      $fatal(
+          1, "HPDcacheMemIdWidth shall be wide enough to identify all pending HPDcache misses and Icache misses"
+      );
+  initial
+    assert (HPDcacheMemIdWidth >= (hpdcache_pkg::HPDCACHE_WBUF_DIR_PTR_WIDTH + 1))
+    else
+      $fatal(
+          1, "HPDcacheMemIdWidth shall be wide enough to identify all pending HPDcache cacheable writes and uncacheable writes"
+      );
+  initial
+    assert (HPDcacheMemDataWidth <= ariane_pkg::ICACHE_LINE_WIDTH)
+    else $fatal(1, "HPDcacheMemDataWidth shall be less or equal to the width of a Icache line");
+  initial
+    assert (HPDcacheMemDataWidth <= ariane_pkg::DCACHE_LINE_WIDTH)
+    else $fatal(1, "HPDcacheMemDataWidth shall be less or equal to the width of a Dcache line");
+  // pragma translate_on
+  // }}}
+
+endmodule : cva6_hpdcache_subsystem_axi_arbiter
diff --git a/test/type_param/core/cache_subsystem/cva6_icache.sv b/test/type_param/core/cache_subsystem/cva6_icache.sv
new file mode 100644
index 00000000..37dd8d1d
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/cva6_icache.sv
@@ -0,0 +1,584 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Michael Schaffner , ETH Zurich
+// Date: 15.08.2018
+// Description: Instruction cache that is compatible with openpiton.
+//
+// Some notes:
+//
+// 1) refills always have the size of one cache line, except for accesses to the I/O region, which is mapped
+//    to the top half of the physical address space (bit 39 = 1). the data width of the interface has the width
+//    of one cache line, and hence the ifills can be transferred in a single cycle. note that the ifills must be
+//    consumed unconditionally.
+//
+// 2) instruction fetches are always assumed to be aligned to 32bit (lower 2 bits are ignored)
+//
+// 3) NC accesses to I/O space are expected to return 32bit from memory.
+// + + +module cva6_icache + import ariane_pkg::*; + import wt_cache_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + /// ID to be used for read transactions + parameter logic [MEM_TID_WIDTH-1:0] RdTxId = 0 +) ( + input logic clk_i, + input logic rst_ni, + + /// flush the icache, flush and kill have to be asserted together + input logic flush_i, + /// enable icache + input logic en_i, + /// to performance counter + output logic miss_o, + // address translation requests + input icache_areq_t areq_i, + output icache_arsp_t areq_o, + // data requests + input icache_dreq_t dreq_i, + output icache_drsp_t dreq_o, + // refill port + input logic mem_rtrn_vld_i, + input icache_rtrn_t mem_rtrn_i, + output logic mem_data_req_o, + input logic mem_data_ack_i, + output icache_req_t mem_data_o +); + + // functions + function automatic logic [ariane_pkg::ICACHE_SET_ASSOC-1:0] icache_way_bin2oh( + input logic [L1I_WAY_WIDTH-1:0] in); + logic [ariane_pkg::ICACHE_SET_ASSOC-1:0] out; + out = '0; + out[in] = 1'b1; + return out; + endfunction + + // signals + logic cache_en_d, cache_en_q; // cache is enabled + logic [riscv::VLEN-1:0] vaddr_d, vaddr_q; + logic paddr_is_nc; // asserted if physical address is non-cacheable + logic [ICACHE_SET_ASSOC-1:0] cl_hit; // hit from tag compare + logic cache_rden; // triggers cache lookup + logic cache_wren; // triggers write to cacheline + logic + cmp_en_d, + cmp_en_q; // enable tag comparison in next cycle. used to cut long path due to NC signal. 
+ logic flush_d, flush_q; // used to register and signal pending flushes + + // replacement strategy + logic update_lfsr; // shift the LFSR + logic [$clog2(ICACHE_SET_ASSOC)-1:0] inv_way; // first non-valid encountered + logic [$clog2(ICACHE_SET_ASSOC)-1:0] rnd_way; // random index for replacement + logic [$clog2(ICACHE_SET_ASSOC)-1:0] repl_way; // way to replace + logic [ICACHE_SET_ASSOC-1:0] repl_way_oh_d, repl_way_oh_q; // way to replace (onehot) + logic all_ways_valid; // we need to switch repl strategy since all are valid + + // invalidations / flushing + logic inv_en; // incoming invalidations + logic inv_d, inv_q; // invalidation in progress + logic flush_en, flush_done; // used to flush cache entries + logic [ICACHE_CL_IDX_WIDTH-1:0] flush_cnt_d, flush_cnt_q; // used to flush cache entries + + // mem arrays + logic cl_we; // write enable to memory array + logic [ ICACHE_SET_ASSOC-1:0] cl_req; // request to memory array + logic [ICACHE_CL_IDX_WIDTH-1:0] cl_index; // this is a cache-line index, to memory array + logic [ICACHE_OFFSET_WIDTH-1:0] cl_offset_d, cl_offset_q; // offset in cache line + logic [ICACHE_TAG_WIDTH-1:0] cl_tag_d, cl_tag_q; // this is the cache tag + logic [ICACHE_TAG_WIDTH-1:0] cl_tag_rdata [ICACHE_SET_ASSOC-1:0]; // these are the tags coming from the tagmem + logic [ICACHE_LINE_WIDTH-1:0] cl_rdata [ICACHE_SET_ASSOC-1:0]; // these are the cachelines coming from the cache + logic [ICACHE_USER_LINE_WIDTH-1:0] cl_ruser[ICACHE_SET_ASSOC-1:0]; // these are the cachelines coming from the user cache + logic [ICACHE_SET_ASSOC-1:0][FETCH_WIDTH-1:0] cl_sel; // selected word from each cacheline + logic [ICACHE_SET_ASSOC-1:0][FETCH_USER_WIDTH-1:0] cl_user; // selected word from each cacheline + logic [ICACHE_SET_ASSOC-1:0] vld_req; // bit enable for valid regs + logic vld_we; // valid bits write enable + logic [ICACHE_SET_ASSOC-1:0] vld_wdata; // valid bits to write + logic [ICACHE_SET_ASSOC-1:0] vld_rdata; // valid bits coming from valid regs + logic 
[ICACHE_CL_IDX_WIDTH-1:0] vld_addr; // valid bit + + // cpmtroller FSM + typedef enum logic [2:0] { + FLUSH, + IDLE, + READ, + MISS, + KILL_ATRANS, + KILL_MISS + } state_e; + state_e state_d, state_q; + + /////////////////////////////////////////////////////// + // address -> cl_index mapping, interface plumbing + /////////////////////////////////////////////////////// + + // extract tag from physical address, check if NC + assign cl_tag_d = (areq_i.fetch_valid) ? areq_i.fetch_paddr[ICACHE_TAG_WIDTH+ICACHE_INDEX_WIDTH-1:ICACHE_INDEX_WIDTH] : cl_tag_q; + + // noncacheable if request goes to I/O space, or if cache is disabled + assign paddr_is_nc = (~cache_en_q) | (~config_pkg::is_inside_cacheable_regions( + CVA6Cfg, {{64 - riscv::PLEN{1'b0}}, cl_tag_d, {ICACHE_INDEX_WIDTH{1'b0}}} + )); + + // pass exception through + assign dreq_o.ex = areq_i.fetch_exception; + + // latch this in case we have to stall later on + // make sure this is 32bit aligned + assign vaddr_d = (dreq_o.ready & dreq_i.req) ? dreq_i.vaddr : vaddr_q; + assign areq_o.fetch_vaddr = {vaddr_q[riscv::VLEN-1:2], 2'b0}; + + // split virtual address into index and offset to address cache arrays + assign cl_index = vaddr_d[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH]; + + + if (CVA6Cfg.NOCType == config_pkg::NOC_TYPE_AXI4_ATOP) begin : gen_axi_offset + // if we generate a noncacheable access, the word will be at offset 0 or 4 in the cl coming from memory + assign cl_offset_d = ( dreq_o.ready & dreq_i.req) ? {dreq_i.vaddr[ICACHE_OFFSET_WIDTH-1:2], 2'b0} : + ( paddr_is_nc & mem_data_req_o ) ? {{ICACHE_OFFSET_WIDTH-1{1'b0}}, cl_offset_q[2]}<<2 : // needed since we transfer 32bit over a 64bit AXI bus in this case + cl_offset_q; + // request word address instead of cl address in case of NC access + assign mem_data_o.paddr = (paddr_is_nc) ? 
{cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:3], 3'b0} : // align to 64bit + {cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH], {ICACHE_OFFSET_WIDTH{1'b0}}}; // align to cl + end else begin : gen_piton_offset + // icache fills are either cachelines or 4byte fills, depending on whether they go to the Piton I/O space or not. + // since the piton cache system replicates the data, we can always index the full CL + assign cl_offset_d = (dreq_o.ready & dreq_i.req) ? {dreq_i.vaddr >> 2, 2'b0} : cl_offset_q; + + // request word address instead of cl address in case of NC access + assign mem_data_o.paddr = (paddr_is_nc) ? {cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:2], 2'b0} : // align to 32bit + {cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH], {ICACHE_OFFSET_WIDTH{1'b0}}}; // align to cl + end + + + assign mem_data_o.tid = RdTxId; + + assign mem_data_o.nc = paddr_is_nc; + // way that is being replaced + assign mem_data_o.way = repl_way; + assign dreq_o.vaddr = vaddr_q; + + // invalidations take two cycles + assign inv_d = inv_en; + + /////////////////////////////////////////////////////// + // main control logic + /////////////////////////////////////////////////////// + logic addr_ni; + assign addr_ni = config_pkg::is_inside_nonidempotent_regions( + CVA6Cfg, {{64 - riscv::PLEN{1'b0}}, areq_i.fetch_paddr} + ); + always_comb begin : p_fsm + // default assignment + state_d = state_q; + cache_en_d = cache_en_q & en_i;// disabling the cache is always possible, enable needs to go via flush + flush_en = 1'b0; + cmp_en_d = 1'b0; + cache_rden = 1'b0; + cache_wren = 1'b0; + inv_en = 1'b0; + flush_d = flush_q | flush_i; // register incoming flush + + // interfaces + dreq_o.ready = 1'b0; + areq_o.fetch_req = 1'b0; + dreq_o.valid = 1'b0; + mem_data_req_o = 1'b0; + // performance counter + miss_o = 1'b0; + + // handle invalidations unconditionally + // note: invald are mutually exclusive with + // ifills, since both arrive over the same IF + // however, we need to make 
sure below that we + // do not trigger a cache readout at the same time... + if (mem_rtrn_vld_i && mem_rtrn_i.rtype == ICACHE_INV_REQ) begin + inv_en = 1'b1; + end + + unique case (state_q) + ////////////////////////////////// + // this clears all valid bits + FLUSH: begin + flush_en = 1'b1; + if (flush_done) begin + state_d = IDLE; + flush_d = 1'b0; + // if the cache was not enabled set this + cache_en_d = en_i; + end + end + ////////////////////////////////// + // wait for an incoming request + IDLE: begin + // only enable tag comparison if cache is enabled + cmp_en_d = cache_en_q; + + // handle pending flushes, or perform cache clear upon enable + if (flush_d || (en_i && !cache_en_q)) begin + state_d = FLUSH; + // wait for incoming requests + end else begin + // mem requests are for sure invals here + if (!mem_rtrn_vld_i) begin + dreq_o.ready = 1'b1; + // we have a new request + if (dreq_i.req) begin + cache_rden = 1'b1; + state_d = READ; + end + end + if (dreq_i.kill_s1) begin + state_d = IDLE; + end + end + end + ////////////////////////////////// + // check whether we have a hit + // in case the cache is disabled, + // or in case the address is NC, we + // reuse the miss mechanism to handle + // the request + READ: begin + areq_o.fetch_req = '1; + // only enable tag comparison if cache is enabled + cmp_en_d = cache_en_q; + // readout speculatively + cache_rden = cache_en_q; + + if (areq_i.fetch_valid && (!dreq_i.spec || ((CVA6Cfg.NonIdemPotenceEn && !addr_ni) || (!CVA6Cfg.NonIdemPotenceEn)))) begin + // check if we have to flush + if (flush_d) begin + state_d = IDLE; + // we have a hit or an exception output valid result + end else if (((|cl_hit && cache_en_q) || areq_i.fetch_exception.valid) && !inv_q) begin + dreq_o.valid = ~dreq_i.kill_s2; // just don't output in this case + state_d = IDLE; + + // we can accept another request + // and stay here, but only if no inval is coming in + // note: we are not expecting ifill return packets here... 
+ if (!mem_rtrn_vld_i) begin + dreq_o.ready = 1'b1; + if (dreq_i.req) begin + state_d = READ; + end + end + // if a request is being killed at this stage, + // we have to bail out and wait for the address translation to complete + if (dreq_i.kill_s1) begin + state_d = IDLE; + end + // we have a miss / NC transaction + end else if (dreq_i.kill_s2) begin + state_d = IDLE; + end else if (!inv_q) begin + cmp_en_d = 1'b0; + // only count this as a miss if the cache is enabled, and + // the address is cacheable + // send out ifill request + mem_data_req_o = 1'b1; + if (mem_data_ack_i) begin + miss_o = ~paddr_is_nc; + state_d = MISS; + end + end + // bail out if this request is being killed (and we missed on the TLB) + end else if (dreq_i.kill_s2 || flush_d) begin + state_d = KILL_ATRANS; + end + end + ////////////////////////////////// + // wait until the memory transaction + // returns. do not write to memory + // if the nc bit is set. + MISS: begin + // note: this is mutually exclusive with ICACHE_INV_REQ, + // so we do not have to check for invals here + if (mem_rtrn_vld_i && mem_rtrn_i.rtype == ICACHE_IFILL_ACK) begin + state_d = IDLE; + // only return data if request is not being killed + if (!(dreq_i.kill_s2 || flush_d)) begin + dreq_o.valid = 1'b1; + // only write to cache if this address is cacheable + cache_wren = ~paddr_is_nc; + end + // bail out if this request is being killed + end else if (dreq_i.kill_s2 || flush_d) begin + state_d = KILL_MISS; + end + end + ////////////////////////////////// + // killed address translation, + // wait until paddr is valid, and go + // back to idle + KILL_ATRANS: begin + areq_o.fetch_req = '1; + if (areq_i.fetch_valid) begin + state_d = IDLE; + end + end + ////////////////////////////////// + // killed miss, + // wait until memory responds and + // go back to idle + KILL_MISS: begin + if (mem_rtrn_vld_i && mem_rtrn_i.rtype == ICACHE_IFILL_ACK) begin + state_d = IDLE; + end + end + default: begin + // we should never get here 
+ state_d = FLUSH; + end + endcase // state_q + end + + /////////////////////////////////////////////////////// + // valid bit invalidation and replacement strategy + /////////////////////////////////////////////////////// + + // note: it cannot happen that we get an invalidation + a cl replacement + // in the same cycle as these requests arrive via the same interface + // flushes take precedence over invalidations (it is ok if we ignore + // the inval since the cache is cleared anyway) + + assign flush_cnt_d = (flush_done) ? '0 : (flush_en) ? flush_cnt_q + 1 : flush_cnt_q; + + assign flush_done = (flush_cnt_q == (ICACHE_NUM_WORDS - 1)); + + // invalidation/clearing address + // flushing takes precedence over invals + assign vld_addr = (flush_en) ? flush_cnt_q : + (inv_en) ? mem_rtrn_i.inv.idx[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH] : + cl_index; + + assign vld_req = (flush_en || cache_rden) ? '1 : + (mem_rtrn_i.inv.all && inv_en) ? '1 : + (mem_rtrn_i.inv.vld && inv_en) ? icache_way_bin2oh( + mem_rtrn_i.inv.way + ) : repl_way_oh_q; + + assign vld_wdata = (cache_wren) ? '1 : '0; + + assign vld_we = (cache_wren | inv_en | flush_en); + // assign vld_req = (vld_we | cache_rden); + + + // chose random replacement if all are valid + assign update_lfsr = cache_wren & all_ways_valid; + assign repl_way = (all_ways_valid) ? rnd_way : inv_way; + assign repl_way_oh_d = (cmp_en_q) ? icache_way_bin2oh(repl_way) : repl_way_oh_q; + + // enable signals for memory arrays + assign cl_req = (cache_rden) ? '1 : (cache_wren) ? 
repl_way_oh_q : '0; + assign cl_we = cache_wren; + + + // find invalid cache line + lzc #( + .WIDTH(ICACHE_SET_ASSOC) + ) i_lzc ( + .in_i (~vld_rdata), + .cnt_o (inv_way), + .empty_o(all_ways_valid) + ); + + // generate random cacheline index + lfsr #( + .LfsrWidth(8), + .OutWidth ($clog2(ariane_pkg::ICACHE_SET_ASSOC)) + ) i_lfsr ( + .clk_i (clk_i), + .rst_ni(rst_ni), + .en_i (update_lfsr), + .out_o (rnd_way) + ); + + + /////////////////////////////////////////////////////// + // tag comparison, hit generation + /////////////////////////////////////////////////////// + + logic [$clog2(ICACHE_SET_ASSOC)-1:0] hit_idx; + + for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin : gen_tag_cmpsel + assign cl_hit[i] = (cl_tag_rdata[i] == cl_tag_d) & vld_rdata[i]; + assign cl_sel[i] = cl_rdata[i][{cl_offset_q, 3'b0}+:FETCH_WIDTH]; + assign cl_user[i] = cl_ruser[i][{cl_offset_q, 3'b0}+:FETCH_USER_WIDTH]; + end + + + lzc #( + .WIDTH(ICACHE_SET_ASSOC) + ) i_lzc_hit ( + .in_i (cl_hit), + .cnt_o (hit_idx), + .empty_o() + ); + + always_comb begin + if (cmp_en_q) begin + dreq_o.data = cl_sel[hit_idx]; + dreq_o.user = cl_user[hit_idx]; + end else begin + dreq_o.data = mem_rtrn_i.data[{cl_offset_q, 3'b0}+:FETCH_WIDTH]; + dreq_o.user = mem_rtrn_i.user[{cl_offset_q, 3'b0}+:FETCH_USER_WIDTH]; + end + end + + /////////////////////////////////////////////////////// + // memory arrays and regs + /////////////////////////////////////////////////////// + + + logic [ICACHE_TAG_WIDTH:0] cl_tag_valid_rdata[ICACHE_SET_ASSOC-1:0]; + + for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin : gen_sram + // Tag RAM + sram #( + // tag + valid bit + .DATA_WIDTH(ICACHE_TAG_WIDTH + 1), + .NUM_WORDS (ICACHE_NUM_WORDS) + ) tag_sram ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .req_i (vld_req[i]), + .we_i (vld_we), + .addr_i (vld_addr), + // we can always use the saved tag here since it takes a + // couple of cycle until we write to the cache upon a miss + .wuser_i('0), + .wdata_i({vld_wdata[i], cl_tag_q}), + 
.be_i ('1), + .ruser_o(), + .rdata_o(cl_tag_valid_rdata[i]) + ); + + assign cl_tag_rdata[i] = cl_tag_valid_rdata[i][ICACHE_TAG_WIDTH-1:0]; + assign vld_rdata[i] = cl_tag_valid_rdata[i][ICACHE_TAG_WIDTH]; + + // Data RAM + sram #( + .USER_WIDTH(ICACHE_USER_LINE_WIDTH), + .DATA_WIDTH(ICACHE_LINE_WIDTH), + .USER_EN (ariane_pkg::FETCH_USER_EN), + .NUM_WORDS (ICACHE_NUM_WORDS) + ) data_sram ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .req_i (cl_req[i]), + .we_i (cl_we), + .addr_i (cl_index), + .wuser_i(mem_rtrn_i.user), + .wdata_i(mem_rtrn_i.data), + .be_i ('1), + .ruser_o(cl_ruser[i]), + .rdata_o(cl_rdata[i]) + ); + end + + + always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs + if (!rst_ni) begin + cl_tag_q <= '0; + flush_cnt_q <= '0; + vaddr_q <= '0; + cmp_en_q <= '0; + cache_en_q <= '0; + flush_q <= '0; + state_q <= FLUSH; + cl_offset_q <= '0; + repl_way_oh_q <= '0; + inv_q <= '0; + end else begin + cl_tag_q <= cl_tag_d; + flush_cnt_q <= flush_cnt_d; + vaddr_q <= vaddr_d; + cmp_en_q <= cmp_en_d; + cache_en_q <= cache_en_d; + flush_q <= flush_d; + state_q <= state_d; + cl_offset_q <= cl_offset_d; + repl_way_oh_q <= repl_way_oh_d; + inv_q <= inv_d; + end + end + + /////////////////////////////////////////////////////// + // assertions + /////////////////////////////////////////////////////// + + //pragma translate_off +`ifndef VERILATOR + repl_inval0 : + assert property ( + @(posedge clk_i) disable iff (!rst_ni) cache_wren |-> !(mem_rtrn_i.inv.all | mem_rtrn_i.inv.vld)) + else $fatal(1, "[l1 icache] cannot replace cacheline and invalidate cacheline simultaneously"); + + repl_inval1 : + assert property ( + @(posedge clk_i) disable iff (!rst_ni) (mem_rtrn_i.inv.all | mem_rtrn_i.inv.vld) |-> !cache_wren) + else $fatal(1, "[l1 icache] cannot replace cacheline and invalidate cacheline simultaneously"); + + invalid_state : + assert property ( + @(posedge clk_i) disable iff (!rst_ni) (state_q inside {FLUSH, IDLE, READ, MISS, KILL_ATRANS, KILL_MISS})) + else $fatal(1, 
"[l1 icache] fsm reached an invalid state"); + + hot1 : + assert property ( + @(posedge clk_i) disable iff (!rst_ni) (!inv_en) |-> cache_rden |=> cmp_en_q |-> $onehot0( + cl_hit + )) + else $fatal(1, "[l1 icache] cl_hit signal must be hot1"); + + // this is only used for verification! + logic vld_mirror[wt_cache_pkg::ICACHE_NUM_WORDS-1:0][ariane_pkg::ICACHE_SET_ASSOC-1:0]; + logic [ariane_pkg::ICACHE_TAG_WIDTH-1:0] tag_mirror[wt_cache_pkg::ICACHE_NUM_WORDS-1:0][ariane_pkg::ICACHE_SET_ASSOC-1:0]; + logic [ariane_pkg::ICACHE_SET_ASSOC-1:0] tag_write_duplicate_test; + + always_ff @(posedge clk_i or negedge rst_ni) begin : p_mirror + if (!rst_ni) begin + vld_mirror <= '{default: '0}; + tag_mirror <= '{default: '0}; + end else begin + for (int i = 0; i < ICACHE_SET_ASSOC; i++) begin + if (vld_req[i] & vld_we) begin + vld_mirror[vld_addr][i] <= vld_wdata[i]; + tag_mirror[vld_addr][i] <= cl_tag_q; + end + end + end + end + + for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin : gen_tag_dupl + assign tag_write_duplicate_test[i] = (tag_mirror[vld_addr][i] == cl_tag_q) & vld_mirror[vld_addr][i] & (|vld_wdata); + end + + tag_write_duplicate : + assert property ( + @(posedge clk_i) disable iff (!rst_ni) |vld_req |-> vld_we |-> !(|tag_write_duplicate_test)) + else $fatal(1, "[l1 icache] cannot allocate a CL that is already present in the cache"); + + + initial begin + // assert wrong parameterizations + assert (ICACHE_INDEX_WIDTH <= 12) + else $fatal(1, "[l1 icache] cache index width can be maximum 12bit since VM uses 4kB pages"); + end +`endif + //pragma translate_on + +endmodule // cva6_icache diff --git a/test/type_param/core/cache_subsystem/cva6_icache_axi_wrapper.sv b/test/type_param/core/cache_subsystem/cva6_icache_axi_wrapper.sv new file mode 100644 index 00000000..7579fe4b --- /dev/null +++ b/test/type_param/core/cache_subsystem/cva6_icache_axi_wrapper.sv @@ -0,0 +1,202 @@ +// Copyright 2018 ETH Zurich and University of Bologna. 
+// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Nils Wistoff , ETH Zurich +// Date: 07.09.2020 +// Description: wrapper module to connect the L1I$ to an AXI bus; refill bursts are sized from CVA6Cfg.AxiDataWidth. +// + +module cva6_icache_axi_wrapper + import ariane_pkg::*; + import wt_cache_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type axi_req_t = logic, + parameter type axi_rsp_t = logic +) ( + input logic clk_i, + input logic rst_ni, + input riscv::priv_lvl_t priv_lvl_i, + + input logic flush_i, // flush the icache, flush and kill have to be asserted together + input logic en_i, // enable icache + output logic miss_o, // to performance counter + // address translation requests + input icache_areq_t areq_i, + output icache_arsp_t areq_o, + // data requests + input icache_dreq_t dreq_i, + output icache_drsp_t dreq_o, + // AXI refill port + output axi_req_t axi_req_o, + input axi_rsp_t axi_resp_i +); + + localparam AxiNumWords = (ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth) * (ICACHE_LINE_WIDTH > DCACHE_LINE_WIDTH) + + (DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth) * (ICACHE_LINE_WIDTH <= DCACHE_LINE_WIDTH) ; + + logic icache_mem_rtrn_vld; + icache_rtrn_t icache_mem_rtrn; + logic icache_mem_data_req; + logic icache_mem_data_ack; + icache_req_t icache_mem_data; + + logic axi_rd_req; + logic axi_rd_gnt; + logic [CVA6Cfg.AxiAddrWidth-1:0] axi_rd_addr; + logic [ $clog2(AxiNumWords)-1:0]
axi_rd_blen; + logic [ 2:0] axi_rd_size; + logic [ CVA6Cfg.AxiIdWidth-1:0] axi_rd_id_in; + logic axi_rd_rdy; + logic axi_rd_lock; + logic axi_rd_last; + logic axi_rd_valid; + logic [CVA6Cfg.AxiDataWidth-1:0] axi_rd_data; + logic [ CVA6Cfg.AxiIdWidth-1:0] axi_rd_id_out; + logic axi_rd_exokay; + + logic req_valid_d, req_valid_q; + icache_req_t req_data_d, req_data_q; + logic first_d, first_q; + logic [ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:0][CVA6Cfg.AxiDataWidth-1:0] + rd_shift_d, rd_shift_q; + + // Keep read request asserted until we have an AXI grant. This is not guaranteed by icache (but + // required by AXI). + assign req_valid_d = ~axi_rd_gnt & (icache_mem_data_req | req_valid_q); + + // Update read request information on a new request + assign req_data_d = (icache_mem_data_req) ? icache_mem_data : req_data_q; + + // We have a new or pending read request + assign axi_rd_req = icache_mem_data_req | req_valid_q; + assign axi_rd_addr = CVA6Cfg.AxiAddrWidth'(req_data_d.paddr); + + // Fetch a full cache line (ICACHE_LINE_WIDTH / AxiDataWidth beats) on a cache miss, or a single word on a bypassed access + assign axi_rd_blen = (req_data_d.nc) ? '0 : ariane_pkg::ICACHE_LINE_WIDTH / CVA6Cfg.AxiDataWidth - 1; + assign axi_rd_size = $clog2(CVA6Cfg.AxiDataWidth / 8); // Maximum + assign axi_rd_id_in = req_data_d.tid; + assign axi_rd_rdy = 1'b1; + assign axi_rd_lock = 1'b0; + + // Immediately acknowledge read request. This is an implicit requirement for the icache.
+ assign icache_mem_data_ack = icache_mem_data_req; + + // Return data as soon as last word arrives + assign icache_mem_rtrn_vld = axi_rd_valid & axi_rd_last; + assign icache_mem_rtrn.data = rd_shift_d; + assign icache_mem_rtrn.tid = req_data_q.tid; + assign icache_mem_rtrn.rtype = wt_cache_pkg::ICACHE_IFILL_ACK; + assign icache_mem_rtrn.inv = '0; + + // ------- + // I-Cache + // ------- + cva6_icache #( + // use ID 0 for icache reads + .CVA6Cfg(CVA6Cfg), + .RdTxId (0) + ) i_cva6_icache ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (flush_i), + .en_i (en_i), + .miss_o (miss_o), + .areq_i (areq_i), + .areq_o (areq_o), + .dreq_i (dreq_i), + .dreq_o (dreq_o), + .mem_rtrn_vld_i(icache_mem_rtrn_vld), + .mem_rtrn_i (icache_mem_rtrn), + .mem_data_req_o(icache_mem_data_req), + .mem_data_ack_i(icache_mem_data_ack), + .mem_data_o (icache_mem_data) + ); + + // -------- + // AXI shim + // -------- + axi_shim #( + .CVA6Cfg (CVA6Cfg), + .AxiNumWords(AxiNumWords), + .axi_req_t (axi_req_t), + .axi_rsp_t (axi_rsp_t) + ) i_axi_shim ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .rd_req_i (axi_rd_req), + .rd_gnt_o (axi_rd_gnt), + .rd_addr_i (axi_rd_addr), + .rd_blen_i (axi_rd_blen), + .rd_size_i (axi_rd_size), + .rd_id_i (axi_rd_id_in), + .rd_rdy_i (axi_rd_rdy), + .rd_lock_i (axi_rd_lock), + .rd_last_o (axi_rd_last), + .rd_valid_o (axi_rd_valid), + .rd_data_o (axi_rd_data), + .rd_user_o (), + .rd_id_o (axi_rd_id_out), + .rd_exokay_o(axi_rd_exokay), + .wr_req_i ('0), + .wr_gnt_o (), + .wr_addr_i ('0), + .wr_data_i ('0), + .wr_user_i ('0), + .wr_be_i ('0), + .wr_blen_i ('0), + .wr_size_i ('0), + .wr_id_i ('0), + .wr_lock_i ('0), + .wr_atop_i ('0), + .wr_rdy_i ('0), + .wr_valid_o (), + .wr_id_o (), + .wr_exokay_o(), + .axi_req_o (axi_req_o), + .axi_resp_i (axi_resp_i) + ); + + // Buffer burst data in shift register: each beat enters at the top word and earlier beats move towards word 0 + always_comb begin : p_axi_rtrn_shift + first_d = first_q; + rd_shift_d = rd_shift_q; + + if (axi_rd_valid) begin + first_d = axi_rd_last; + if (ICACHE_LINE_WIDTH ==
CVA6Cfg.AxiDataWidth) begin + rd_shift_d = axi_rd_data; + end else begin + rd_shift_d = {axi_rd_data, rd_shift_q[ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:1]}; + end + + // On the first beat of a transaction (first_q) also place the word at offset 0, so that a single word transaction returns its data there + if (first_q) begin + rd_shift_d[0] = axi_rd_data; + end + end + end + + // Registers + always_ff @(posedge clk_i or negedge rst_ni) begin : p_rd_buf + if (!rst_ni) begin + req_valid_q <= 1'b0; + req_data_q <= '0; + first_q <= 1'b1; + rd_shift_q <= '0; + end else begin + req_valid_q <= req_valid_d; + req_data_q <= req_data_d; + first_q <= first_d; + rd_shift_q <= rd_shift_d; + end + end + +endmodule // cva6_icache_axi_wrapper diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/include/hpdcache_typedef.svh b/test/type_param/core/cache_subsystem/hpdcache/rtl/include/hpdcache_typedef.svh new file mode 100644 index 00000000..5e92a791 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/include/hpdcache_typedef.svh @@ -0,0 +1,62 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License.
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : February, 2023 + * Description : HPDcache Types' Definition (macros that declare the mem_req, mem_resp_r, mem_req_w and mem_resp_w packed struct types under a caller-chosen name) + * History : + */ +`ifndef __HPDCACHE_TYPEDEF_SVH__ +`define __HPDCACHE_TYPEDEF_SVH__ + +`define HPDCACHE_TYPEDEF_MEM_REQ_T(__name__, addr_t, id_t) \ + typedef struct packed { \ + addr_t mem_req_addr; \ + hpdcache_pkg::hpdcache_mem_len_t mem_req_len; \ + hpdcache_pkg::hpdcache_mem_size_t mem_req_size; \ + id_t mem_req_id; \ + hpdcache_pkg::hpdcache_mem_command_e mem_req_command; \ + hpdcache_pkg::hpdcache_mem_atomic_e mem_req_atomic; \ + logic mem_req_cacheable; \ + } __name__ + +`define HPDCACHE_TYPEDEF_MEM_RESP_R_T(__name__, id_t, data_t) \ + typedef struct packed { \ + hpdcache_pkg::hpdcache_mem_error_e mem_resp_r_error; \ + id_t mem_resp_r_id; \ + data_t mem_resp_r_data; \ + logic mem_resp_r_last; \ + } __name__ + +`define HPDCACHE_TYPEDEF_MEM_REQ_W_T(__name__, data_t, be_t) \ + typedef struct packed { \ + data_t mem_req_w_data; \ + be_t mem_req_w_be; \ + logic mem_req_w_last; \ + } __name__ + +`define HPDCACHE_TYPEDEF_MEM_RESP_W_T(__name__, id_t) \ + typedef struct packed { \ + logic mem_resp_w_is_atomic; \ + hpdcache_pkg::hpdcache_mem_error_e mem_resp_w_error; \ + id_t mem_resp_w_id; \ + } __name__ + +`endif diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_data_downsize.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_data_downsize.sv new file mode 100644 index 00000000..d3e0a112 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_data_downsize.sv @@ -0,0 +1,181 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0.
You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : November 22, 2022 + * Description : Refill data downsize (buffers WR_WIDTH-bit entries and reads each one out as RD_WIDTH-bit words, word 0 first) + * History : + */ +module hpdcache_data_downsize +// {{{ +import hpdcache_pkg::*; +// Parameters +// {{{ +#( + parameter int WR_WIDTH = 0, + parameter int RD_WIDTH = 0, + parameter int DEPTH = 0, + + localparam type wdata_t = logic [WR_WIDTH-1:0], + localparam type rdata_t = logic [RD_WIDTH-1:0] +) +// }}} +// Ports +// {{{ +( + input logic clk_i, + input logic rst_ni, + + input logic w_i, + output logic wok_o, + input wdata_t wdata_i, + + input logic r_i, + output logic rok_o, + output rdata_t rdata_o +); +// }}} +// Architecture +// {{{ + // Local definitions + // {{{ + localparam int RD_WORDS = WR_WIDTH/RD_WIDTH; + localparam int PTR_WIDTH = $clog2(DEPTH); + localparam int WORDCNT_WIDTH = $clog2(RD_WORDS); + typedef logic [PTR_WIDTH-1:0] bufptr_t; + typedef logic [WORDCNT_WIDTH-1:0] wordptr_t; + typedef logic [PTR_WIDTH:0] occupancy_t; + // }}} + + // Internal registers and signals + // {{{ + rdata_t [DEPTH-1:0][RD_WORDS-1:0] buf_q; + bufptr_t wrptr_q, wrptr_d; + bufptr_t rdptr_q, rdptr_d; + occupancy_t used_q, used_d; + wordptr_t [DEPTH-1:0] words_q, words_d; + logic words_set; + logic full, empty; + // }}} + + // Control-Path + // {{{ + assign full = (hpdcache_uint'(used_q) == DEPTH), + empty = (used_q == 0), + wok_o = ~full, + rok_o = ~empty; + + always_comb + begin : ctrl_comb + automatic logic used_inc, used_dec; + automatic logic words_dec; + + rdptr_d = rdptr_q; + wrptr_d = wrptr_q; + used_dec = 1'b0; + used_inc = 1'b0; +
words_dec = 1'b0; + words_set = 1'b0; + + if (w_i && wok_o) begin + used_inc = 1'b1; + words_set = 1'b1; + if (hpdcache_uint'(wrptr_q) == (DEPTH-1)) begin + wrptr_d = 0; + end else begin + wrptr_d = wrptr_q + 1; + end + end + + if (r_i && rok_o) begin + words_dec = (words_q[rdptr_q] > 0); + if (words_q[rdptr_q] == 0) begin + used_dec = 1'b1; + if (hpdcache_uint'(rdptr_q) == (DEPTH-1)) begin + rdptr_d = 0; + end else begin + rdptr_d = rdptr_q + 1; + end + end + end + + case ({used_inc, used_dec}) + 2'b10 : used_d = used_q + 1; + 2'b01 : used_d = used_q - 1; + default: used_d = used_q; + endcase + + words_d = words_q; + if (words_set) begin + words_d[wrptr_q] = wordptr_t'(RD_WORDS - 1); + end + if (words_dec) begin + words_d[rdptr_q] = words_q[rdptr_q] - 1; + end + end + + always_ff @(posedge clk_i or negedge rst_ni) + begin : ctrl_ff + if (!rst_ni) begin + rdptr_q <= 0; + wrptr_q <= 0; + used_q <= 0; + words_q <= 0; + end else begin + rdptr_q <= rdptr_d; + wrptr_q <= wrptr_d; + used_q <= used_d; + words_q <= words_d; + end + end + // }}} + + // Data-Path (a whole WR_WIDTH entry is written at once; reads select one RD_WIDTH word of the entry at rdptr_q) + // {{{ + always_ff @(posedge clk_i or negedge rst_ni) + begin : buf_ff + if (!rst_ni) begin + buf_q <= '0; + end else begin + if (words_set) begin + buf_q[wrptr_q] <= wdata_i; + end + end + end + + assign rdata_o = buf_q[rdptr_q][RD_WORDS - hpdcache_uint'(words_q[rdptr_q]) - 1]; + // }}} + + // Assertions + // {{{ + // pragma translate_off + initial + begin : initial_assertions + assert (DEPTH > 0) else $error("DEPTH must be greater than 0"); + assert (WR_WIDTH > 0) else $error("WR_WIDTH must be greater than 0"); + assert (RD_WIDTH > 0) else $error("RD_WIDTH must be greater than 0"); + assert (RD_WIDTH < WR_WIDTH) else $error("RD_WIDTH must be less to WR_WIDTH"); + assert ((WR_WIDTH % RD_WIDTH) == 0) else $error("WR_WIDTH must be a multiple RD_WIDTH"); + end + // pragma translate_on + // }}} +// }}} +endmodule +// }}} diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_data_upsize.sv
b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_data_upsize.sv new file mode 100644 index 00000000..c4af81cb --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_data_upsize.sv @@ -0,0 +1,181 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : November 22, 2022 + * Description : Refill data upsize (packs WR_WIDTH-bit writes into RD_WIDTH-bit entries; an entry is closed when it is full or on wlast_i) + * History : + */ +module hpdcache_data_upsize +// {{{ +import hpdcache_pkg::*; +// Parameters +// {{{ +#( + parameter int WR_WIDTH = 0, + parameter int RD_WIDTH = 0, + parameter int DEPTH = 0, + + localparam type wdata_t = logic [WR_WIDTH-1:0], + localparam type rdata_t = logic [RD_WIDTH-1:0] +) +// }}} +// Ports +// {{{ +( + input logic clk_i, + input logic rst_ni, + + input logic w_i, + input logic wlast_i, + output logic wok_o, + input wdata_t wdata_i, + + input logic r_i, + output logic rok_o, + output rdata_t rdata_o +); +// }}} +// Architecture +// {{{ + // Local definitions + // {{{ + localparam int WR_WORDS = RD_WIDTH/WR_WIDTH; + localparam int PTR_WIDTH = $clog2(DEPTH); + localparam int WORDCNT_WIDTH = $clog2(WR_WORDS); + typedef logic [PTR_WIDTH-1:0] bufptr_t; + typedef logic [WORDCNT_WIDTH-1:0] wordptr_t; + typedef logic [PTR_WIDTH:0] occupancy_t; + // }}} + + // Internal registers and signals + // {{{ + wdata_t [DEPTH-1:0][WR_WORDS-1:0] buf_q; + bufptr_t wrptr_q, wrptr_d; + bufptr_t rdptr_q, rdptr_d; + occupancy_t used_q, used_d; + wordptr_t [DEPTH-1:0] words_q, words_d; + logic full, empty; + logic shift; + // }}} + + // Control-Path + // {{{ + assign full = (hpdcache_uint'(used_q) == DEPTH), + empty = (used_q == 0), + wok_o = ~full, + rok_o = ~empty; + + always_comb + begin : ctrl_comb + automatic logic used_inc, used_dec; + automatic logic words_inc, words_reset; + + wrptr_d = wrptr_q; + rdptr_d = rdptr_q; + words_d = words_q; + used_dec = 1'b0; + used_inc = 1'b0; + words_reset = 1'b0; + words_inc = 1'b0; + shift = 1'b0; + + if (w_i && wok_o) begin + shift = 1'b1; + words_inc = (hpdcache_uint'(words_q[wrptr_q]) < (WR_WORDS-1)); + if (hpdcache_uint'(words_q[wrptr_q]) == (WR_WORDS-1) || wlast_i) begin + used_inc = 1'b1; + if (hpdcache_uint'(wrptr_q) == (DEPTH-1)) begin + wrptr_d = 0; + end else begin + wrptr_d = wrptr_q + 1; + end + end +
end + + if (r_i && rok_o) begin + used_dec = 1'b1; + words_reset = 1'b1; + if (hpdcache_uint'(rdptr_q) == (DEPTH-1)) begin + rdptr_d = 0; + end else begin + rdptr_d = rdptr_q + 1; + end + end + + case ({used_inc, used_dec}) + 2'b10 : used_d = used_q + 1; + 2'b01 : used_d = used_q - 1; + default: used_d = used_q; + endcase + + if (words_inc) words_d[wrptr_q] = words_q[wrptr_q] + 1; + if (words_reset) words_d[rdptr_q] = 0; + end + + + always_ff @(posedge clk_i or negedge rst_ni) + begin : ctrl_ff + if (!rst_ni) begin + rdptr_q <= 0; + wrptr_q <= 0; + used_q <= 0; + words_q <= '0; + end else begin + rdptr_q <= rdptr_d; + wrptr_q <= wrptr_d; + used_q <= used_d; + words_q <= words_d; + end + end + // }}} + + // Data-Path (each accepted write is stored at word index words_q[wrptr_q] of the entry at wrptr_q; reads return the whole entry at rdptr_q) + // {{{ + always_ff @(posedge clk_i or negedge rst_ni) + begin : buf_ff + if (!rst_ni) begin + buf_q <= '0; + end else begin + if (shift) begin + buf_q[wrptr_q][words_q[wrptr_q]] <= wdata_i; + end + end + end + + assign rdata_o = buf_q[rdptr_q]; + // }}} + + // Assertions + // {{{ + // pragma translate_off + initial + begin : initial_assertions + assert (DEPTH > 0) else $error("DEPTH must be greater than 0"); + assert (WR_WIDTH > 0) else $error("WR_WIDTH must be greater than 0"); + assert (RD_WIDTH > 0) else $error("RD_WIDTH must be greater than 0"); + assert (WR_WIDTH < RD_WIDTH) else $error("WR_WIDTH must be less to RD_WIDTH"); + assert ((RD_WIDTH % WR_WIDTH) == 0) else $error("RD_WIDTH must be a multiple WR_WIDTH"); + end + // pragma translate_on + // }}} +// }}} +endmodule +// }}} diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_demux.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_demux.sv new file mode 100644 index 00000000..3be21e08 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_demux.sv @@ -0,0 +1,69 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier:
Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : Simple demultiplexer (routes data_i to the output selected by sel_i, all other outputs are driven to zero) + * History : + */ +module hpdcache_demux +// Parameters +// {{{ +#( + // Number of outputs + parameter int unsigned NOUTPUT = 0, + + // Width in bits of the input and of each output + parameter int unsigned DATA_WIDTH = 0, + + // Selector signal is one-hot encoded + parameter bit ONE_HOT_SEL = 0, + + // Compute the width of the selection signal + localparam int unsigned NOUTPUT_LOG2 = $clog2(NOUTPUT), + localparam int unsigned SEL_WIDTH = ONE_HOT_SEL ? NOUTPUT : NOUTPUT_LOG2, + + localparam type data_t = logic [DATA_WIDTH-1:0], + localparam type sel_t = logic [SEL_WIDTH-1:0] +) +// }}} + +// Ports +// {{{ +( + input data_t data_i, + input sel_t sel_i, + output data_t [NOUTPUT-1:0] data_o +); +// }}} + + generate + always_comb + begin : demux_comb + for (int unsigned i = 0; i < NOUTPUT; i++) begin + if (!ONE_HOT_SEL) begin + data_o[i] = (sel_t'(i) == sel_i) ? data_i : '0; + end else begin + data_o[i] = sel_i[i] ?
data_i : '0; + end + end + end + endgenerate +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_fifo_reg.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_fifo_reg.sv new file mode 100644 index 00000000..ba3be5f4 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_fifo_reg.sv @@ -0,0 +1,167 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License.
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : FIFO buffer (using registers) + * History : + */ +module hpdcache_fifo_reg + // Parameters + // {{{ +#( + parameter int unsigned FIFO_DEPTH = 0, + parameter bit FEEDTHROUGH = 1'b0, + parameter type fifo_data_t = logic +) + // }}} + // Ports + // {{{ +( + input logic clk_i, + input logic rst_ni, + input logic w_i, + output logic wok_o, + input fifo_data_t wdata_i, + input logic r_i, + output logic rok_o, + output fifo_data_t rdata_o +); + // }}} + + /* + * Single-entry FIFO buffer -> synchronization buffer + */ + if (FIFO_DEPTH == 1) begin : gen_sync_buffer + hpdcache_sync_buffer #( + .FEEDTHROUGH (FEEDTHROUGH), + .data_t (fifo_data_t) + ) i_sync_buffer ( + .clk_i, + .rst_ni, + .w_i, + .wok_o, + .wdata_i, + .r_i, + .rok_o, + .rdata_o + ); + + /* + * Multi-entry FIFO buffer + */ + end else if (FIFO_DEPTH > 0) begin : gen_fifo + // Declaration of constants, types and functions + // {{{ + typedef logic unsigned [$clog2(FIFO_DEPTH)-1:0] fifo_addr_t; + // }}} + + // Declaration of internal wires and registers + // {{{ + fifo_data_t [FIFO_DEPTH-1:0] fifo_mem_q; + fifo_addr_t rptr_q, rptr_d; // read pointer + fifo_addr_t wptr_q, wptr_d; // write pointer + logic crossover_q, crossover_d; // write pointer has wrapped around and the read pointer has not yet + logic rexec, wexec; + logic rptr_max, wptr_max; + logic match_ptr; + logic empty, full; + // }}} + + // Global control signals + // {{{ + assign match_ptr = (wptr_q == rptr_q); + + assign empty = match_ptr & ~crossover_q, + full = match_ptr & crossover_q; + + assign rok_o = ~empty | (FEEDTHROUGH & w_i), + wok_o = ~full | (FEEDTHROUGH & r_i); + + assign rexec = r_i & ~empty, + wexec = w_i & (( FEEDTHROUGH & ((empty & ~r_i) | (full & r_i) | (~full & ~empty))) | + (~FEEDTHROUGH & ~full)); + + // }}} + + // Control of read and write pointers + // {{{ + assign rptr_max = (rptr_q == fifo_addr_t'(FIFO_DEPTH-1)); + assign wptr_max = (wptr_q == fifo_addr_t'(FIFO_DEPTH-1)); + +
always_comb + begin : fifo_ctrl_comb + rptr_d = rptr_q; + wptr_d = wptr_q; + crossover_d = crossover_q; + + if (rexec) begin + rptr_d = rptr_max ? 0 : rptr_q + 1; + end + + if (wexec) begin + wptr_d = wptr_max ? 0 : wptr_q + 1; + end + + if (wexec && wptr_max) begin + crossover_d = 1'b1; + end else if (rexec && rptr_max) begin + crossover_d = 1'b0; + end + end + // }}} + + // FIFO buffer memory management (the storage has no reset; with FEEDTHROUGH an empty FIFO forwards wdata_i directly) + // {{{ + always_ff @(posedge clk_i) + begin + if (wexec) fifo_mem_q[wptr_q] <= wdata_i; + end + + assign rdata_o = FEEDTHROUGH && empty ? wdata_i : fifo_mem_q[rptr_q]; + // }}} + + // Setting of internal state + // {{{ + always_ff @(posedge clk_i or negedge rst_ni) + begin + if (!rst_ni) begin + rptr_q <= 0; + wptr_q <= 0; + crossover_q <= 1'b0; + end else begin + rptr_q <= rptr_d; + wptr_q <= wptr_d; + crossover_q <= crossover_d; + end + end + // }}} + + // Assertions + // {{{ + // pragma translate_off + rptr_ahead_wptr_assert: assert property (@(posedge clk_i) disable iff (!rst_ni) + ((rptr_q <= wptr_q) && !crossover_q) || + ((rptr_q >= wptr_q) && crossover_q)) else + $error("fifo: read pointer is ahead of the write pointer"); + // pragma translate_on + // }}} + end +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_fxarb.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_fxarb.sv new file mode 100644 index 00000000..bbd8d90e --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_fxarb.sv @@ -0,0 +1,85 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0.
You may obtain a copy of the
+ * License at
+ *
+ * https://solderpad.org/licenses/SHL-2.1/
+ *
+ * Unless required by applicable law or agreed to in writing, any work
+ * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Authors : Cesar Fuguet
+ * Creation Date : April, 2021
+ * Description : Fixed-Priority Arbiter
+ * History :
+ */
+module hpdcache_fxarb
+    // Parameters
+    // {{{
+#(
+    // Number of requesters (width of the request and grant vectors)
+    parameter int unsigned N = 0
+)
+    // }}}
+    // Ports
+    // {{{
+(
+    input  logic         clk_i,
+    input  logic         rst_ni,
+    input  logic [N-1:0] req_i,
+    output logic [N-1:0] gnt_o,
+    input  logic         ready_i
+);
+    // }}}
+
+    // Internal wires and registers
+    // {{{
+    logic [N-1:0] gnt;      // grant computed from the current requests
+    logic [N-1:0] gnt_q;    // grant captured when a request has to wait
+    logic         wait_q;   // set while a captured grant waits for ready_i
+    logic         pending;  // at least one request is active
+    assign pending = |req_i;
+    // }}}
+
+    // Grant vector for the current requests
+    // {{{
+    hpdcache_prio_1hot_encoder #(.N(N)) prio_msk_i (.val_o(gnt), .val_i(req_i));
+    // }}}
+
+    // Output grant: the captured one while waiting, the fresh one otherwise
+    // {{{
+    assign gnt_o = wait_q ? gnt_q : gnt;
+    // }}}
+
+    // State registers (asynchronous, active-low reset)
+    // {{{
+    always_ff @(posedge clk_i or negedge rst_ni)
+    begin
+        if (!rst_ni) begin
+            gnt_q  <= '0;
+            wait_q <= 1'b0;
+        end else begin
+            if (pending && !wait_q && !ready_i) gnt_q <= gnt;  // first stalled cycle
+            wait_q <= !ready_i && (wait_q || pending);  // wait until ready_i is high
+        end
+    end
+    // }}}
+
+    // Assertions
+    // {{{
+    // pragma translate_off
+    gnt_at_most_one_requester: assert property (@(posedge clk_i) disable iff (!rst_ni)
+            $onehot0(gnt_o)) else $error("arbiter: granting more than one requester");
+    // pragma translate_on
+    // }}}
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_mux.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_mux.sv
new file mode 100644
index 00000000..d78e1ebf
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_mux.sv
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2023 CEA*
+ * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ * may not use this file except in compliance with the License, or, at your
+ * option, the Apache License version 2.0. You may obtain a copy of the
+ * License at
+ *
+ * https://solderpad.org/licenses/SHL-2.1/
+ *
+ * Unless required by applicable law or agreed to in writing, any work
+ * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Author(s) : Cesar Fuguet
+ * Creation Date : April, 2021
+ * Description : Simple multiplexor
+ * History :
+ */
+module hpdcache_mux
+    // Parameters
+    // {{{
+#(
+    // Number of inputs
+    parameter int unsigned NINPUT = 0,
+
+    // Width in bits of each input
+    parameter int unsigned DATA_WIDTH = 0,
+
+    // Selector encoding: 1 = one-hot (one bit per input), 0 = binary index
+    parameter bit ONE_HOT_SEL = 0,
+
+    // Derived values: width of the selection signal for the chosen encoding
+    localparam int unsigned NINPUT_LOG2 = $clog2(NINPUT),
+    localparam int unsigned SEL_WIDTH = ONE_HOT_SEL ? NINPUT : NINPUT_LOG2,
+
+    localparam type data_t = logic [DATA_WIDTH-1:0],
+    localparam type sel_t = logic [SEL_WIDTH-1:0]
+)
+    // }}}
+
+    // Ports
+    // {{{
+(
+    input  data_t [NINPUT-1:0] data_i,
+    input  sel_t               sel_i,
+    output data_t              data_o
+);
+    // }}}
+
+    generate
+        // One-hot selector: OR together every input whose select bit is set
+        if (ONE_HOT_SEL) begin
+            always_comb
+            begin : data_out_mux_comb
+                data_o = '0;
+                for (int unsigned idx = 0; idx < NINPUT; idx++) begin
+                    data_o = data_o | (sel_i[idx] ? data_i[idx] : '0);
+                end
+            end
+
+        // Binary selector: only the input whose index equals sel_i contributes
+        end else begin
+            always_comb
+            begin : data_out_mux_comb
+                data_o = '0;
+                for (int unsigned idx = 0; idx < NINPUT; idx++) begin
+                    data_o = data_o | ((idx == int'(sel_i)) ? data_i[idx] : '0);
+                end
+            end
+        end
+    endgenerate
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_prio_1hot_encoder.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_prio_1hot_encoder.sv
new file mode 100644
index 00000000..36fe5bcf
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_prio_1hot_encoder.sv
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2023 CEA*
+ * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ * may not use this file except in compliance with the License, or, at your
+ * option, the Apache License version 2.0. You may obtain a copy of the
+ * License at
+ *
+ * https://solderpad.org/licenses/SHL-2.1/
+ *
+ * Unless required by applicable law or agreed to in writing, any work
+ * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Authors : Cesar Fuguet
+ * Creation Date : April, 2021
+ * Description : Priority One-hot Encoder
+ * History :
+ */
+module hpdcache_prio_1hot_encoder
+    // Parameters
+#(
+    parameter int unsigned N = 0
+)
+    // Ports
+(
+    input  logic [N-1:0] val_i,
+    output logic [N-1:0] val_o
+);
+    // Bit 0 has the highest priority and is passed through unchanged
+    assign val_o[0] = val_i[0];
+
+    // Every other bit is kept only when no lower-indexed input bit is set
+    for (genvar pos = 1; pos < int'(N); pos++) begin : prio_gen
+        assign val_o[pos] = val_i[pos] & (~|val_i[pos-1:0]);
+    end
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_regbank_wbyteenable_1rw.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_regbank_wbyteenable_1rw.sv
new file mode 100644
index 00000000..184e6fbf
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_regbank_wbyteenable_1rw.sv
@@ -0,0 +1,63 @@
+/*
+ * Copyright 2023 CEA*
+ * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ * may not use this file except in compliance with the License, or, at your
+ * option, the Apache License version 2.0. You may obtain a copy of the
+ * License at
+ *
+ * https://solderpad.org/licenses/SHL-2.1/
+ *
+ * Unless required by applicable law or agreed to in writing, any work
+ * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Authors : Cesar Fuguet
+ * Creation Date : March, 2020
+ * Description : 1RW register bank with write byte enable
+ * History :
+ */
+module hpdcache_regbank_wbyteenable_1rw
+#(
+    parameter int unsigned ADDR_SIZE = 0,
+    parameter int unsigned DATA_SIZE = 0,
+    parameter int unsigned DEPTH = 2**ADDR_SIZE
+)
+(
+    input  logic                   clk,
+    input  logic                   rst_n,
+    input  logic                   cs,
+    input  logic                   we,
+    input  logic [ADDR_SIZE-1:0]   addr,
+    input  logic [DATA_SIZE-1:0]   wdata,
+    input  logic [DATA_SIZE/8-1:0] wbyteenable,
+    output logic [DATA_SIZE-1:0]   rdata
+);
+
+    // Storage: DEPTH words of DATA_SIZE bits. It has no reset; rst_n is not
+    // used by this module.
+    logic [DATA_SIZE-1:0] mem [DEPTH];
+
+    // Single port, active only while cs is high. A write updates just the
+    // bytes enabled in wbyteenable; rdata is registered on every selected
+    // cycle and returns the word as it was before a same-cycle write.
+    always_ff @(posedge clk)
+    begin : mem_update_ff
+        if (cs && we) begin
+            for (int byte_idx = 0; byte_idx < DATA_SIZE/8; byte_idx++) begin
+                if (wbyteenable[byte_idx]) begin
+                    mem[addr][8*byte_idx +: 8] <= wdata[8*byte_idx +: 8];
+                end
+            end
+        end
+        if (cs) begin
+            rdata <= mem[addr];
+        end
+    end : mem_update_ff
+endmodule : hpdcache_regbank_wbyteenable_1rw
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_regbank_wmask_1rw.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_regbank_wmask_1rw.sv
new file mode 100644
index 00000000..e185bc40
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_regbank_wmask_1rw.sv
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2023 CEA*
+ * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ * may not use this file except in compliance with the License, or, at your
+ * option, the Apache License version 2.0.
You may obtain a copy of the
+ * License at
+ *
+ * https://solderpad.org/licenses/SHL-2.1/
+ *
+ * Unless required by applicable law or agreed to in writing, any work
+ * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Authors : Cesar Fuguet
+ * Creation Date : March, 2020
+ * Description : 1RW register bank with write bit mask
+ * History :
+ */
+module hpdcache_regbank_wmask_1rw
+#(
+    parameter int unsigned ADDR_SIZE = 0,
+    parameter int unsigned DATA_SIZE = 0,
+    parameter int unsigned DEPTH = 2**ADDR_SIZE
+)
+(
+    input  logic                 clk,
+    input  logic                 rst_n,
+    input  logic                 cs,
+    input  logic                 we,
+    input  logic [ADDR_SIZE-1:0] addr,
+    input  logic [DATA_SIZE-1:0] wdata,
+    input  logic [DATA_SIZE-1:0] wmask,
+    output logic [DATA_SIZE-1:0] rdata
+);
+
+    // Storage: DEPTH words of DATA_SIZE bits. It has no reset; rst_n is not
+    // used by this module.
+    logic [DATA_SIZE-1:0] mem [DEPTH];
+
+    // Single port, active only while cs is high.
+    //  - A write (we high) takes the bits set in wmask from wdata and keeps
+    //    the stored value for all other bits.
+    //  - rdata is registered on every selected cycle and returns the word as
+    //    it was before a same-cycle write.
+    always_ff @(posedge clk)
+    begin : mem_update_ff
+        if (cs && we) begin
+            mem[addr] <= (wdata & wmask) | (mem[addr] & ~wmask);
+        end
+        if (cs) begin
+            rdata <= mem[addr];
+        end
+    end : mem_update_ff
+endmodule : hpdcache_regbank_wmask_1rw
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_rrarb.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_rrarb.sv
new file mode 100644
index 00000000..9595725d
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_rrarb.sv
@@ -0,0 +1,121 @@
+/*
+ * Copyright 2023 CEA*
+ * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ * may not use this file except in
compliance with the License, or, at your
+ * option, the Apache License version 2.0. You may obtain a copy of the
+ * License at
+ *
+ * https://solderpad.org/licenses/SHL-2.1/
+ *
+ * Unless required by applicable law or agreed to in writing, any work
+ * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+/**
+ * Author(s) : Cesar Fuguet
+ * Creation Date : April, 2021
+ * Description : Round-Robin Arbiter
+ * Based on design from
+ * http://www.rtlery.com/articles/how-design-round-robin-arbiter
+ * History :
+ */
+module hpdcache_rrarb
+    // Parameters
+    // {{{
+#(
+    // Number of requesters
+    parameter int unsigned N = 0
+)
+    // }}}
+    // Ports
+    // {{{
+(
+    input logic clk_i,
+    input logic rst_ni,  // asynchronous reset, active low
+    input logic [N-1:0] req_i,  // one request bit per requester
+    output logic [N-1:0] gnt_o,  // grant vector: gnt_q while waiting, gnt otherwise
+    input logic ready_i  // when high, no wait is started or continued
+);
+    // }}}
+
+    // Declaration of internal wires and registers
+    // {{{
+    logic [N-1:0] gnt_q, gnt;  // gnt_q: last registered grant; gnt: grant for the current requests
+    logic [N-1:0] nxt;  // gnt_q rotated left by one position
+    logic wait_q;  // set while a grant is held because ready_i was low
+    logic [N-1:0] mask, gnt_msk, gnt_nomsk;  // thermometer mask and the two candidate grants
+    logic pending;  // at least one request is active
+    genvar gen_i;
+    // }}}
+
+    // Elaboration-time assertions
+    // {{{
+    // pragma translate_off
+    generate
+        if (N == 0) $error("N must be greater than 0");
+    endgenerate
+    // pragma translate_on
+    // }}}
+
+    // Compute the thermometer mask vector
+    // {{{
+    generate
+        if (N > 1) begin : gen_nxt_gt_1
+            assign nxt = {gnt_q[N-2:0], gnt_q[N-1]};
+        end else begin : gen_nxt_1
+            assign nxt = gnt_q[0];
+        end
+        // mask[i] is the OR of nxt[i:0]: set from the lowest set bit of nxt upwards
+        for (gen_i = 0; gen_i < int'(N); gen_i++) begin : gen_mask
+            assign mask[gen_i] = |nxt[gen_i:0];
+        end
+    endgenerate
+    // }}}
+
+    // Compute the grant vector
+    // {{{
+    hpdcache_prio_1hot_encoder #(.N(N)) prio_msk_i (.val_i(req_i & mask), .val_o(gnt_msk));
+    hpdcache_prio_1hot_encoder #(.N(N)) prio_nomsk_i (.val_i(req_i) , .val_o(gnt_nomsk));
+    assign gnt = |gnt_msk ? gnt_msk : gnt_nomsk;  // no masked request: use the unmasked grant
+    // }}}
+
+    // Compute the output grant vector
+    // {{{
+    assign gnt_o = wait_q ? gnt_q : gnt;
+    // }}}
+
+    // Setting of internal state
+    // {{{
+    assign pending = |req_i;
+
+    always_ff @(posedge clk_i or negedge rst_ni)
+    begin
+        if (!rst_ni) begin
+            wait_q <= 1'b0;
+            gnt_q <= {1'b1, {N-1{1'b0}}};  // only the MSB set, so nxt[0] is set and mask is all ones
+        end else begin
+            wait_q <= ~ready_i & (wait_q | pending);
+            if (!wait_q && pending) begin  // unlike wait_q, this update does not depend on ready_i
+                gnt_q <= gnt;
+            end
+        end
+    end
+    // }}}
+
+    // Assertions
+    // {{{
+    // pragma translate_off
+    gnt_at_most_one_requester: assert property (@(posedge clk_i) disable iff (!rst_ni)
+            $onehot0(gnt)) else $error("arbiter: granting more than one requester");
+    gnt_q_exactly_one_requester: assert property (@(posedge clk_i) disable iff (!rst_ni)
+            $onehot(gnt_q)) else $error("arbiter: grant state is not one-hot");
+    // pragma translate_on
+    // }}}
+
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram.sv
new file mode 100644
index 00000000..d4cab7de
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram.sv
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2023 CEA*
+ * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ * may not use this file except in compliance with the License, or, at your
+ * option, the Apache License version 2.0. You may obtain a copy of the
+ * License at
+ *
+ * https://solderpad.org/licenses/SHL-2.1/
+ *
+ * Unless required by applicable law or agreed to in writing, any work
+ * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Authors : Cesar Fuguet
+ * Creation Date : March, 2020
+ * Description : Wrapper for Behavioral SRAM macros
+ * History :
+ */
+module hpdcache_sram
+#(
+    parameter int unsigned ADDR_SIZE = 0,
+    parameter int unsigned DATA_SIZE = 0,
+    parameter int unsigned DEPTH = 2**ADDR_SIZE
+)
+(
+    input  logic                 clk,
+    input  logic                 rst_n,
+    input  logic                 cs,
+    input  logic                 we,
+    input  logic [ADDR_SIZE-1:0] addr,
+    input  logic [DATA_SIZE-1:0] wdata,
+    output logic [DATA_SIZE-1:0] rdata
+);
+    // Every parameter and port is passed unchanged to hpdcache_sram_1rw
+    hpdcache_sram_1rw #(
+        .ADDR_SIZE (ADDR_SIZE),
+        .DATA_SIZE (DATA_SIZE),
+        .DEPTH     (DEPTH)
+    ) ram_i (
+        .clk   (clk),
+        .rst_n (rst_n),
+        .cs    (cs),
+        .we    (we),
+        .addr  (addr),
+        .wdata (wdata),
+        .rdata (rdata)
+    );
+
+endmodule : hpdcache_sram
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram_wbyteenable.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram_wbyteenable.sv
new file mode 100644
index 00000000..43bdb450
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram_wbyteenable.sv
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2023 CEA*
+ * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ * may not use this file except in compliance with the License, or, at your
+ * option, the Apache License version 2.0. You may obtain a copy of the
+ * License at
+ *
+ * https://solderpad.org/licenses/SHL-2.1/
+ *
+ * Unless required by applicable law or agreed to in writing, any work
+ * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Authors : Cesar Fuguet
+ * Creation Date : March, 2020
+ * Description : Wrapper for 1RW SRAM macros implementing a write byte enable
+ * History :
+ */
+module hpdcache_sram_wbyteenable
+#(
+    parameter int unsigned ADDR_SIZE = 0,
+    parameter int unsigned DATA_SIZE = 0,
+    parameter int unsigned DEPTH = 2**ADDR_SIZE
+)
+(
+    input  logic                   clk,
+    input  logic                   rst_n,
+    input  logic                   cs,
+    input  logic                   we,
+    input  logic [ADDR_SIZE-1:0]   addr,
+    input  logic [DATA_SIZE-1:0]   wdata,
+    input  logic [DATA_SIZE/8-1:0] wbyteenable,
+    output logic [DATA_SIZE-1:0]   rdata
+);
+    // Every parameter and port is passed unchanged to hpdcache_sram_wbyteenable_1rw
+    hpdcache_sram_wbyteenable_1rw #(
+        .ADDR_SIZE (ADDR_SIZE),
+        .DATA_SIZE (DATA_SIZE),
+        .DEPTH     (DEPTH)
+    ) ram_i (
+        .clk         (clk),
+        .rst_n       (rst_n),
+        .cs          (cs),
+        .we          (we),
+        .addr        (addr),
+        .wdata       (wdata),
+        .wbyteenable (wbyteenable),
+        .rdata       (rdata)
+    );
+
+endmodule : hpdcache_sram_wbyteenable
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram_wmask.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram_wmask.sv
new file mode 100644
index 00000000..a4771e3b
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram_wmask.sv
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2023 CEA*
+ * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ * may not use this file except in compliance with the License, or, at your
+ * option, the Apache License version 2.0. You may obtain a copy of the
+ * License at
+ *
+ * https://solderpad.org/licenses/SHL-2.1/
+ *
+ * Unless required by applicable law or agreed to in writing, any work
+ * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Authors : Cesar Fuguet
+ * Creation Date : March, 2020
+ * Description : Wrapper for 1RW SRAM macros implementing write bit mask
+ * History :
+ */
+module hpdcache_sram_wmask
+#(
+    parameter int unsigned ADDR_SIZE = 0,
+    parameter int unsigned DATA_SIZE = 0,
+    parameter int unsigned DEPTH = 2**ADDR_SIZE
+)
+(
+    input  logic                 clk,
+    input  logic                 rst_n,
+    input  logic                 cs,
+    input  logic                 we,
+    input  logic [ADDR_SIZE-1:0] addr,
+    input  logic [DATA_SIZE-1:0] wdata,
+    input  logic [DATA_SIZE-1:0] wmask,
+    output logic [DATA_SIZE-1:0] rdata
+);
+    // Every parameter and port is passed unchanged to hpdcache_sram_wmask_1rw
+    hpdcache_sram_wmask_1rw #(
+        .ADDR_SIZE (ADDR_SIZE),
+        .DATA_SIZE (DATA_SIZE),
+        .DEPTH     (DEPTH)
+    ) ram_i (
+        .clk   (clk),
+        .rst_n (rst_n),
+        .cs    (cs),
+        .we    (we),
+        .addr  (addr),
+        .wdata (wdata),
+        .wmask (wmask),
+        .rdata (rdata)
+    );
+
+endmodule : hpdcache_sram_wmask
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sync_buffer.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sync_buffer.sv
new file mode 100644
index 00000000..863c5885
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sync_buffer.sv
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2023 CEA*
+ * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ * may not use this file except in compliance with the License, or, at your
+ * option, the Apache License version 2.0. You may obtain a copy of the
+ * License at
+ *
+ * https://solderpad.org/licenses/SHL-2.1/
+ *
+ * Unless required by applicable law or agreed to in writing, any work
+ * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Authors : Cesar Fuguet
+ * Creation Date : October, 2023
+ * Description : Synchronization buffer
+ * History :
+ */
+module hpdcache_sync_buffer
+    // Parameters
+    // {{{
+#(
+    parameter bit  FEEDTHROUGH = 1'b0,
+    parameter type data_t      = logic
+)
+    // }}}
+    // Ports
+    // {{{
+(
+    input  logic  clk_i,
+    input  logic  rst_ni,
+    input  logic  w_i,
+    output logic  wok_o,
+    input  data_t wdata_i,
+    input  logic  r_i,
+    output logic  rok_o,
+    output data_t rdata_o
+);
+    // }}}
+
+    // Internal state: one data register and its valid flag
+    // {{{
+    data_t buf_q;
+    logic  buf_we;
+    logic  valid_q;
+    logic  valid_d;
+    // }}}
+
+    // Handshake outputs: FEEDTHROUGH also takes the opposite request into account
+    // {{{
+    assign rok_o = FEEDTHROUGH ? (valid_q | w_i) : valid_q;
+    assign wok_o = FEEDTHROUGH ? (~valid_q | r_i) : ~valid_q;
+    // }}}
+
+    // Register load enable and next value of the valid flag
+    // {{{
+    assign buf_we  = w_i & (FEEDTHROUGH ? (valid_q ~^ r_i) : ~valid_q);
+    assign valid_d = buf_we | (valid_q & ~r_i);
+    // }}}
+
+    // Data register (no reset) and read data selection
+    // {{{
+    always_ff @(posedge clk_i)
+    begin
+        if (buf_we) buf_q <= wdata_i;
+    end
+    // wdata_i bypasses the register only with FEEDTHROUGH and no valid entry
+    assign rdata_o = (!FEEDTHROUGH || valid_q) ? buf_q : wdata_i;
+    // }}}
+
+    // Valid flag register (asynchronous, active-low reset)
+    // {{{
+    always_ff @(posedge clk_i or negedge rst_ni)
+    begin
+        if (!rst_ni) begin
+            valid_q <= 1'b0;
+        end else begin
+            valid_q <= valid_d;
+        end
+    end
+    // }}}
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_1rw.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_1rw.sv
new file mode 100644
index 00000000..7288c731
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_1rw.sv
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2023 CEA*
+ * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ * may not use this file except in compliance with the License, or, at your
+ * option, the Apache License version 2.0. You may obtain a copy of the
+ * License at
+ *
+ * https://solderpad.org/licenses/SHL-2.1/
+ *
+ * Unless required by applicable law or agreed to in writing, any work
+ * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Authors : Cesar Fuguet
+ * Creation Date : March, 2020
+ * Description : SRAM behavioral model
+ * History :
+ */
+module hpdcache_sram_1rw
+#(
+    parameter int unsigned ADDR_SIZE = 0,
+    parameter int unsigned DATA_SIZE = 0,
+    parameter int unsigned DEPTH = 2**ADDR_SIZE
+)
+(
+    input  logic                 clk,
+    input  logic                 rst_n,
+    input  logic                 cs,
+    input  logic                 we,
+    input  logic [ADDR_SIZE-1:0] addr,
+    input  logic [DATA_SIZE-1:0] wdata,
+    output logic [DATA_SIZE-1:0] rdata
+);
+
+    // Storage: DEPTH words of DATA_SIZE bits. It has no reset; rst_n is not
+    // used by this model.
+    logic [DATA_SIZE-1:0] mem [DEPTH];
+
+    // Single read/write port, active only while cs is high:
+    //  - a write (we high) stores wdata at addr;
+    //  - rdata is registered on every selected cycle and, because the
+    //    assignments are non-blocking, returns the word as it was before
+    //    a write made in the same cycle.
+    always_ff @(posedge clk)
+    begin : mem_update_ff
+        if (cs && we) begin
+            mem[addr] <= wdata;
+        end
+        if (cs) begin
+            rdata <= mem[addr];
+        end
+    end : mem_update_ff
+endmodule : hpdcache_sram_1rw
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_wbyteenable_1rw.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_wbyteenable_1rw.sv
new file mode 100644
index 00000000..0e5e2256
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_wbyteenable_1rw.sv
@@ -0,0 +1,63 @@
+/*
+ * Copyright 2023 CEA*
+ * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ * may not use this file except in compliance with the License, or, at your
+ * option, the Apache License version 2.0. You may obtain a copy of the
+ * License at
+ *
+ * https://solderpad.org/licenses/SHL-2.1/
+ *
+ * Unless required by applicable law or agreed to in writing, any work
+ * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Authors : Cesar Fuguet
+ * Creation Date : March, 2020
+ * Description : Behavioral model of a 1RW SRAM with write byte enable
+ * History :
+ */
+module hpdcache_sram_wbyteenable_1rw
+#(
+    parameter int unsigned ADDR_SIZE = 0,
+    parameter int unsigned DATA_SIZE = 0,
+    parameter int unsigned DEPTH = 2**ADDR_SIZE
+)
+(
+    input  logic                   clk,
+    input  logic                   rst_n,
+    input  logic                   cs,
+    input  logic                   we,
+    input  logic [ADDR_SIZE-1:0]   addr,
+    input  logic [DATA_SIZE-1:0]   wdata,
+    input  logic [DATA_SIZE/8-1:0] wbyteenable,
+    output logic [DATA_SIZE-1:0]   rdata
+);
+
+    // Storage: DEPTH words of DATA_SIZE bits. It has no reset; rst_n is not
+    // used by this model.
+    logic [DATA_SIZE-1:0] mem [DEPTH];
+
+    // Single port, active only while cs is high. A write updates just the
+    // bytes enabled in wbyteenable; rdata is registered on every selected
+    // cycle and returns the word as it was before a same-cycle write.
+    always_ff @(posedge clk)
+    begin : mem_update_ff
+        if (cs && we) begin
+            for (int byte_idx = 0; byte_idx < DATA_SIZE/8; byte_idx++) begin
+                if (wbyteenable[byte_idx]) begin
+                    mem[addr][8*byte_idx +: 8] <= wdata[8*byte_idx +: 8];
+                end
+            end
+        end
+        if (cs) begin
+            rdata <= mem[addr];
+        end
+    end : mem_update_ff
+endmodule : hpdcache_sram_wbyteenable_1rw
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_wmask_1rw.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_wmask_1rw.sv
new file mode 100644
index 00000000..5058ba28
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_wmask_1rw.sv
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2023 CEA*
+ * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ * may not use this file except in compliance with the License, or, at your
+ * option, the Apache License version 2.0.
You may obtain a copy of the
+ * License at
+ *
+ * https://solderpad.org/licenses/SHL-2.1/
+ *
+ * Unless required by applicable law or agreed to in writing, any work
+ * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Authors : Cesar Fuguet
+ * Creation Date : March, 2020
+ * Description : Behavioral model of a 1RW SRAM with write bit mask
+ * History :
+ */
+module hpdcache_sram_wmask_1rw
+#(
+    parameter int unsigned ADDR_SIZE = 0,
+    parameter int unsigned DATA_SIZE = 0,
+    parameter int unsigned DEPTH = 2**ADDR_SIZE
+)
+(
+    input  logic                 clk,
+    input  logic                 rst_n,
+    input  logic                 cs,
+    input  logic                 we,
+    input  logic [ADDR_SIZE-1:0] addr,
+    input  logic [DATA_SIZE-1:0] wdata,
+    input  logic [DATA_SIZE-1:0] wmask,
+    output logic [DATA_SIZE-1:0] rdata
+);
+
+    // Storage: DEPTH words of DATA_SIZE bits. It has no reset; rst_n is not
+    // used by this model.
+    logic [DATA_SIZE-1:0] mem [DEPTH];
+
+    // Single port, active only while cs is high.
+    //  - A write (we high) takes the bits set in wmask from wdata and keeps
+    //    the stored value for all other bits.
+    //  - rdata is registered on every selected cycle and returns the word as
+    //    it was before a same-cycle write.
+    always_ff @(posedge clk)
+    begin : mem_update_ff
+        if (cs && we) begin
+            mem[addr] <= (wdata & wmask) | (mem[addr] & ~wmask);
+        end
+        if (cs) begin
+            rdata <= mem[addr];
+        end
+    end : mem_update_ff
+endmodule : hpdcache_sram_wmask_1rw
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache.sv
new file mode 100644
index 00000000..861e60c2
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache.sv
@@ -0,0 +1,658 @@
+/*
+ * Copyright 2023 CEA*
+ * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ * may not use this file except in compliance with the License, or, at
your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : HPDcache top (instantiates the core arbiter, controller, write buffer, miss handler, uncached handler and CMO handler) + * History : + */ +module hpdcache +import hpdcache_pkg::*; + // Parameters (requester count, memory-interface ID/data widths, memory request/response types) + // {{{ +#( + parameter int NREQUESTERS = 1, + parameter int HPDcacheMemIdWidth = 8, + parameter int HPDcacheMemDataWidth = 512, + parameter type hpdcache_mem_req_t = logic, + parameter type hpdcache_mem_req_w_t = logic, + parameter type hpdcache_mem_resp_r_t = logic, + parameter type hpdcache_mem_resp_w_t = logic +) + // }}} + + // Ports + // {{{ +( + // Clock and reset signals + input logic clk_i, + input logic rst_ni, + + // Force the write buffer to send all pending writes + input logic wbuf_flush_i, + + // Core request interface + // 1st cycle + input logic core_req_valid_i [NREQUESTERS-1:0], + output logic core_req_ready_o [NREQUESTERS-1:0], + input hpdcache_req_t core_req_i [NREQUESTERS-1:0], + // 2nd cycle + input logic core_req_abort_i [NREQUESTERS-1:0], + input hpdcache_tag_t core_req_tag_i [NREQUESTERS-1:0], + input hpdcache_pma_t core_req_pma_i [NREQUESTERS-1:0], + + // Core response interface + output logic core_rsp_valid_o [NREQUESTERS-1:0], + output hpdcache_rsp_t core_rsp_o [NREQUESTERS-1:0], + + // Miss read interface + input logic mem_req_miss_read_ready_i, + output logic mem_req_miss_read_valid_o, + output hpdcache_mem_req_t mem_req_miss_read_o, + + output logic mem_resp_miss_read_ready_o, + input logic mem_resp_miss_read_valid_i, + input hpdcache_mem_resp_r_t mem_resp_miss_read_i, + + // Write-buffer write
interface + input logic mem_req_wbuf_write_ready_i, + output logic mem_req_wbuf_write_valid_o, + output hpdcache_mem_req_t mem_req_wbuf_write_o, + + input logic mem_req_wbuf_write_data_ready_i, + output logic mem_req_wbuf_write_data_valid_o, + output hpdcache_mem_req_w_t mem_req_wbuf_write_data_o, + + output logic mem_resp_wbuf_write_ready_o, + input logic mem_resp_wbuf_write_valid_i, + input hpdcache_mem_resp_w_t mem_resp_wbuf_write_i, + + // Uncached read interface + input logic mem_req_uc_read_ready_i, + output logic mem_req_uc_read_valid_o, + output hpdcache_mem_req_t mem_req_uc_read_o, + + output logic mem_resp_uc_read_ready_o, + input logic mem_resp_uc_read_valid_i, + input hpdcache_mem_resp_r_t mem_resp_uc_read_i, + + // Uncached write interface + input logic mem_req_uc_write_ready_i, + output logic mem_req_uc_write_valid_o, + output hpdcache_mem_req_t mem_req_uc_write_o, + + input logic mem_req_uc_write_data_ready_i, + output logic mem_req_uc_write_data_valid_o, + output hpdcache_mem_req_w_t mem_req_uc_write_data_o, + + output logic mem_resp_uc_write_ready_o, + input logic mem_resp_uc_write_valid_i, + input hpdcache_mem_resp_w_t mem_resp_uc_write_i, + + // Performance events + output logic evt_cache_write_miss_o, + output logic evt_cache_read_miss_o, + output logic evt_uncached_req_o, + output logic evt_cmo_req_o, + output logic evt_write_req_o, + output logic evt_read_req_o, + output logic evt_prefetch_req_o, + output logic evt_req_on_hold_o, + output logic evt_rtab_rollback_o, + output logic evt_stall_refill_o, + output logic evt_stall_o, + + // Status interface + output logic wbuf_empty_o, + + // Configuration interface + input logic cfg_enable_i, + input wbuf_timecnt_t cfg_wbuf_threshold_i, + input logic cfg_wbuf_reset_timecnt_on_write_i, + input logic cfg_wbuf_sequential_waw_i, + input logic cfg_wbuf_inhibit_write_coalescing_i, + input logic cfg_prefetch_updt_plru_i, + input logic cfg_error_on_cacheable_amo_i, + input logic cfg_rtab_single_entry_i +);
+ + // }}} + + // Declaration of internal signals (wiring between the sub-module instances below) + // {{{ + logic refill_req_valid; + logic refill_req_ready; + logic refill_busy; + logic refill_updt_plru; + hpdcache_set_t refill_set; + hpdcache_dir_entry_t refill_dir_entry; + hpdcache_way_vector_t refill_read_victim_way; + hpdcache_way_vector_t refill_write_victim_way; + logic refill_write_dir; + logic refill_write_data; + hpdcache_word_t refill_word; + hpdcache_refill_data_t refill_data; + logic refill_core_rsp_valid; + hpdcache_rsp_t refill_core_rsp; + hpdcache_nline_t refill_nline; + logic refill_updt_rtab; + + logic miss_mshr_empty; + logic miss_mshr_check; + mshr_set_t miss_mshr_check_set; + mshr_tag_t miss_mshr_check_tag; + logic miss_mshr_hit; + logic miss_mshr_alloc_cs; + logic miss_mshr_alloc; + logic miss_mshr_alloc_ready; + logic miss_mshr_alloc_full; + hpdcache_nline_t miss_mshr_alloc_nline; + hpdcache_req_tid_t miss_mshr_alloc_tid; + hpdcache_req_sid_t miss_mshr_alloc_sid; + hpdcache_word_t miss_mshr_alloc_word; + logic miss_mshr_alloc_need_rsp; + logic miss_mshr_alloc_is_prefetch; + + logic wbuf_flush_all; + logic wbuf_write; + logic wbuf_write_ready; + wbuf_addr_t wbuf_write_addr; + wbuf_data_t wbuf_write_data; + wbuf_be_t wbuf_write_be; + logic wbuf_write_uncacheable; + logic wbuf_read_hit; + logic wbuf_read_flush_hit; + hpdcache_req_addr_t wbuf_rtab_addr; + logic wbuf_rtab_is_read; + logic wbuf_rtab_hit_open; + logic wbuf_rtab_hit_pend; + logic wbuf_rtab_hit_sent; + logic wbuf_rtab_not_ready; + + logic uc_ready; + logic uc_req_valid; + hpdcache_uc_op_t uc_req_op; + hpdcache_req_addr_t uc_req_addr; + hpdcache_req_size_t uc_req_size; + hpdcache_req_data_t uc_req_data; + hpdcache_req_be_t uc_req_be; + logic uc_req_uncacheable; + hpdcache_req_sid_t uc_req_sid; + hpdcache_req_tid_t uc_req_tid; + logic uc_req_need_rsp; + logic uc_wbuf_flush_all; + logic uc_dir_amo_match; + hpdcache_set_t uc_dir_amo_match_set; + hpdcache_tag_t uc_dir_amo_match_tag; + logic uc_dir_amo_update_plru; +
hpdcache_way_vector_t uc_dir_amo_hit_way; + logic uc_data_amo_write; + logic uc_data_amo_write_enable; + hpdcache_set_t uc_data_amo_write_set; + hpdcache_req_size_t uc_data_amo_write_size; + hpdcache_word_t uc_data_amo_write_word; + logic [63:0] uc_data_amo_write_data; + logic [7:0] uc_data_amo_write_be; + logic uc_lrsc_snoop; + hpdcache_req_addr_t uc_lrsc_snoop_addr; + hpdcache_req_size_t uc_lrsc_snoop_size; + logic uc_core_rsp_ready; + logic uc_core_rsp_valid; + hpdcache_rsp_t uc_core_rsp; + + logic cmo_req_valid; + logic cmo_ready; + hpdcache_cmoh_op_t cmo_req_op; + hpdcache_req_addr_t cmo_req_addr; + hpdcache_req_data_t cmo_req_wdata; + logic cmo_wbuf_flush_all; + logic cmo_dir_check; + hpdcache_set_t cmo_dir_check_set; + hpdcache_tag_t cmo_dir_check_tag; + hpdcache_way_vector_t cmo_dir_check_hit_way; + logic cmo_dir_inval; + hpdcache_set_t cmo_dir_inval_set; + hpdcache_way_vector_t cmo_dir_inval_way; + + logic rtab_empty; + logic ctrl_empty; + + logic core_rsp_valid; + hpdcache_rsp_t core_rsp; + + logic arb_req_valid; + logic arb_req_ready; + hpdcache_req_t arb_req; + logic arb_abort; + hpdcache_tag_t arb_tag; + hpdcache_pma_t arb_pma; + + localparam logic [HPDcacheMemIdWidth-1:0] HPDCACHE_UC_READ_ID = {HPDcacheMemIdWidth{1'b1}}; + localparam logic [HPDcacheMemIdWidth-1:0] HPDCACHE_UC_WRITE_ID = {HPDcacheMemIdWidth{1'b1}}; + // }}} + + // Requesters arbiter (merges the NREQUESTERS core ports into the single arb_* request stream) + // {{{ + hpdcache_core_arbiter #( + .NREQUESTERS (NREQUESTERS) + ) core_req_arbiter_i ( + .clk_i, + .rst_ni, + + .core_req_valid_i, + .core_req_ready_o, + .core_req_i, + .core_req_abort_i, + .core_req_tag_i, + .core_req_pma_i, + + .core_rsp_valid_i (core_rsp_valid), + .core_rsp_i (core_rsp), + .core_rsp_valid_o, + .core_rsp_o, + + .arb_req_valid_o (arb_req_valid), + .arb_req_ready_i (arb_req_ready), + .arb_req_o (arb_req), + .arb_abort_o (arb_abort), + .arb_tag_o (arb_tag), + .arb_pma_o (arb_pma) + ); + // }}} + + // HPDcache controller + // {{{ + hpdcache_ctrl hpdcache_ctrl_i( + .clk_i, + .rst_ni, + +
.core_req_valid_i (arb_req_valid), + .core_req_ready_o (arb_req_ready), + .core_req_i (arb_req), + .core_req_abort_i (arb_abort), + .core_req_tag_i (arb_tag), + .core_req_pma_i (arb_pma), + + .core_rsp_valid_o (core_rsp_valid), + .core_rsp_o (core_rsp), + + .wbuf_flush_i, + + .cachedir_hit_o (/* unused */), + + .miss_mshr_check_o (miss_mshr_check), + .miss_mshr_check_set_o (miss_mshr_check_set), + .miss_mshr_check_tag_o (miss_mshr_check_tag), + .miss_mshr_alloc_o (miss_mshr_alloc), + .miss_mshr_alloc_cs_o (miss_mshr_alloc_cs), + .miss_mshr_alloc_ready_i (miss_mshr_alloc_ready), + .miss_mshr_alloc_full_i (miss_mshr_alloc_full), + .miss_mshr_alloc_nline_o (miss_mshr_alloc_nline), + .miss_mshr_alloc_tid_o (miss_mshr_alloc_tid), + .miss_mshr_alloc_sid_o (miss_mshr_alloc_sid), + .miss_mshr_alloc_word_o (miss_mshr_alloc_word), + .miss_mshr_alloc_need_rsp_o (miss_mshr_alloc_need_rsp), + .miss_mshr_alloc_is_prefetch_o (miss_mshr_alloc_is_prefetch), + .miss_mshr_hit_i (miss_mshr_hit), + + .refill_req_valid_i (refill_req_valid), + .refill_req_ready_o (refill_req_ready), + .refill_busy_i (refill_busy), + .refill_updt_plru_i (refill_updt_plru), + .refill_set_i (refill_set), + .refill_dir_entry_i (refill_dir_entry), + .refill_victim_way_o (refill_read_victim_way), + .refill_victim_way_i (refill_write_victim_way), + .refill_write_dir_i (refill_write_dir), + .refill_write_data_i (refill_write_data), + .refill_word_i (refill_word), + .refill_data_i (refill_data), + .refill_core_rsp_valid_i (refill_core_rsp_valid), + .refill_core_rsp_i (refill_core_rsp), + .refill_nline_i (refill_nline), + .refill_updt_rtab_i (refill_updt_rtab), + + .wbuf_empty_i (wbuf_empty_o), + .wbuf_flush_all_o (wbuf_flush_all), + .wbuf_write_o (wbuf_write), + .wbuf_write_ready_i (wbuf_write_ready), + .wbuf_write_addr_o (wbuf_write_addr), + .wbuf_write_data_o (wbuf_write_data), + .wbuf_write_be_o (wbuf_write_be), + .wbuf_write_uncacheable_o (wbuf_write_uncacheable), + .wbuf_read_hit_i (wbuf_read_hit), +
.wbuf_read_flush_hit_o (wbuf_read_flush_hit), + .wbuf_rtab_addr_o (wbuf_rtab_addr), + .wbuf_rtab_is_read_o (wbuf_rtab_is_read), + .wbuf_rtab_hit_open_i (wbuf_rtab_hit_open), + .wbuf_rtab_hit_pend_i (wbuf_rtab_hit_pend), + .wbuf_rtab_hit_sent_i (wbuf_rtab_hit_sent), + .wbuf_rtab_not_ready_i (wbuf_rtab_not_ready), + + .uc_busy_i (~uc_ready), + .uc_lrsc_snoop_o (uc_lrsc_snoop), + .uc_lrsc_snoop_addr_o (uc_lrsc_snoop_addr), + .uc_lrsc_snoop_size_o (uc_lrsc_snoop_size), + .uc_req_valid_o (uc_req_valid), + .uc_req_op_o (uc_req_op), + .uc_req_addr_o (uc_req_addr), + .uc_req_size_o (uc_req_size), + .uc_req_data_o (uc_req_data), + .uc_req_be_o (uc_req_be), + .uc_req_uc_o (uc_req_uncacheable), + .uc_req_sid_o (uc_req_sid), + .uc_req_tid_o (uc_req_tid), + .uc_req_need_rsp_o (uc_req_need_rsp), + .uc_wbuf_flush_all_i (uc_wbuf_flush_all), + .uc_dir_amo_match_i (uc_dir_amo_match), + .uc_dir_amo_match_set_i (uc_dir_amo_match_set), + .uc_dir_amo_match_tag_i (uc_dir_amo_match_tag), + .uc_dir_amo_update_plru_i (uc_dir_amo_update_plru), + .uc_dir_amo_hit_way_o (uc_dir_amo_hit_way), + .uc_data_amo_write_i (uc_data_amo_write), + .uc_data_amo_write_enable_i (uc_data_amo_write_enable), + .uc_data_amo_write_set_i (uc_data_amo_write_set), + .uc_data_amo_write_size_i (uc_data_amo_write_size), + .uc_data_amo_write_word_i (uc_data_amo_write_word), + .uc_data_amo_write_data_i (uc_data_amo_write_data), + .uc_data_amo_write_be_i (uc_data_amo_write_be), + .uc_core_rsp_ready_o (uc_core_rsp_ready), + .uc_core_rsp_valid_i (uc_core_rsp_valid), + .uc_core_rsp_i (uc_core_rsp), + + .cmo_busy_i (~cmo_ready), + .cmo_req_valid_o (cmo_req_valid), + .cmo_req_op_o (cmo_req_op), + .cmo_req_addr_o (cmo_req_addr), + .cmo_req_wdata_o (cmo_req_wdata), + .cmo_wbuf_flush_all_i (cmo_wbuf_flush_all), + .cmo_dir_check_i (cmo_dir_check), + .cmo_dir_check_set_i (cmo_dir_check_set), + .cmo_dir_check_tag_i (cmo_dir_check_tag), + .cmo_dir_check_hit_way_o (cmo_dir_check_hit_way), + .cmo_dir_inval_i (cmo_dir_inval), +
.cmo_dir_inval_set_i (cmo_dir_inval_set), + .cmo_dir_inval_way_i (cmo_dir_inval_way), + + .rtab_empty_o (rtab_empty), + .ctrl_empty_o (ctrl_empty), + + .cfg_enable_i, + .cfg_rtab_single_entry_i, + + .evt_cache_write_miss_o, + .evt_cache_read_miss_o, + .evt_uncached_req_o, + .evt_cmo_req_o, + .evt_write_req_o, + .evt_read_req_o, + .evt_prefetch_req_o, + .evt_req_on_hold_o, + .evt_rtab_rollback_o, + .evt_stall_refill_o, + .evt_stall_o + ); + // }}} + + // HPDcache write-buffer + // {{{ + hpdcache_wbuf_wrapper #( + .HPDcacheMemIdWidth (HPDcacheMemIdWidth), + .HPDcacheMemDataWidth (HPDcacheMemDataWidth), + .hpdcache_mem_req_t (hpdcache_mem_req_t), + .hpdcache_mem_req_w_t (hpdcache_mem_req_w_t), + .hpdcache_mem_resp_w_t (hpdcache_mem_resp_w_t) + ) hpdcache_wbuf_i( + .clk_i, + .rst_ni, + + .empty_o (wbuf_empty_o), + .full_o (/* unused */), + .flush_all_i (wbuf_flush_all), + + .cfg_threshold_i (cfg_wbuf_threshold_i), + .cfg_reset_timecnt_on_write_i (cfg_wbuf_reset_timecnt_on_write_i), + .cfg_sequential_waw_i (cfg_wbuf_sequential_waw_i), + .cfg_inhibit_write_coalescing_i (cfg_wbuf_inhibit_write_coalescing_i), + + .write_i (wbuf_write), + .write_ready_o (wbuf_write_ready), + .write_addr_i (wbuf_write_addr), + .write_data_i (wbuf_write_data), + .write_be_i (wbuf_write_be), + .write_uc_i (wbuf_write_uncacheable), + + .read_addr_i (wbuf_write_addr), + .read_hit_o (wbuf_read_hit), + .read_flush_hit_i (wbuf_read_flush_hit), + + .replay_addr_i (wbuf_rtab_addr), + .replay_is_read_i (wbuf_rtab_is_read), + .replay_open_hit_o (wbuf_rtab_hit_open), + .replay_pend_hit_o (wbuf_rtab_hit_pend), + .replay_sent_hit_o (wbuf_rtab_hit_sent), + .replay_not_ready_o (wbuf_rtab_not_ready), + + .mem_req_write_ready_i (mem_req_wbuf_write_ready_i), + .mem_req_write_valid_o (mem_req_wbuf_write_valid_o), + .mem_req_write_o (mem_req_wbuf_write_o), + + .mem_req_write_data_ready_i (mem_req_wbuf_write_data_ready_i), + .mem_req_write_data_valid_o (mem_req_wbuf_write_data_valid_o), + .mem_req_write_data_o
(mem_req_wbuf_write_data_o), + + .mem_resp_write_ready_o (mem_resp_wbuf_write_ready_o), + .mem_resp_write_valid_i (mem_resp_wbuf_write_valid_i), + .mem_resp_write_i (mem_resp_wbuf_write_i) + ); + // }}} + + // Miss handler + // {{{ + hpdcache_miss_handler #( + .HPDcacheMemIdWidth (HPDcacheMemIdWidth), + .HPDcacheMemDataWidth (HPDcacheMemDataWidth), + .hpdcache_mem_req_t (hpdcache_mem_req_t), + .hpdcache_mem_resp_r_t (hpdcache_mem_resp_r_t) + ) hpdcache_miss_handler_i( + .clk_i, + .rst_ni, + + .mshr_empty_o (miss_mshr_empty), + .mshr_full_o (/* unused */), + + .cfg_prefetch_updt_plru_i, + + .mshr_check_i (miss_mshr_check), + .mshr_check_set_i (miss_mshr_check_set), + .mshr_check_tag_i (miss_mshr_check_tag), + .mshr_check_hit_o (miss_mshr_hit), + + .mshr_alloc_ready_o (miss_mshr_alloc_ready), + .mshr_alloc_i (miss_mshr_alloc), + .mshr_alloc_cs_i (miss_mshr_alloc_cs), + .mshr_alloc_full_o (miss_mshr_alloc_full), + .mshr_alloc_nline_i (miss_mshr_alloc_nline), + .mshr_alloc_tid_i (miss_mshr_alloc_tid), + .mshr_alloc_sid_i (miss_mshr_alloc_sid), + .mshr_alloc_word_i (miss_mshr_alloc_word), + .mshr_alloc_need_rsp_i (miss_mshr_alloc_need_rsp), + .mshr_alloc_is_prefetch_i (miss_mshr_alloc_is_prefetch), + + .refill_req_ready_i (refill_req_ready), + .refill_req_valid_o (refill_req_valid), + .refill_busy_o (refill_busy), + .refill_updt_plru_o (refill_updt_plru), + .refill_set_o (refill_set), + .refill_dir_entry_o (refill_dir_entry), + .refill_victim_way_i (refill_read_victim_way), + .refill_write_dir_o (refill_write_dir), + .refill_write_data_o (refill_write_data), + .refill_victim_way_o (refill_write_victim_way), + .refill_data_o (refill_data), + .refill_word_o (refill_word), + .refill_nline_o (refill_nline), + .refill_updt_rtab_o (refill_updt_rtab), + + .refill_core_rsp_valid_o (refill_core_rsp_valid), + .refill_core_rsp_o (refill_core_rsp), + + .mem_req_ready_i (mem_req_miss_read_ready_i), + .mem_req_valid_o (mem_req_miss_read_valid_o), + .mem_req_o (mem_req_miss_read_o), +
+ .mem_resp_ready_o (mem_resp_miss_read_ready_o), + .mem_resp_valid_i (mem_resp_miss_read_valid_i), + .mem_resp_i (mem_resp_miss_read_i) + ); + // }}} + + // Uncacheable request handler + // {{{ + hpdcache_uncached #( + .HPDcacheMemIdWidth (HPDcacheMemIdWidth), + .HPDcacheMemDataWidth (HPDcacheMemDataWidth), + .hpdcache_mem_req_t (hpdcache_mem_req_t), + .hpdcache_mem_req_w_t (hpdcache_mem_req_w_t), + .hpdcache_mem_resp_r_t (hpdcache_mem_resp_r_t), + .hpdcache_mem_resp_w_t (hpdcache_mem_resp_w_t) + ) hpdcache_uc_i( + .clk_i, + .rst_ni, + + .wbuf_empty_i (wbuf_empty_o), + .mshr_empty_i (miss_mshr_empty), + .rtab_empty_i (rtab_empty), + .ctrl_empty_i (ctrl_empty), + + .req_valid_i (uc_req_valid), + .req_ready_o (uc_ready), + .req_op_i (uc_req_op), + .req_addr_i (uc_req_addr), + .req_size_i (uc_req_size), + .req_data_i (uc_req_data), + .req_be_i (uc_req_be), + .req_uc_i (uc_req_uncacheable), + .req_sid_i (uc_req_sid), + .req_tid_i (uc_req_tid), + .req_need_rsp_i (uc_req_need_rsp), + + .wbuf_flush_all_o (uc_wbuf_flush_all), + + .dir_amo_match_o (uc_dir_amo_match), + .dir_amo_match_set_o (uc_dir_amo_match_set), + .dir_amo_match_tag_o (uc_dir_amo_match_tag), + .dir_amo_update_plru_o (uc_dir_amo_update_plru), + .dir_amo_hit_way_i (uc_dir_amo_hit_way), + + .data_amo_write_o (uc_data_amo_write), + .data_amo_write_enable_o (uc_data_amo_write_enable), + .data_amo_write_set_o (uc_data_amo_write_set), + .data_amo_write_size_o (uc_data_amo_write_size), + .data_amo_write_word_o (uc_data_amo_write_word), + .data_amo_write_data_o (uc_data_amo_write_data), + .data_amo_write_be_o (uc_data_amo_write_be), + + .lrsc_snoop_i (uc_lrsc_snoop), + .lrsc_snoop_addr_i (uc_lrsc_snoop_addr), + .lrsc_snoop_size_i (uc_lrsc_snoop_size), + + .core_rsp_ready_i (uc_core_rsp_ready), + .core_rsp_valid_o (uc_core_rsp_valid), + .core_rsp_o (uc_core_rsp), + + .mem_read_id_i (HPDCACHE_UC_READ_ID), + .mem_write_id_i (HPDCACHE_UC_WRITE_ID), + + .mem_req_read_ready_i (mem_req_uc_read_ready_i), +
.mem_req_read_valid_o (mem_req_uc_read_valid_o), + .mem_req_read_o (mem_req_uc_read_o), + + .mem_resp_read_ready_o (mem_resp_uc_read_ready_o), + .mem_resp_read_valid_i (mem_resp_uc_read_valid_i), + .mem_resp_read_i (mem_resp_uc_read_i), + + .mem_req_write_ready_i (mem_req_uc_write_ready_i), + .mem_req_write_valid_o (mem_req_uc_write_valid_o), + .mem_req_write_o (mem_req_uc_write_o), + + .mem_req_write_data_ready_i (mem_req_uc_write_data_ready_i), + .mem_req_write_data_valid_o (mem_req_uc_write_data_valid_o), + .mem_req_write_data_o (mem_req_uc_write_data_o), + + .mem_resp_write_ready_o (mem_resp_uc_write_ready_o), + .mem_resp_write_valid_i (mem_resp_uc_write_valid_i), + .mem_resp_write_i (mem_resp_uc_write_i), + + .cfg_error_on_cacheable_amo_i + ); + + // CMO Request Handler + // {{{ + hpdcache_cmo hpdcache_cmo_i( + .clk_i, + .rst_ni, + + .wbuf_empty_i (wbuf_empty_o), + .mshr_empty_i (miss_mshr_empty), + .rtab_empty_i (rtab_empty), + .ctrl_empty_i (ctrl_empty), + + .req_valid_i (cmo_req_valid), + .req_ready_o (cmo_ready), + .req_op_i (cmo_req_op), + .req_addr_i (cmo_req_addr), + .req_wdata_i (cmo_req_wdata), + + .wbuf_flush_all_o (cmo_wbuf_flush_all), + + .dir_check_o (cmo_dir_check), + .dir_check_set_o (cmo_dir_check_set), + .dir_check_tag_o (cmo_dir_check_tag), + .dir_check_hit_way_i (cmo_dir_check_hit_way), + + .dir_inval_o (cmo_dir_inval), + .dir_inval_set_o (cmo_dir_inval_set), + .dir_inval_way_o (cmo_dir_inval_way) + ); + // }}} + + // Assertions (parameter consistency checks, run once from an initial block) + // {{{ + // pragma translate_off + initial begin + req_access_width_assert: + assert (HPDCACHE_REQ_WORDS <= HPDCACHE_ACCESS_WORDS) else + $error("req data width shall be l.e. to cache access width"); + refill_access_width_assert: + assert (HPDCACHE_CL_WORDS >= HPDCACHE_ACCESS_WORDS) else + $error("cache access width shall be l.e.
to cache-line width"); + miss_mem_id_width_assert: + assert (HPDcacheMemIdWidth >= (HPDCACHE_MSHR_WAY_WIDTH + HPDCACHE_MSHR_SET_WIDTH)) else + $error("insufficient ID bits on the mem interface to transport misses"); + wbuf_mem_id_width_assert: + assert (HPDcacheMemIdWidth >= HPDCACHE_WBUF_DIR_PTR_WIDTH) else + $error("insufficient ID bits on the mem interface to transport writes"); + + end + // pragma translate_on + // }}} + +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_amo.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_amo.sv new file mode 100644 index 00000000..d233af1d --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_amo.sv @@ -0,0 +1,67 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License.
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : May, 2021 + * Description : HPDcache AMO computing unit (combinational; max/min compare as signed, maxu/minu as unsigned; result_o is '0 when none of the listed op_i flags is set) + * History : + */ +module hpdcache_amo +import hpdcache_pkg::*; +// Ports: two 64-bit operands (ld_data_i, st_data_i), the operation selector and the 64-bit result +// {{{ +( + input logic [63:0] ld_data_i, + input logic [63:0] st_data_i, + input hpdcache_uc_op_t op_i, + output logic [63:0] result_o +); +// }}} + + logic signed [63:0] ld_data; + logic signed [63:0] st_data; + logic signed [63:0] sum; + logic ugt, sgt; + + assign ld_data = ld_data_i, + st_data = st_data_i; + + assign ugt = (ld_data_i > st_data_i), + sgt = (ld_data > st_data), + sum = ld_data + st_data; + + always_comb + begin : amo_compute_comb + unique case (1'b1) + op_i.is_amo_lr : result_o = ld_data_i; + op_i.is_amo_sc : result_o = st_data_i; + op_i.is_amo_swap : result_o = st_data_i; + op_i.is_amo_add : result_o = sum; + op_i.is_amo_and : result_o = ld_data_i & st_data_i; + op_i.is_amo_or : result_o = ld_data_i | st_data_i; + op_i.is_amo_xor : result_o = ld_data_i ^ st_data_i; + op_i.is_amo_max : result_o = sgt ? ld_data_i : st_data_i; + op_i.is_amo_maxu : result_o = ugt ? ld_data_i : st_data_i; + op_i.is_amo_min : result_o = sgt ? st_data_i : ld_data_i; + op_i.is_amo_minu : result_o = ugt ? st_data_i : ld_data_i; + default : result_o = '0; + endcase + end +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_cmo.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_cmo.sv new file mode 100644 index 00000000..de09cd74 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_cmo.sv @@ -0,0 +1,250 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0.
You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : July, 2021 + * Description : HPDcache Cache-Management-Operation Handler + * History : + */ +module hpdcache_cmo +import hpdcache_pkg::*; +// Ports +// {{{ +( + input logic clk_i, + input logic rst_ni, + + // Global control signals + // {{{ + input logic wbuf_empty_i, + input logic mshr_empty_i, + input logic rtab_empty_i, + input logic ctrl_empty_i, + // }}} + + // Request interface + // {{{ + input logic req_valid_i, + output logic req_ready_o, + input hpdcache_cmoh_op_t req_op_i, + input hpdcache_req_addr_t req_addr_i, + input hpdcache_req_data_t req_wdata_i, + // }}} + + // Write Buffer Interface + // {{{ + output logic wbuf_flush_all_o, + // }}} + + // Cache Directory Interface + // {{{ + output logic dir_check_o, + output hpdcache_set_t dir_check_set_o, + output hpdcache_tag_t dir_check_tag_o, + input hpdcache_way_vector_t dir_check_hit_way_i, + + output logic dir_inval_o, + output hpdcache_set_t dir_inval_set_o, + output hpdcache_way_vector_t dir_inval_way_o + // }}} +); +// }}} + +// Definition of constants and types +// {{{ + typedef enum { + CMOH_IDLE, + CMOH_FENCE_WAIT_WBUF_RTAB_EMPTY, + CMOH_INVAL_WAIT_MSHR_RTAB_EMPTY, + CMOH_INVAL_CHECK_NLINE, + CMOH_INVAL_SET + } hpdcache_cmoh_fsm_t; +// }}} + +// Internal signals and registers (each _d is the next value of its _q register; cmoh_nline_q/tag_q/set_q are slices of cmoh_addr_q) +// {{{ + hpdcache_cmoh_fsm_t cmoh_fsm_q, cmoh_fsm_d; + hpdcache_cmoh_op_t cmoh_op_q, cmoh_op_d; + hpdcache_req_addr_t cmoh_addr_q, cmoh_addr_d; + hpdcache_way_vector_t cmoh_way_q, cmoh_way_d; + hpdcache_set_t cmoh_set_cnt_q, cmoh_set_cnt_d; +
hpdcache_nline_t cmoh_nline_q; + hpdcache_tag_t cmoh_tag_q; + hpdcache_set_t cmoh_set_q; + hpdcache_data_word_t cmoh_wdata; +// }}} + +// CMO request handler FSM (req_ready_o is asserted only in CMOH_IDLE) +// {{{ + assign cmoh_nline_q = cmoh_addr_q[HPDCACHE_OFFSET_WIDTH +: HPDCACHE_NLINE_WIDTH], + cmoh_set_q = cmoh_nline_q[0 +: HPDCACHE_SET_WIDTH], + cmoh_tag_q = cmoh_nline_q[HPDCACHE_SET_WIDTH +: HPDCACHE_TAG_WIDTH]; + + assign dir_check_set_o = cmoh_set_q, + dir_check_tag_o = cmoh_tag_q; + + assign req_ready_o = (cmoh_fsm_q == CMOH_IDLE); + + // Only the least significant word of the write data contains parameters + // for the CMO handler + assign cmoh_wdata = req_wdata_i[0]; + + always_comb + begin : cmoh_fsm_comb + cmoh_op_d = cmoh_op_q; + cmoh_addr_d = cmoh_addr_q; + cmoh_way_d = cmoh_way_q; + cmoh_set_cnt_d = cmoh_set_cnt_q; + + dir_check_o = 1'b0; + + dir_inval_o = 1'b0; + dir_inval_set_o = cmoh_set_q; + dir_inval_way_o = '0; + + wbuf_flush_all_o = 1'b0; + + cmoh_fsm_d = cmoh_fsm_q; + + case (cmoh_fsm_q) + CMOH_IDLE: begin + cmoh_fsm_d = CMOH_IDLE; + + if (req_valid_i) begin + unique case (1'b1) + req_op_i.is_fence: begin + // request to the write buffer to send all open entries + wbuf_flush_all_o = rtab_empty_i; + + // then wait until both the rtab and the write buffer are empty + if (!rtab_empty_i || !wbuf_empty_i) begin + cmoh_fsm_d = CMOH_FENCE_WAIT_WBUF_RTAB_EMPTY; + end + end + req_op_i.is_inval_by_nline, + req_op_i.is_inval_by_set, + req_op_i.is_inval_all: begin + cmoh_op_d = req_op_i; + cmoh_addr_d = req_addr_i; + cmoh_way_d = cmoh_wdata[0 +: HPDCACHE_WAYS]; + cmoh_set_cnt_d = 0; + if (mshr_empty_i && rtab_empty_i && ctrl_empty_i) begin + if (req_op_i.is_inval_by_nline) begin + cmoh_fsm_d = CMOH_INVAL_CHECK_NLINE; + end else begin + cmoh_fsm_d = CMOH_INVAL_SET; + end + end else begin + cmoh_fsm_d = CMOH_INVAL_WAIT_MSHR_RTAB_EMPTY; + end + end + default: begin + // pragma translate_off + $error("cmo handler: unexpected operation"); + // pragma translate_on + end + endcase + end + end +
CMOH_FENCE_WAIT_WBUF_RTAB_EMPTY: begin + wbuf_flush_all_o = rtab_empty_i; + + if (wbuf_empty_i && rtab_empty_i) begin + cmoh_fsm_d = CMOH_IDLE; + end else begin + cmoh_fsm_d = CMOH_FENCE_WAIT_WBUF_RTAB_EMPTY; + end + end + CMOH_INVAL_WAIT_MSHR_RTAB_EMPTY: begin + cmoh_fsm_d = CMOH_INVAL_WAIT_MSHR_RTAB_EMPTY; + if (mshr_empty_i && rtab_empty_i && ctrl_empty_i) begin + if (cmoh_op_q.is_inval_by_nline) begin + cmoh_fsm_d = CMOH_INVAL_CHECK_NLINE; + end else begin + cmoh_fsm_d = CMOH_INVAL_SET; + end + end + end + CMOH_INVAL_CHECK_NLINE: begin + dir_check_o = 1'b1; + cmoh_fsm_d = CMOH_INVAL_SET; + end + CMOH_INVAL_SET: begin + cmoh_fsm_d = CMOH_INVAL_SET; + case (1'b1) + cmoh_op_q.is_inval_by_nline: begin + dir_inval_o = |dir_check_hit_way_i; + dir_inval_way_o = dir_check_hit_way_i; + cmoh_fsm_d = CMOH_IDLE; + end + cmoh_op_q.is_inval_all: begin + dir_inval_o = 1'b1; + dir_inval_way_o = {HPDCACHE_WAYS{1'b1}}; + dir_inval_set_o = cmoh_set_cnt_q; + cmoh_set_cnt_d = cmoh_set_cnt_q + 1; + if (cmoh_set_cnt_q == hpdcache_set_t'(HPDCACHE_SETS - 1)) begin + cmoh_fsm_d = CMOH_IDLE; + end + end + cmoh_op_q.is_inval_by_set: begin + dir_inval_o = 1'b1; + dir_inval_way_o = cmoh_way_q; + cmoh_fsm_d = CMOH_IDLE; + end + endcase + end + endcase + end +// }}} + +// CMO request handler set state (only the FSM state register is reset; the other registers are not) +// {{{ + always_ff @(posedge clk_i or negedge rst_ni) + begin + if (!rst_ni) begin + cmoh_fsm_q <= CMOH_IDLE; + end else begin + cmoh_fsm_q <= cmoh_fsm_d; + end + end + + always_ff @(posedge clk_i) + begin + cmoh_op_q <= cmoh_op_d; + cmoh_addr_q <= cmoh_addr_d; + cmoh_way_q <= cmoh_way_d; + cmoh_set_cnt_q <= cmoh_set_cnt_d; + end +// }}} + +// Assertions +// {{{ +// pragma translate_off + assert property (@(posedge clk_i) disable iff (!rst_ni) + req_valid_i -> $onehot(req_op_i)) else + $error("cmo_handler: more than one operation type requested"); + + assert property (@(posedge clk_i) disable iff (!rst_ni) + req_valid_i -> (cmoh_fsm_q == CMOH_IDLE)) else + $error("cmo_handler: new request
received while busy"); +// pragma translate_on +// }}} + +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_core_arbiter.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_core_arbiter.sv new file mode 100644 index 00000000..1f8f5a47 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_core_arbiter.sv @@ -0,0 +1,171 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License.
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : September, 2023 + * Description : HPDcache request arbiter + * History : + */ +module hpdcache_core_arbiter +import hpdcache_pkg::*; + // Parameters + // {{{ +#( + parameter int NREQUESTERS = 1 +) + // }}} + + // Ports + // {{{ +( + // Clock and reset signals + input logic clk_i, + input logic rst_ni, + + // Core request interface + // 1st cycle + input logic core_req_valid_i [NREQUESTERS-1:0], + output logic core_req_ready_o [NREQUESTERS-1:0], + input hpdcache_req_t core_req_i [NREQUESTERS-1:0], + // 2nd cycle + input logic core_req_abort_i [NREQUESTERS-1:0], + input hpdcache_tag_t core_req_tag_i [NREQUESTERS-1:0], + input hpdcache_pma_t core_req_pma_i [NREQUESTERS-1:0], + + // Core response interface + input logic core_rsp_valid_i, + input hpdcache_rsp_t core_rsp_i, + output logic core_rsp_valid_o [NREQUESTERS-1:0], + output hpdcache_rsp_t core_rsp_o [NREQUESTERS-1:0], + + // Granted request + output logic arb_req_valid_o, + input logic arb_req_ready_i, + output hpdcache_req_t arb_req_o, + output logic arb_abort_o, + output hpdcache_tag_t arb_tag_o, + output hpdcache_pma_t arb_pma_o +); + + // }}} + + // Declaration of internal signals + // {{{ + logic [NREQUESTERS-1:0] core_req_valid; + hpdcache_req_t [NREQUESTERS-1:0] core_req; + logic [NREQUESTERS-1:0] core_req_abort; + hpdcache_tag_t [NREQUESTERS-1:0] core_req_tag; + hpdcache_pma_t [NREQUESTERS-1:0] core_req_pma; + + logic [NREQUESTERS-1:0] arb_req_gnt_q, arb_req_gnt_d; + // }}} + + // Requesters arbiter + // {{{ + // Pack request ports (the unpacked port arrays are copied into packed vectors for the arbiter and the muxes) + genvar gen_i; + + generate + for (gen_i = 0; gen_i < int'(NREQUESTERS); gen_i++) begin : gen_core_req + assign core_req_ready_o[gen_i] = arb_req_gnt_d[gen_i] & arb_req_ready_i, + core_req_valid[gen_i] = core_req_valid_i[gen_i], + core_req[gen_i] = core_req_i[gen_i]; + + assign core_req_abort[gen_i] = core_req_abort_i[gen_i], + core_req_tag[gen_i] = core_req_tag_i[gen_i], + core_req_pma[gen_i] = core_req_pma_i[gen_i]; +
end + endgenerate + + // Arbiter + hpdcache_fxarb #(.N(NREQUESTERS)) req_arbiter_i + ( + .clk_i, + .rst_ni, + .req_i (core_req_valid), + .gnt_o (arb_req_gnt_d), + .ready_i (arb_req_ready_i) + ); + + // Request multiplexor (selected by the current grant, arb_req_gnt_d) + hpdcache_mux #( + .NINPUT (NREQUESTERS), + .DATA_WIDTH ($bits(hpdcache_req_t)), + .ONE_HOT_SEL (1'b1) + ) core_req_mux_i ( + .data_i (core_req), + .sel_i (arb_req_gnt_d), + .data_o (arb_req_o) + ); + + // Request abort multiplexor (selected by the registered grant, arb_req_gnt_q) + hpdcache_mux #( + .NINPUT (NREQUESTERS), + .DATA_WIDTH (1), + .ONE_HOT_SEL (1'b1) + ) core_req_abort_mux_i ( + .data_i (core_req_abort), + .sel_i (arb_req_gnt_q), + .data_o (arb_abort_o) + ); + + // Tag Multiplexor (selected by the registered grant, arb_req_gnt_q) + hpdcache_mux #( + .NINPUT (NREQUESTERS), + .DATA_WIDTH ($bits(hpdcache_tag_t)), + .ONE_HOT_SEL (1'b1) + ) core_req_tag_mux_i ( + .data_i (core_req_tag), + .sel_i (arb_req_gnt_q), + .data_o (arb_tag_o) + ); + + // PMA Multiplexor (selected by the registered grant, arb_req_gnt_q) + hpdcache_mux #( + .NINPUT (NREQUESTERS), + .DATA_WIDTH ($bits(hpdcache_pma_t)), + .ONE_HOT_SEL (1'b1) + ) core_req_pma_mux_i ( + .data_i (core_req_pma), + .sel_i (arb_req_gnt_q), + .data_o (arb_pma_o) + ); + + // Save the grant signal for the tag in the next cycle + always_ff @(posedge clk_i or negedge rst_ni) + begin : arb_req_gnt_ff + if (!rst_ni) arb_req_gnt_q <= '0; + else arb_req_gnt_q <= arb_req_gnt_d; + end + + assign arb_req_valid_o = |arb_req_gnt_d; + // }}} + + // Response demultiplexor (response data is broadcast; valid is raised only for requester index core_rsp_i.sid) + // {{{ + always_comb + begin : resp_demux + for (int unsigned i = 0; i < NREQUESTERS; i++) begin + core_rsp_valid_o[i] = core_rsp_valid_i && (i == int'(core_rsp_i.sid)); + core_rsp_o[i] = core_rsp_i; + end + end + // }}} +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_ctrl.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_ctrl.sv new file mode 100755 index 00000000..19369c5d --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_ctrl.sv @@ -0,0 +1,760 @@ +/* + * Copyright 2023 CEA* + * *Commissariat
a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : HPDcache controller + * History : + */ +module hpdcache_ctrl + // Package imports + // {{{ +import hpdcache_pkg::*; + // }}} + + // Ports + // {{{ +( + input logic clk_i, + input logic rst_ni, + + // Core request interface + input logic core_req_valid_i, + output logic core_req_ready_o, + input hpdcache_req_t core_req_i, + input logic core_req_abort_i, + input hpdcache_tag_t core_req_tag_i, + input hpdcache_pma_t core_req_pma_i, + + // Core response interface + output logic core_rsp_valid_o, + output hpdcache_rsp_t core_rsp_o, + + // Force the write buffer to send all pending writes + input logic wbuf_flush_i, + + // Global control signals + output logic cachedir_hit_o, + + // Miss handler interface + output logic miss_mshr_check_o, + output mshr_set_t miss_mshr_check_set_o, + output mshr_tag_t miss_mshr_check_tag_o, + output logic miss_mshr_alloc_o, + output logic miss_mshr_alloc_cs_o, + input logic miss_mshr_alloc_ready_i, + input logic miss_mshr_alloc_full_i, + output hpdcache_nline_t miss_mshr_alloc_nline_o, + output hpdcache_req_tid_t miss_mshr_alloc_tid_o, + output hpdcache_req_sid_t miss_mshr_alloc_sid_o, + output hpdcache_word_t 
miss_mshr_alloc_word_o, + output logic miss_mshr_alloc_need_rsp_o, + output logic miss_mshr_alloc_is_prefetch_o, + input logic miss_mshr_hit_i, + + // Refill interface + input logic refill_req_valid_i, + output logic refill_req_ready_o, + input logic refill_busy_i, + input logic refill_updt_plru_i, + input hpdcache_set_t refill_set_i, + input hpdcache_dir_entry_t refill_dir_entry_i, + output hpdcache_way_vector_t refill_victim_way_o, + input hpdcache_way_vector_t refill_victim_way_i, + input logic refill_write_dir_i, + input logic refill_write_data_i, + input hpdcache_word_t refill_word_i, + input hpdcache_refill_data_t refill_data_i, + input logic refill_core_rsp_valid_i, + input hpdcache_rsp_t refill_core_rsp_i, + input hpdcache_nline_t refill_nline_i, + input logic refill_updt_rtab_i, + + // Write buffer interface + input logic wbuf_empty_i, + output logic wbuf_flush_all_o, + output logic wbuf_write_o, + input logic wbuf_write_ready_i, + output wbuf_addr_t wbuf_write_addr_o, + output wbuf_data_t wbuf_write_data_o, + output wbuf_be_t wbuf_write_be_o, + output logic wbuf_write_uncacheable_o, + input logic wbuf_read_hit_i, + output logic wbuf_read_flush_hit_o, + output hpdcache_req_addr_t wbuf_rtab_addr_o, + output logic wbuf_rtab_is_read_o, + input logic wbuf_rtab_hit_open_i, + input logic wbuf_rtab_hit_pend_i, + input logic wbuf_rtab_hit_sent_i, + input logic wbuf_rtab_not_ready_i, + + // Uncacheable request handler + input logic uc_busy_i, + output logic uc_lrsc_snoop_o, + output hpdcache_req_addr_t uc_lrsc_snoop_addr_o, + output hpdcache_req_size_t uc_lrsc_snoop_size_o, + output logic uc_req_valid_o, + output hpdcache_uc_op_t uc_req_op_o, + output hpdcache_req_addr_t uc_req_addr_o, + output hpdcache_req_size_t uc_req_size_o, + output hpdcache_req_data_t uc_req_data_o, + output hpdcache_req_be_t uc_req_be_o, + output logic uc_req_uc_o, + output hpdcache_req_sid_t uc_req_sid_o, + output hpdcache_req_tid_t uc_req_tid_o, + output logic uc_req_need_rsp_o, + input 
logic uc_wbuf_flush_all_i, + input logic uc_dir_amo_match_i, + input hpdcache_set_t uc_dir_amo_match_set_i, + input hpdcache_tag_t uc_dir_amo_match_tag_i, + input logic uc_dir_amo_update_plru_i, + output hpdcache_way_vector_t uc_dir_amo_hit_way_o, + input logic uc_data_amo_write_i, + input logic uc_data_amo_write_enable_i, + input hpdcache_set_t uc_data_amo_write_set_i, + input hpdcache_req_size_t uc_data_amo_write_size_i, + input hpdcache_word_t uc_data_amo_write_word_i, + input logic [63:0] uc_data_amo_write_data_i, + input logic [7:0] uc_data_amo_write_be_i, + output logic uc_core_rsp_ready_o, + input logic uc_core_rsp_valid_i, + input hpdcache_rsp_t uc_core_rsp_i, + + // Cache Management Operation (CMO) + input logic cmo_busy_i, + output logic cmo_req_valid_o, + output hpdcache_cmoh_op_t cmo_req_op_o, + output hpdcache_req_addr_t cmo_req_addr_o, + output hpdcache_req_data_t cmo_req_wdata_o, + input logic cmo_wbuf_flush_all_i, + input logic cmo_dir_check_i, + input hpdcache_set_t cmo_dir_check_set_i, + input hpdcache_tag_t cmo_dir_check_tag_i, + output hpdcache_way_vector_t cmo_dir_check_hit_way_o, + input logic cmo_dir_inval_i, + input hpdcache_set_t cmo_dir_inval_set_i, + input hpdcache_way_vector_t cmo_dir_inval_way_i, + + output logic rtab_empty_o, + output logic ctrl_empty_o, + + // Configuration signals + input logic cfg_enable_i, + input logic cfg_rtab_single_entry_i, + + // Performance events + output logic evt_cache_write_miss_o, + output logic evt_cache_read_miss_o, + output logic evt_uncached_req_o, + output logic evt_cmo_req_o, + output logic evt_write_req_o, + output logic evt_read_req_o, + output logic evt_prefetch_req_o, + output logic evt_req_on_hold_o, + output logic evt_rtab_rollback_o, + output logic evt_stall_refill_o, + output logic evt_stall_o +); + // }}} + + // Definition of internal registers + // {{{ + logic st1_req_valid_q, st1_req_valid_d; + hpdcache_req_t st1_req_q; + logic st1_req_rtab_q; + rtab_ptr_t st1_rtab_pop_try_ptr_q; + + 
logic st2_req_valid_q, st2_req_valid_d; + logic st2_req_is_prefetch_q, st2_req_is_prefetch_d; + logic st2_req_need_rsp_q; + hpdcache_req_addr_t st2_req_addr_q; + hpdcache_req_sid_t st2_req_sid_q; + hpdcache_req_tid_t st2_req_tid_q; + // }}} + + // Definition of internal signals + // {{{ + logic [1:0] st0_arb_req; + logic [1:0] st0_arb_req_grant; + logic st0_arb_ready; + + logic st0_req_ready; + + logic st0_req_valid; + hpdcache_req_t st0_req; + logic st0_req_is_uncacheable; + logic st0_req_is_load; + logic st0_req_is_store; + logic st0_req_is_amo; + logic st0_req_is_cmo_fence; + logic st0_req_is_cmo_inval; + logic st0_req_is_cmo_prefetch; + logic st0_req_cachedir_read; + logic st0_req_cachedata_read; + hpdcache_set_t st0_req_set; + hpdcache_word_t st0_req_word; + logic st0_rtab_pop_try_valid; + logic st0_rtab_pop_try_ready; + hpdcache_req_t st0_rtab_pop_try_req; + logic st0_rtab_pop_try_sel; + rtab_ptr_t st0_rtab_pop_try_ptr; + + logic st1_rsp_valid; + logic st1_rsp_aborted; + hpdcache_req_t st1_req; + logic st1_req_abort; + logic st1_req_cachedata_write; + logic st1_req_cachedata_write_enable; + hpdcache_pma_t st1_req_pma; + hpdcache_tag_t st1_req_tag; + hpdcache_set_t st1_req_set; + hpdcache_word_t st1_req_word; + hpdcache_nline_t st1_req_nline; + hpdcache_req_addr_t st1_req_addr; + logic st1_req_updt_lru; + logic st1_req_is_uncacheable; + logic st1_req_is_load; + logic st1_req_is_store; + logic st1_req_is_amo; + logic st1_req_is_amo_lr; + logic st1_req_is_amo_sc; + logic st1_req_is_amo_swap; + logic st1_req_is_amo_add; + logic st1_req_is_amo_and; + logic st1_req_is_amo_or; + logic st1_req_is_amo_xor; + logic st1_req_is_amo_max; + logic st1_req_is_amo_maxu; + logic st1_req_is_amo_min; + logic st1_req_is_amo_minu; + logic st1_req_is_cmo_inval; + logic st1_req_is_cmo_fence; + logic st1_req_is_cmo_prefetch; + hpdcache_way_vector_t st1_dir_hit; + hpdcache_req_data_t st1_read_data; + logic st1_rtab_alloc; + logic st1_rtab_alloc_and_link; + logic 
st1_rtab_pop_try_commit; + logic st1_rtab_pop_try_rback; + logic st1_rtab_mshr_hit; + logic st1_rtab_mshr_full; + logic st1_rtab_mshr_ready; + logic st1_rtab_wbuf_hit; + logic st1_rtab_wbuf_not_ready; + logic st1_rtab_check; + logic st1_rtab_check_hit; + + logic st2_req_we; + hpdcache_word_t st2_req_word; + + logic rtab_full; + + logic hpdcache_init_ready; + // }}} + + // Decoding of the request + // {{{ + // Select between a request in the replay table or a new core request + assign st0_req_valid = st0_rtab_pop_try_sel ? st0_rtab_pop_try_valid + : core_req_valid_i, + st0_req.addr_offset = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.addr_offset + : core_req_i.addr_offset, + st0_req.addr_tag = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.addr_tag + : core_req_i.addr_tag, + st0_req.wdata = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.wdata + : core_req_i.wdata, + st0_req.op = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.op + : core_req_i.op, + st0_req.be = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.be + : core_req_i.be, + st0_req.size = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.size + : core_req_i.size, + st0_req.sid = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.sid + : core_req_i.sid, + st0_req.tid = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.tid + : core_req_i.tid, + st0_req.need_rsp = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.need_rsp + : core_req_i.need_rsp, + st0_req.phys_indexed = st0_rtab_pop_try_sel ? 1'b1 + : core_req_i.phys_indexed, + st0_req.pma = st0_rtab_pop_try_sel ?
st0_rtab_pop_try_req.pma + : core_req_i.pma; + + // Decode operation in stage 0 + assign st0_req_is_uncacheable = ~cfg_enable_i | ( st0_req.phys_indexed + & st0_req.pma.uncacheable), + st0_req_is_load = is_load(st0_req.op), + st0_req_is_store = is_store(st0_req.op), + st0_req_is_amo = is_amo(st0_req.op), + st0_req_is_cmo_fence = is_cmo_fence(st0_req.op, st0_req.size), + st0_req_is_cmo_inval = is_cmo_inval(st0_req.op, st0_req.size), + st0_req_is_cmo_prefetch = is_cmo_prefetch(st0_req.op, st0_req.size); + + // Decode operation in stage 1 + + // In case of replay or physically-indexed cache, the tag and PMA come + // from stage 0. Otherwise, this information comes directly from the + // requester in stage 1 + assign st1_req_tag = st1_req_q.phys_indexed ? st1_req_q.addr_tag : core_req_tag_i, + st1_req_pma = st1_req_q.phys_indexed ? st1_req_q.pma : core_req_pma_i; + + assign st1_req.addr_offset = st1_req_q.addr_offset, + st1_req.addr_tag = st1_req_rtab_q ? st1_req_q.addr_tag : st1_req_tag, + st1_req.wdata = st1_req_q.wdata, + st1_req.op = st1_req_q.op, + st1_req.be = st1_req_q.be, + st1_req.size = st1_req_q.size, + st1_req.sid = st1_req_q.sid, + st1_req.tid = st1_req_q.tid, + st1_req.need_rsp = st1_req_q.need_rsp, + st1_req.phys_indexed = st1_req_q.phys_indexed, + st1_req.pma = st1_req_rtab_q ? st1_req_q.pma : st1_req_pma; + + // A requester can ask to abort a request it initiated on the + // previous cycle (stage 0).
Useful in case of TLB miss for example + assign st1_req_abort = core_req_abort_i & ~st1_req.phys_indexed; + + assign st1_req_is_uncacheable = ~cfg_enable_i | st1_req.pma.uncacheable, + st1_req_is_load = is_load(st1_req.op), + st1_req_is_store = is_store(st1_req.op), + st1_req_is_amo = is_amo(st1_req.op), + st1_req_is_amo_lr = is_amo_lr(st1_req.op), + st1_req_is_amo_sc = is_amo_sc(st1_req.op), + st1_req_is_amo_swap = is_amo_swap(st1_req.op), + st1_req_is_amo_add = is_amo_add(st1_req.op), + st1_req_is_amo_and = is_amo_and(st1_req.op), + st1_req_is_amo_or = is_amo_or(st1_req.op), + st1_req_is_amo_xor = is_amo_xor(st1_req.op), + st1_req_is_amo_max = is_amo_max(st1_req.op), + st1_req_is_amo_maxu = is_amo_maxu(st1_req.op), + st1_req_is_amo_min = is_amo_min(st1_req.op), + st1_req_is_amo_minu = is_amo_minu(st1_req.op), + st1_req_is_cmo_inval = is_cmo_inval(st1_req.op, st1_req.size), + st1_req_is_cmo_fence = is_cmo_fence(st1_req.op, st1_req.size), + st1_req_is_cmo_prefetch = is_cmo_prefetch(st1_req.op, st1_req.size); + // }}} + + // Refill arbiter: it arbitrates between normal requests (from the core, + // coprocessor, prefetch) and refill requests (from the miss handler). + // + // TODO This arbiter could be replaced by a weighted-round-robin arbiter. + // This way we could distribute the bandwidth asymmetrically to the core + // and the refill interfaces.
+ // {{{ + hpdcache_rrarb #(.N(2)) st0_arb_i + ( + .clk_i, + .rst_ni, + .req_i (st0_arb_req), + .gnt_o (st0_arb_req_grant), + .ready_i (st0_arb_ready) + ); + + // The arbiter can cycle the priority token when: + // - The granted request is consumed (req_grant & req_valid & req_ready) + // - The granted request is aborted (req_grant & ~req_valid) + assign st0_arb_ready = ((st0_arb_req_grant[0] & st0_req_valid & st0_req_ready ) | + (st0_arb_req_grant[1] & refill_req_valid_i & refill_req_ready_o) | + (st0_arb_req_grant[0] & ~st0_req_valid ) | + (st0_arb_req_grant[1] & ~refill_req_valid_i)); + + assign st0_arb_req[0] = st0_req_valid, + st0_arb_req[1] = refill_req_valid_i; + + assign core_req_ready_o = st0_req_ready & ~st0_rtab_pop_try_sel, + st0_rtab_pop_try_ready = st0_req_ready & st0_rtab_pop_try_sel; + + // Trigger an event signal when the pipeline is stalled (new request is not consumed) + assign evt_stall_o = core_req_valid_i & ~core_req_ready_o; + // }}} + + // Cache controller protocol engine + // {{{ + hpdcache_ctrl_pe hpdcache_ctrl_pe_i( + .arb_st0_req_valid_i (st0_req_valid & st0_arb_req_grant[0]), + .arb_st0_req_ready_o (st0_req_ready), + .arb_refill_valid_i (refill_req_valid_i & st0_arb_req_grant[1]), + .arb_refill_ready_o (refill_req_ready_o), + .st0_req_is_uncacheable_i (st0_req_is_uncacheable), + .st0_req_need_rsp_i (st0_req.need_rsp), + .st0_req_is_load_i (st0_req_is_load), + .st0_req_is_store_i (st0_req_is_store), + .st0_req_is_amo_i (st0_req_is_amo), + .st0_req_is_cmo_fence_i (st0_req_is_cmo_fence), + .st0_req_is_cmo_inval_i (st0_req_is_cmo_inval), + .st0_req_is_cmo_prefetch_i (st0_req_is_cmo_prefetch), + .st0_req_mshr_check_o (miss_mshr_check_o), + .st0_req_cachedir_read_o (st0_req_cachedir_read), + .st0_req_cachedata_read_o (st0_req_cachedata_read), + + .st1_req_valid_i (st1_req_valid_q), + .st1_req_abort_i (st1_req_abort), + .st1_req_rtab_i (st1_req_rtab_q), + .st1_req_is_uncacheable_i (st1_req_is_uncacheable), + .st1_req_need_rsp_i 
(st1_req.need_rsp), + .st1_req_is_load_i (st1_req_is_load), + .st1_req_is_store_i (st1_req_is_store), + .st1_req_is_amo_i (st1_req_is_amo), + .st1_req_is_cmo_inval_i (st1_req_is_cmo_inval), + .st1_req_is_cmo_fence_i (st1_req_is_cmo_fence), + .st1_req_is_cmo_prefetch_i (st1_req_is_cmo_prefetch), + .st1_req_valid_o (st1_req_valid_d), + .st1_rsp_valid_o (st1_rsp_valid), + .st1_rsp_aborted_o (st1_rsp_aborted), + .st1_req_cachedir_updt_lru_o (st1_req_updt_lru), + .st1_req_cachedata_write_o (st1_req_cachedata_write), + .st1_req_cachedata_write_enable_o (st1_req_cachedata_write_enable), + + .st2_req_valid_i (st2_req_valid_q), + .st2_req_is_prefetch_i (st2_req_is_prefetch_q), + .st2_req_valid_o (st2_req_valid_d), + .st2_req_we_o (st2_req_we), + .st2_req_is_prefetch_o (st2_req_is_prefetch_d), + .st2_req_mshr_alloc_o (miss_mshr_alloc_o), + .st2_req_mshr_alloc_cs_o (miss_mshr_alloc_cs_o), + + .rtab_full_i (rtab_full), + .rtab_req_valid_i (st0_rtab_pop_try_valid), + .rtab_sel_o (st0_rtab_pop_try_sel), + .rtab_check_o (st1_rtab_check), + .rtab_check_hit_i (st1_rtab_check_hit), + .st1_rtab_alloc_o (st1_rtab_alloc), + .st1_rtab_alloc_and_link_o (st1_rtab_alloc_and_link), + .st1_rtab_commit_o (st1_rtab_pop_try_commit), + .st1_rtab_rback_o (st1_rtab_pop_try_rback), + .st1_rtab_mshr_hit_o (st1_rtab_mshr_hit), + .st1_rtab_mshr_full_o (st1_rtab_mshr_full), + .st1_rtab_mshr_ready_o (st1_rtab_mshr_ready), + .st1_rtab_wbuf_hit_o (st1_rtab_wbuf_hit), + .st1_rtab_wbuf_not_ready_o (st1_rtab_wbuf_not_ready), + + .cachedir_hit_i (cachedir_hit_o), + .cachedir_init_ready_i (hpdcache_init_ready), + + .mshr_alloc_ready_i (miss_mshr_alloc_ready_i), + .mshr_hit_i (miss_mshr_hit_i), + .mshr_full_i (miss_mshr_alloc_full_i), + + .refill_busy_i, + .refill_core_rsp_valid_i, + + .wbuf_write_valid_o (wbuf_write_o), + .wbuf_write_ready_i, + .wbuf_read_hit_i, + .wbuf_write_uncacheable_o, + .wbuf_read_flush_hit_o, + + .uc_busy_i, + .uc_req_valid_o, + .uc_core_rsp_ready_o, + + .cmo_busy_i, + .cmo_req_valid_o, 
+ + .evt_cache_write_miss_o, + .evt_cache_read_miss_o, + .evt_uncached_req_o, + .evt_cmo_req_o, + .evt_write_req_o, + .evt_read_req_o, + .evt_prefetch_req_o, + .evt_req_on_hold_o, + .evt_rtab_rollback_o, + .evt_stall_refill_o + ); + + assign ctrl_empty_o = ~(st1_req_valid_q | st2_req_valid_q); + // }}} + + // Replay table + // {{{ + hpdcache_rtab #( + .rtab_entry_t (hpdcache_req_t) + ) hpdcache_rtab_i( + .clk_i, + .rst_ni, + + .empty_o (rtab_empty_o), + .full_o (rtab_full), + + .check_i (st1_rtab_check), + .check_nline_i (st1_req_nline), + .check_hit_o (st1_rtab_check_hit), + + .alloc_i (st1_rtab_alloc), + .alloc_and_link_i (st1_rtab_alloc_and_link), + .alloc_req_i (st1_req), + .alloc_mshr_hit_i (st1_rtab_mshr_hit), + .alloc_mshr_full_i (st1_rtab_mshr_full), + .alloc_mshr_ready_i (st1_rtab_mshr_ready), + .alloc_wbuf_hit_i (st1_rtab_wbuf_hit), + .alloc_wbuf_not_ready_i (st1_rtab_wbuf_not_ready), + + .pop_try_valid_o (st0_rtab_pop_try_valid), + .pop_try_i (st0_rtab_pop_try_ready), + .pop_try_req_o (st0_rtab_pop_try_req), + .pop_try_ptr_o (st0_rtab_pop_try_ptr), + + .pop_commit_i (st1_rtab_pop_try_commit), + .pop_commit_ptr_i (st1_rtab_pop_try_ptr_q), + + .pop_rback_i (st1_rtab_pop_try_rback), + .pop_rback_ptr_i (st1_rtab_pop_try_ptr_q), + .pop_rback_mshr_hit_i (st1_rtab_mshr_hit), + .pop_rback_mshr_full_i (st1_rtab_mshr_full), + .pop_rback_mshr_ready_i (st1_rtab_mshr_ready), + .pop_rback_wbuf_hit_i (st1_rtab_wbuf_hit), + .pop_rback_wbuf_not_ready_i (st1_rtab_wbuf_not_ready), + + .wbuf_addr_o (wbuf_rtab_addr_o), + .wbuf_is_read_o (wbuf_rtab_is_read_o), + .wbuf_hit_open_i (wbuf_rtab_hit_open_i), + .wbuf_hit_pend_i (wbuf_rtab_hit_pend_i), + .wbuf_hit_sent_i (wbuf_rtab_hit_sent_i), + .wbuf_not_ready_i (wbuf_rtab_not_ready_i), + + .miss_ready_i (miss_mshr_alloc_ready_i), + + .refill_i (refill_updt_rtab_i), + .refill_nline_i, + + .cfg_single_entry_i (cfg_rtab_single_entry_i) + ); + // }}} + + // Pipeline stage 1 registers + // {{{ + always_ff @(posedge clk_i) + begin : 
st1_req_payload_ff + if (st0_req_ready) begin + st1_req_q <= st0_req; + end + end + + always_ff @(posedge clk_i or negedge rst_ni) + begin : st1_req_valid_ff + if (!rst_ni) begin + st1_req_valid_q <= 1'b0; + st1_req_rtab_q <= 1'b0; + st1_rtab_pop_try_ptr_q <= '0; + end else begin + st1_req_valid_q <= st1_req_valid_d; + if (st0_req_ready) begin + st1_req_rtab_q <= st0_rtab_pop_try_sel; + if (st0_rtab_pop_try_sel) begin + st1_rtab_pop_try_ptr_q <= st0_rtab_pop_try_ptr; + end + end + end + end + // }}} + + // Pipeline stage 2 registers + // {{{ + always_ff @(posedge clk_i) + begin : st2_req_payload_ff + if (st2_req_we) begin + st2_req_need_rsp_q <= st1_req.need_rsp; + st2_req_addr_q <= st1_req_addr; + st2_req_sid_q <= st1_req.sid; + st2_req_tid_q <= st1_req.tid; + end + end + + always_ff @(posedge clk_i or negedge rst_ni) + begin : st2_req_valid_ff + if (!rst_ni) begin + st2_req_valid_q <= 1'b0; + st2_req_is_prefetch_q <= 1'b0; + end else begin + st2_req_valid_q <= st2_req_valid_d; + st2_req_is_prefetch_q <= st2_req_is_prefetch_d; + end + end + // }}} + + // Controller for the HPDcache directory and data memory arrays + // {{{ + assign st0_req_set = hpdcache_get_req_offset_set(st0_req.addr_offset), + st0_req_word = hpdcache_get_req_offset_word(st0_req.addr_offset), + st1_req_set = hpdcache_get_req_offset_set(st1_req.addr_offset), + st1_req_word = hpdcache_get_req_offset_word(st1_req.addr_offset), + st1_req_addr = {st1_req.addr_tag, st1_req.addr_offset}, + st1_req_nline = hpdcache_get_req_addr_nline(st1_req_addr), + st2_req_word = hpdcache_get_req_addr_word(st2_req_addr_q); + + hpdcache_memctrl hpdcache_memctrl_i ( + .clk_i, + .rst_ni, + + .ready_o (hpdcache_init_ready), + + .dir_match_i (st0_req_cachedir_read), + .dir_match_set_i (st0_req_set), + .dir_match_tag_i (st1_req.addr_tag), + .dir_update_lru_i (st1_req_updt_lru), + .dir_hit_way_o (st1_dir_hit), + + .dir_amo_match_i (uc_dir_amo_match_i), + .dir_amo_match_set_i (uc_dir_amo_match_set_i), + .dir_amo_match_tag_i 
(uc_dir_amo_match_tag_i), + .dir_amo_update_plru_i (uc_dir_amo_update_plru_i), + .dir_amo_hit_way_o (uc_dir_amo_hit_way_o), + + .dir_refill_i (refill_write_dir_i), + .dir_refill_set_i (refill_set_i), + .dir_refill_entry_i (refill_dir_entry_i), + .dir_refill_updt_plru_i (refill_updt_plru_i), + .dir_victim_way_o (refill_victim_way_o), + + .dir_cmo_check_i (cmo_dir_check_i), + .dir_cmo_check_set_i (cmo_dir_check_set_i), + .dir_cmo_check_tag_i (cmo_dir_check_tag_i), + .dir_cmo_check_hit_way_o (cmo_dir_check_hit_way_o), + + .dir_cmo_inval_i (cmo_dir_inval_i), + .dir_cmo_inval_set_i (cmo_dir_inval_set_i), + .dir_cmo_inval_way_i (cmo_dir_inval_way_i), + + .data_req_read_i (st0_req_cachedata_read), + .data_req_read_set_i (st0_req_set), + .data_req_read_size_i (st0_req.size), + .data_req_read_word_i (st0_req_word), + .data_req_read_data_o (st1_read_data), + + .data_req_write_i (st1_req_cachedata_write), + .data_req_write_enable_i (st1_req_cachedata_write_enable), + .data_req_write_set_i (st1_req_set), + .data_req_write_size_i (st1_req.size), + .data_req_write_word_i (st1_req_word), + .data_req_write_data_i (st1_req.wdata), + .data_req_write_be_i (st1_req.be), + + .data_amo_write_i (uc_data_amo_write_i), + .data_amo_write_enable_i (uc_data_amo_write_enable_i), + .data_amo_write_set_i (uc_data_amo_write_set_i), + .data_amo_write_size_i (uc_data_amo_write_size_i), + .data_amo_write_word_i (uc_data_amo_write_word_i), + .data_amo_write_data_i (uc_data_amo_write_data_i), + .data_amo_write_be_i (uc_data_amo_write_be_i), + + .data_refill_i (refill_write_data_i), + .data_refill_way_i (refill_victim_way_i), + .data_refill_set_i (refill_set_i), + .data_refill_word_i (refill_word_i), + .data_refill_data_i (refill_data_i) + ); + + assign cachedir_hit_o = |st1_dir_hit; + // }}} + + // Write buffer outputs + // {{{ + assign wbuf_write_addr_o = st1_req_addr, + wbuf_write_data_o = st1_req.wdata, + wbuf_write_be_o = st1_req.be, + wbuf_flush_all_o = cmo_wbuf_flush_all_i | uc_wbuf_flush_all_i 
| wbuf_flush_i; + // }}} + + // Miss handler outputs + // {{{ + assign miss_mshr_check_set_o = + st0_req.addr_offset[HPDCACHE_OFFSET_WIDTH +: HPDCACHE_MSHR_SET_WIDTH]; + assign miss_mshr_check_tag_o = + st1_req_nline[HPDCACHE_MSHR_SET_WIDTH +: HPDCACHE_MSHR_TAG_WIDTH]; + + assign miss_mshr_alloc_nline_o = hpdcache_get_req_addr_nline(st2_req_addr_q), + miss_mshr_alloc_tid_o = st2_req_tid_q, + miss_mshr_alloc_sid_o = st2_req_sid_q, + miss_mshr_alloc_word_o = st2_req_word, + miss_mshr_alloc_need_rsp_o = st2_req_need_rsp_q, + miss_mshr_alloc_is_prefetch_o = st2_req_is_prefetch_q; + // }}} + + // Uncacheable request handler outputs + // {{{ + assign uc_lrsc_snoop_o = st1_req_valid_q & st1_req_is_store, + uc_lrsc_snoop_addr_o = st1_req_addr, + uc_lrsc_snoop_size_o = st1_req.size, + uc_req_addr_o = st1_req_addr, + uc_req_size_o = st1_req.size, + uc_req_data_o = st1_req.wdata, + uc_req_be_o = st1_req.be, + uc_req_uc_o = st1_req_is_uncacheable, + uc_req_sid_o = st1_req.sid, + uc_req_tid_o = st1_req.tid, + uc_req_need_rsp_o = st1_req.need_rsp, + uc_req_op_o.is_ld = st1_req_is_load, + uc_req_op_o.is_st = st1_req_is_store, + uc_req_op_o.is_amo_lr = st1_req_is_amo_lr, + uc_req_op_o.is_amo_sc = st1_req_is_amo_sc, + uc_req_op_o.is_amo_swap = st1_req_is_amo_swap, + uc_req_op_o.is_amo_add = st1_req_is_amo_add, + uc_req_op_o.is_amo_and = st1_req_is_amo_and, + uc_req_op_o.is_amo_or = st1_req_is_amo_or, + uc_req_op_o.is_amo_xor = st1_req_is_amo_xor, + uc_req_op_o.is_amo_max = st1_req_is_amo_max, + uc_req_op_o.is_amo_maxu = st1_req_is_amo_maxu, + uc_req_op_o.is_amo_min = st1_req_is_amo_min, + uc_req_op_o.is_amo_minu = st1_req_is_amo_minu; + // }}} + + // CMO request handler outputs + // {{{ + assign cmo_req_addr_o = st1_req_addr, + cmo_req_wdata_o = st1_req.wdata, + cmo_req_op_o.is_fence = st1_req_is_cmo_fence, + cmo_req_op_o.is_inval_by_nline = st1_req_is_cmo_inval & + is_cmo_inval_by_nline(st1_req.size), + cmo_req_op_o.is_inval_by_set = st1_req_is_cmo_inval & + 
is_cmo_inval_by_set(st1_req.size), + cmo_req_op_o.is_inval_all = st1_req_is_cmo_inval & + is_cmo_inval_all(st1_req.size); + // }}} + + // Control of the response to the core + // {{{ + assign core_rsp_valid_o = refill_core_rsp_valid_i | + (uc_core_rsp_valid_i & uc_core_rsp_ready_o) | + st1_rsp_valid, + core_rsp_o.rdata = (refill_core_rsp_valid_i ? refill_core_rsp_i.rdata : + (uc_core_rsp_valid_i ? uc_core_rsp_i.rdata : + st1_read_data)), + core_rsp_o.sid = (refill_core_rsp_valid_i ? refill_core_rsp_i.sid : + (uc_core_rsp_valid_i ? uc_core_rsp_i.sid : + st1_req.sid)), + core_rsp_o.tid = (refill_core_rsp_valid_i ? refill_core_rsp_i.tid : + (uc_core_rsp_valid_i ? uc_core_rsp_i.tid : + st1_req.tid)), + core_rsp_o.error = (refill_core_rsp_valid_i ? refill_core_rsp_i.error : + (uc_core_rsp_valid_i ? uc_core_rsp_i.error : + /* FIXME */1'b0)), + core_rsp_o.aborted = st1_rsp_aborted; + // }}} + + // Assertions + // pragma translate_off + // {{{ + assert property (@(posedge clk_i) disable iff (!rst_ni) + $onehot0({core_req_ready_o, st0_rtab_pop_try_ready, refill_req_ready_o})) else + $error("ctrl: only one request can be served per cycle"); + // }}} + // pragma translate_on +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_ctrl_pe.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_ctrl_pe.sv new file mode 100755 index 00000000..13b4f588 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_ctrl_pe.sv @@ -0,0 +1,620 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. 
You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : HPDcache Control Protocol Engine + * History : + */ +module hpdcache_ctrl_pe + // Ports + // {{{ +( + // Refill arbiter + // {{{ + input logic arb_st0_req_valid_i, + output logic arb_st0_req_ready_o, + input logic arb_refill_valid_i, + output logic arb_refill_ready_o, + // }}} + + // Pipeline stage 0 + // {{{ + input logic st0_req_is_uncacheable_i, + input logic st0_req_need_rsp_i, + input logic st0_req_is_load_i, + input logic st0_req_is_store_i, + input logic st0_req_is_amo_i, + input logic st0_req_is_cmo_fence_i, + input logic st0_req_is_cmo_inval_i, + input logic st0_req_is_cmo_prefetch_i, + output logic st0_req_mshr_check_o, + output logic st0_req_cachedir_read_o, + output logic st0_req_cachedata_read_o, + // }}} + + // Pipeline stage 1 + // {{{ + input logic st1_req_valid_i, + input logic st1_req_abort_i, + input logic st1_req_rtab_i, + input logic st1_req_is_uncacheable_i, + input logic st1_req_need_rsp_i, + input logic st1_req_is_load_i, + input logic st1_req_is_store_i, + input logic st1_req_is_amo_i, + input logic st1_req_is_cmo_inval_i, + input logic st1_req_is_cmo_fence_i, + input logic st1_req_is_cmo_prefetch_i, + output logic st1_req_valid_o, + output logic st1_rsp_valid_o, + output logic st1_rsp_aborted_o, + output logic st1_req_cachedir_updt_lru_o, + output logic st1_req_cachedata_write_o, + output logic st1_req_cachedata_write_enable_o, + // }}} + + // Pipeline stage 2 + // {{{ + input logic st2_req_valid_i, + input logic st2_req_is_prefetch_i, + 
output logic st2_req_valid_o, + output logic st2_req_we_o, + output logic st2_req_is_prefetch_o, + output logic st2_req_mshr_alloc_o, + output logic st2_req_mshr_alloc_cs_o, + // }}} + + // Replay + // {{{ + input logic rtab_full_i, + input logic rtab_req_valid_i, + output logic rtab_sel_o, + output logic rtab_check_o, + input logic rtab_check_hit_i, + output logic st1_rtab_alloc_o, + output logic st1_rtab_alloc_and_link_o, + output logic st1_rtab_commit_o, + output logic st1_rtab_rback_o, + output logic st1_rtab_mshr_hit_o, + output logic st1_rtab_mshr_full_o, + output logic st1_rtab_mshr_ready_o, + output logic st1_rtab_wbuf_hit_o, + output logic st1_rtab_wbuf_not_ready_o, + // }}} + + // Cache directory + // {{{ + input logic cachedir_hit_i, + input logic cachedir_init_ready_i, + // }}} + + // Miss Status Holding Register (MSHR) + // {{{ + input logic mshr_alloc_ready_i, + input logic mshr_hit_i, + input logic mshr_full_i, + // }}} + + // Refill interface + // {{{ + input logic refill_busy_i, + input logic refill_core_rsp_valid_i, + // }}} + + // Write buffer + // {{{ + input logic wbuf_write_ready_i, + input logic wbuf_read_hit_i, + output logic wbuf_write_valid_o, + output logic wbuf_write_uncacheable_o, + output logic wbuf_read_flush_hit_o, + // }}} + + // Uncacheable request handler + // {{{ + input logic uc_busy_i, + output logic uc_req_valid_o, + output logic uc_core_rsp_ready_o, + // }}} + + // Cache Management Operation (CMO) + // {{{ + input logic cmo_busy_i, + output logic cmo_req_valid_o, + // }}} + + // Performance events + // {{{ + output logic evt_cache_write_miss_o, + output logic evt_cache_read_miss_o, + output logic evt_uncached_req_o, + output logic evt_cmo_req_o, + output logic evt_write_req_o, + output logic evt_read_req_o, + output logic evt_prefetch_req_o, + output logic evt_req_on_hold_o, + output logic evt_rtab_rollback_o, + output logic evt_stall_refill_o + // }}} +); + // }}} + + // Definition of internal signals + // {{{ + logic 
st1_fence; + logic st1_rtab_alloc, st1_rtab_alloc_and_link; + // }}} + + // Global control signals + // {{{ + + // Determine if the new request is a "fence". Here, fence instructions are + // considered those that need to be executed in program order + // (irrespective of addresses). This means that all memory operations + // that arrived before the "fence" instruction need to be finished, and only + // then the "fence" instruction is executed. In the same manner, all + // instructions following the "fence" need to wait for the completion of the + // latter before being executed. + assign st1_fence = st1_req_is_uncacheable_i | + st1_req_is_cmo_fence_i | + st1_req_is_cmo_inval_i | + st1_req_is_amo_i; + // }}} + + // Arbitration of responses to the core + // {{{ + assign uc_core_rsp_ready_o = ~refill_core_rsp_valid_i; + // }}} + + // Arbiter between core or replay request. + // {{{ + // Take the replay request when: + // - The replay table is full. + // - The replay table has a ready request (request with all dependencies solved) + // - There is an outstanding CMO or uncached/AMO request + // + // IMPORTANT: When the replay table is full, the cache cannot accept new core + // requests because this can introduce a deadlock: If the core request needs to + // be put on hold, as there is no place in the replay table, the pipeline needs to + // stall. If the pipeline is stalled, dependencies of on-hold requests cannot be + // solved, and the system is locked.
+ assign rtab_sel_o = rtab_full_i | + rtab_req_valid_i | + (st1_req_valid_i & st1_fence) | + cmo_busy_i | + uc_busy_i; + // }}} + + // Replay logic + // {{{ + // Replay table allocation + assign st1_rtab_alloc_o = st1_rtab_alloc & ~st1_req_rtab_i, + st1_rtab_alloc_and_link_o = st1_rtab_alloc_and_link, + st1_rtab_rback_o = st1_rtab_alloc & st1_req_rtab_i; + + // Performance event + assign evt_req_on_hold_o = st1_rtab_alloc | st1_rtab_alloc_and_link, + evt_rtab_rollback_o = st1_rtab_rback_o; + // }}} + + // Data-cache control lines + // {{{ + always_comb + begin : hpdcache_ctrl_comb + automatic logic nop, st1_nop, st2_nop; + + uc_req_valid_o = 1'b0; + + cmo_req_valid_o = 1'b0; + + wbuf_write_valid_o = 1'b0; + wbuf_read_flush_hit_o = 1'b0; + wbuf_write_uncacheable_o = 1'b0; // unused + + arb_st0_req_ready_o = 1'b0; + arb_refill_ready_o = 1'b0; + + st0_req_mshr_check_o = 1'b0; + st0_req_cachedir_read_o = 1'b0; + st0_req_cachedata_read_o = 1'b0; + + st1_req_valid_o = st1_req_valid_i; + st1_nop = 1'b0; + st1_req_cachedata_write_o = 1'b0; + st1_req_cachedata_write_enable_o = 1'b0; + st1_req_cachedir_updt_lru_o = 1'b0; + st1_rsp_valid_o = 1'b0; + st1_rsp_aborted_o = 1'b0; + + st2_req_valid_o = st2_req_valid_i; + st2_req_we_o = 1'b0; + st2_req_is_prefetch_o = 1'b0; + st2_req_mshr_alloc_cs_o = 1'b0; + st2_req_mshr_alloc_o = 1'b0; + st2_nop = 1'b0; + + nop = 1'b0; + + rtab_check_o = 1'b0; + st1_rtab_alloc = 1'b0; + st1_rtab_alloc_and_link = 1'b0; + st1_rtab_commit_o = 1'b0; + st1_rtab_mshr_hit_o = 1'b0; + st1_rtab_mshr_full_o = 1'b0; + st1_rtab_mshr_ready_o = 1'b0; + st1_rtab_wbuf_hit_o = 1'b0; + st1_rtab_wbuf_not_ready_o = 1'b0; + + evt_cache_write_miss_o = 1'b0; + evt_cache_read_miss_o = 1'b0; + evt_uncached_req_o = 1'b0; + evt_cmo_req_o = 1'b0; + evt_write_req_o = 1'b0; + evt_read_req_o = 1'b0; + evt_prefetch_req_o = 1'b0; + evt_stall_refill_o = 1'b0; + + // Wait for the cache to be initialized + // {{{ + if (!cachedir_init_ready_i) begin + // initialization of the cache 
RAMs + end + // }}} + + // Refilling the cache + // {{{ + else if (refill_busy_i) begin + // miss handler has the control of the cache + evt_stall_refill_o = arb_st0_req_valid_i; + end + // }}} + + // Normal pipeline operation + // {{{ + else begin + // Stage 2 request pending + // {{{ + if (st2_req_valid_i) begin + st2_req_valid_o = 1'b0; + + // Allocate an entry in the MSHR + st2_req_mshr_alloc_cs_o = 1'b1; + st2_req_mshr_alloc_o = 1'b1; + + // Introduce a NOP in the next cycle to prevent a hazard on the MSHR + st2_nop = 1'b1; + + // Performance event + evt_cache_read_miss_o = ~st2_req_is_prefetch_i; + evt_read_req_o = ~st2_req_is_prefetch_i; + evt_prefetch_req_o = st2_req_is_prefetch_i; + end + // }}} + + // Stage 1 request pending + // {{{ + if (st1_req_valid_i) begin + // Check if the request in stage 1 has a conflict with one of the + // request in the replay table. + rtab_check_o = ~st1_req_rtab_i & ~st1_fence; + + // Check if the current request is aborted. If so, respond to the + // core (when need_rsp is set) and set the aborted flag + if (st1_req_abort_i && !st1_req_rtab_i) begin + st1_rsp_valid_o = st1_req_need_rsp_i; + st1_rsp_aborted_o = 1'b1; + end + + // Allocate a new entry in the replay table in case of conflict with + // an on-hold request + else if (rtab_check_o && rtab_check_hit_i) begin + st1_rtab_alloc_and_link = 1'b1; + + // Do not consume a request in this cycle in stage 0 + st1_nop = 1'b1; + end + + // CMO fence or invalidate + // {{{ + else if (st1_req_is_cmo_fence_i || st1_req_is_cmo_inval_i) begin + cmo_req_valid_o = 1'b1; + st1_nop = 1'b1; + + // Performance event + evt_cmo_req_o = 1'b1; + end + // }}} + + // Uncacheable load, store or AMO request + // {{{ + else if (st1_req_is_uncacheable_i) begin + uc_req_valid_o = 1'b1; + st1_nop = 1'b1; + + // Performance event + evt_uncached_req_o = 1'b1; + end + // }}} + + // Cacheable request + // {{{ + else begin + // AMO cacheable request + // {{{ + if (st1_req_is_amo_i) begin + uc_req_valid_o 
= 1'b1; + st1_nop = 1'b1; + + // Performance event + evt_uncached_req_o = 1'b1; + end + // }}} + + // Load cacheable request + // {{{ + if (|{st1_req_is_load_i, + st1_req_is_cmo_prefetch_i}) + begin + // Cache miss + // {{{ + if (!cachedir_hit_i) begin + // If there is a match in the write buffer, lets send the + // entry right away + wbuf_read_flush_hit_o = 1'b1; + + // Do not consume a request in this cycle in stage 0 + st1_nop = 1'b1; + + // Pending miss on the same line + if (mshr_hit_i) begin + // Put the request in the replay table + st1_rtab_alloc = 1'b1; + + st1_rtab_mshr_hit_o = 1'b1; + end + + // No available slot in the MSHR + else if (mshr_full_i) begin + // Put the request in the replay table + st1_rtab_alloc = 1'b1; + + st1_rtab_mshr_full_o = 1'b1; + end + + // Hit on an open entry of the write buffer: + // wait for the entry to be acknowledged + else if (wbuf_read_hit_i) begin + // Put the request in the replay table + st1_rtab_alloc = 1'b1; + + st1_rtab_wbuf_hit_o = 1'b1; + end + + // Miss Handler is not ready to send + else if (!mshr_alloc_ready_i) begin + // Put the request on hold if the MISS HANDLER is not + // ready to send a new miss request. This is to prevent + // a deadlock between the read request channel and the + // read response channel. + // + // The request channel may be stalled by targets if they + // are not able to send a response (response is + // prioritary). Therefore, we need to put the request on + // hold to allow a possible refill read response to be + // accomplished. 
+ st1_rtab_alloc = 1'b1; + + st1_rtab_mshr_ready_o = 1'b1; + end + + // Forward the request to the next stage to allocate the + // entry in the MSHR and send the refill request + else begin + // If the request comes from the replay table, free the + // corresponding RTAB entry + st1_rtab_commit_o = st1_req_rtab_i; + + st2_req_valid_o = 1'b1; + st2_req_we_o = 1'b1; + st2_req_is_prefetch_o = st1_req_is_cmo_prefetch_i; + end + end + // }}} + + // Cache hit + // {{{ + else begin + // If the request comes from the replay table, free the + // corresponding RTAB entry + st1_rtab_commit_o = st1_req_rtab_i; + + // Add a NOP when replaying a request, and there is no available + // request from the replay table. + st1_nop = st1_req_rtab_i & ~rtab_sel_o; + + // Update the PLRU bit for the accessed set + st1_req_cachedir_updt_lru_o = st1_req_is_load_i; + + // Respond to the core (if needed) + st1_rsp_valid_o = st1_req_need_rsp_i; + + // Performance event + evt_read_req_o = ~st1_req_is_cmo_prefetch_i; + evt_prefetch_req_o = st1_req_is_cmo_prefetch_i; + end + // }}} + end + // }}} + + // Store cacheable request + // {{{ + if (st1_req_is_store_i) begin + // Write in the write buffer if there is no pending miss in the same line. + // + // We assume here that the NoC that transports read and write transactions does + // not guaranty the order between transactions on those channels. + // Therefore, the cache must hold a write if there is a pending read on the + // same address. + wbuf_write_valid_o = ~mshr_hit_i; + + // Add a NOP in the pipeline when: + // - Structural hazard on the cache data if the st0 request is a load + // operation. + // - Replaying a request, the cache cannot accept a request from the + // core the next cycle. It can however accept a new request from the + // replay table + // + // IMPORTANT: we could remove the NOP in the first scenario if the + // controller checks for the hit of this write. However, this adds + // a DIR_RAM -> DATA_RAM timing path. 
+ st1_nop = (arb_st0_req_valid_i & st0_req_is_load_i) | + (st1_req_rtab_i & ~rtab_sel_o); + + // Enable the data RAM in case of write. However, the actual write + // depends on the hit signal from the cache directory. + // + // IMPORTANT: this produces unnecessary power consumption in case of + // write misses, but removes timing paths between the cache directory + // RAM and the data RAM chip-select. + st1_req_cachedata_write_o = 1'b1; + + // Cache miss + if (!cachedir_hit_i) begin + // Pending miss on the same line + if (mshr_hit_i) begin + // Put the request in the replay table + st1_rtab_alloc = 1'b1; + + st1_rtab_mshr_hit_o = 1'b1; + + // Do not consume a request in this cycle in stage 0 + st1_nop = 1'b1; + end + + // No available entry in the write buffer (or conflict on pending entry) + else if (!wbuf_write_ready_i) begin + // Put the request in the replay table + st1_rtab_alloc = 1'b1; + + st1_rtab_wbuf_not_ready_o = 1'b1; + + // Do not consume a request in this cycle in stage 0 + st1_nop = 1'b1; + end + + else begin + // If the request comes from the replay table, free the + // corresponding RTAB entry + st1_rtab_commit_o = st1_req_rtab_i; + + // Respond to the core (if needed) + st1_rsp_valid_o = st1_req_need_rsp_i; + + // Performance event + evt_cache_write_miss_o = 1'b1; + evt_write_req_o = 1'b1; + end + end + + // Cache hit + else begin + // No available entry in the write buffer (or conflict on pending entry) + if (!wbuf_write_ready_i) begin + // Put the request in the replay table + st1_rtab_alloc = 1'b1; + + st1_rtab_wbuf_not_ready_o = 1'b1; + + // Do not consume a request in this cycle in stage 0 + st1_nop = 1'b1; + end + + // The store can be performed in the write buffer and in the cache + else begin + // If the request comes from the replay table, free the + // corresponding RTAB entry + st1_rtab_commit_o = st1_req_rtab_i; + + // Respond to the core + st1_rsp_valid_o = st1_req_need_rsp_i; + + // Update the PLRU bit for the accessed set + 
st1_req_cachedir_updt_lru_o = 1'b1; + + // Write in the data RAM + st1_req_cachedata_write_enable_o = 1'b1; + + // Performance event + evt_write_req_o = 1'b1; + end + end + end + // }}} + end + // }}} + end + // }}} + + // New request + // {{{ + nop = st1_nop | st2_nop; + + // The cache controller accepts a core request when: + // - The req-refill arbiter grants the request + // - The pipeline is not being flushed + arb_st0_req_ready_o = arb_st0_req_valid_i & ~nop; + + // The cache controller accepts a refill when: + // - The req-refill arbiter grants the refill + // - The pipeline is empty + arb_refill_ready_o = arb_refill_valid_i & ~(st1_req_valid_i | st2_req_valid_i); + + // Forward the request to stage 1 + // - There is a valid request in stage 0 + st1_req_valid_o = arb_st0_req_ready_o; + + // New cacheable stage 0 request granted + // {{{ + // IMPORTANT: here the RAM is enabled independently if the + // request needs to be put on-hold. + // This increases the power consumption in that cases, but + // removes the timing paths RAM-to-RAM between the cache + // directory and the data array. 
+ if (arb_st0_req_valid_i && !st0_req_is_uncacheable_i) begin + st0_req_cachedata_read_o = + st0_req_is_load_i & + ~(st1_req_valid_i & st1_req_is_store_i & ~st1_req_is_uncacheable_i); + if (st0_req_is_load_i | + st0_req_is_cmo_prefetch_i | + st0_req_is_store_i | + st0_req_is_amo_i ) + begin + st0_req_mshr_check_o = 1'b1; + st0_req_cachedir_read_o = ~st0_req_is_amo_i; + end + end + // }}} + // }}} + end + // }}} end of normal pipeline operation + end + // }}} +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_memarray.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_memarray.sv new file mode 100644 index 00000000..7c7ee659 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_memarray.sv @@ -0,0 +1,120 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : April, 2021
+ *  Description   : HPDcache Directory and Data Memory Arrays
+ *  History       :
+ */
+// hpdcache_memarray
+//
+// Purely structural wrapper around the cache RAM macros. It instantiates:
+//   - one hpdcache_sram per way for the directory (HPDCACHE_WAYS instances);
+//   - a grid of HPDCACHE_DATA_RAM_Y_CUTS x HPDCACHE_DATA_RAM_X_CUTS data RAMs,
+//     using either the byte-enable or the bit-mask RAM flavour depending on
+//     HPDCACHE_DATA_RAM_WBYTEENABLE.
+// No request arbitration is done here: every control and data signal comes
+// straight from the ports.
+module hpdcache_memarray
+import hpdcache_pkg::*;
+    // Ports
+    // {{{
+(
+    input  logic                                     clk_i,
+    input  logic                                     rst_ni,
+
+    // Directory RAMs: a single address shared by all ways, with per-way
+    // chip-select, write-enable, write entry and read entry
+    input  hpdcache_dir_addr_t                       dir_addr_i,
+    input  hpdcache_way_vector_t                     dir_cs_i,
+    input  hpdcache_way_vector_t                     dir_we_i,
+    input  hpdcache_dir_entry_t [HPDCACHE_WAYS-1:0]  dir_wentry_i,
+    output hpdcache_dir_entry_t [HPDCACHE_WAYS-1:0]  dir_rentry_o,
+
+    // Data RAMs: every signal below is indexed [y][x] (RAM row, RAM column)
+    // when connected to the individual RAM instances
+    input  hpdcache_data_addr_t                      data_addr_i,
+    input  hpdcache_data_enable_t                    data_cs_i,
+    input  hpdcache_data_enable_t                    data_we_i,
+    input  hpdcache_data_be_entry_t                  data_wbyteenable_i,
+    input  hpdcache_data_entry_t                     data_wentry_i,
+    output hpdcache_data_entry_t                     data_rentry_o
+);
+    // }}}
+
+    // Memory arrays
+    // {{{
+    generate
+        genvar x, y, dir_w;
+
+        // Directory
+        //
+        // One RAM per way; all of them receive the same address (dir_addr_i)
+        for (dir_w = 0; dir_w < int'(HPDCACHE_WAYS); dir_w++) begin : dir_sram_gen
+            hpdcache_sram #(
+                .DATA_SIZE (HPDCACHE_DIR_RAM_WIDTH),
+                .ADDR_SIZE (HPDCACHE_DIR_RAM_ADDR_WIDTH)
+            ) dir_sram (
+                .clk       (clk_i),
+                .rst_n     (rst_ni),
+                .cs        (dir_cs_i[dir_w]),
+                .we        (dir_we_i[dir_w]),
+                .addr      (dir_addr_i),
+                .wdata     (dir_wentry_i[dir_w]),
+                .rdata     (dir_rentry_o[dir_w])
+            );
+        end
+
+        // Data
+        //
+        // One RAM per (y, x) position. The RAM flavour is selected at
+        // elaboration time by HPDCACHE_DATA_RAM_WBYTEENABLE.
+        for (y = 0; y < int'(HPDCACHE_DATA_RAM_Y_CUTS); y++) begin : data_sram_row_gen
+            for (x = 0; x < int'(HPDCACHE_DATA_RAM_X_CUTS); x++) begin : data_sram_col_gen
+                // RAM macro with a native byte-enable input: the byte enables
+                // are forwarded unchanged
+                if (HPDCACHE_DATA_RAM_WBYTEENABLE) begin : data_sram_wbyteenable_gen
+                    hpdcache_sram_wbyteenable #(
+                        .DATA_SIZE   (HPDCACHE_DATA_RAM_WIDTH),
+                        .ADDR_SIZE   (HPDCACHE_DATA_RAM_ADDR_WIDTH)
+                    ) data_sram (
+                        .clk         (clk_i),
+                        .rst_n       (rst_ni),
+                        .cs          (data_cs_i[y][x]),
+                        .we          (data_we_i[y][x]),
+                        .addr        (data_addr_i[y][x]),
+                        .wdata       (data_wentry_i[y][x]),
+                        .wbyteenable (data_wbyteenable_i[y][x]),
+                        .rdata       (data_rentry_o[y][x])
+                    );
+                // RAM macro with a per-bit write mask: the mask is derived
+                // locally from the byte enables
+                end else begin : data_sram_wmask_gen
+                    hpdcache_data_ram_data_t data_wmask;
+
+                    // build the bitmask from the write byte enable signal
+                    // (each byte-enable bit is replicated over the 8 bits of
+                    // the byte it controls, for every way slot w of the RAM word)
+                    always_comb
+                    begin : data_wmask_comb
+                        for (int w = 0; w < HPDCACHE_DATA_WAYS_PER_RAM_WORD; w++) begin
+                            for (int b = 0; b < HPDCACHE_WORD_WIDTH/8; b++) begin
+                                data_wmask[w][8*b +: 8] = {8{data_wbyteenable_i[y][x][w][b]}};
+                            end
+                        end
+                    end
+
+                    hpdcache_sram_wmask #(
+                        .DATA_SIZE   (HPDCACHE_DATA_RAM_WIDTH),
+                        .ADDR_SIZE   (HPDCACHE_DATA_RAM_ADDR_WIDTH)
+                    ) data_sram (
+                        .clk         (clk_i),
+                        .rst_n       (rst_ni),
+                        .cs          (data_cs_i[y][x]),
+                        .we          (data_we_i[y][x]),
+                        .addr        (data_addr_i[y][x]),
+                        .wdata       (data_wentry_i[y][x]),
+                        .wmask       (data_wmask),
+                        .rdata       (data_rentry_o[y][x])
+                    );
+                end
+            end
+        end
+    endgenerate
+    // }}}
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_memctrl.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_memctrl.sv
new file mode 100644
index 00000000..c87cc3f5
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_memctrl.sv
@@ -0,0 +1,656 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : April, 2021
+ *  Description   : HPDcache Directory and Data Memory RAMs Controller
+ *  History       :
+ */
+// hpdcache_memctrl
+//
+// Front-end of the directory and data RAMs (hpdcache_memarray). This module:
+//   - walks through all directory addresses after reset, writing a zeroed
+//     entry in every way, and reports completion on ready_o;
+//   - keeps the per-set/per-way valid bits in flip-flops (dir_valid_q) rather
+//     than in the directory RAM;
+//   - multiplexes the directory requesters (match, AMO match, refill, CMO
+//     check) and the data requesters (read, write, AMO write, refill) onto
+//     the single RAM interface;
+//   - computes the hit-way vectors one cycle after a match/check request,
+//     using the set registered in dir_req_set_q;
+//   - instantiates the PLRU that provides the victim way on refill.
+//
+// Several selectors below are written as plain "case (1'b1)": the first item
+// (in source order) whose expression is 1 is taken, so item order defines the
+// priority between requesters.
+module hpdcache_memctrl
+import hpdcache_pkg::*;
+    // Ports
+    // {{{
+(
+    // Global clock and reset signals
+    // {{{
+    input  logic                                clk_i,
+    input  logic                                rst_ni,
+    // }}}
+
+    // Global control signals
+    // {{{
+    // High once the directory initialization sequence has completed
+    output logic                                ready_o,
+    // }}}
+
+    // DIR array access interface
+    // {{{
+    input  logic                                dir_match_i,
+    input  hpdcache_set_t                       dir_match_set_i,
+    input  hpdcache_tag_t                       dir_match_tag_i,
+    input  logic                                dir_update_lru_i,
+    output hpdcache_way_vector_t                dir_hit_way_o,
+
+    input  logic                                dir_amo_match_i,
+    input  hpdcache_set_t                       dir_amo_match_set_i,
+    input  hpdcache_tag_t                       dir_amo_match_tag_i,
+    input  logic                                dir_amo_update_plru_i,
+    output hpdcache_way_vector_t                dir_amo_hit_way_o,
+
+    input  logic                                dir_refill_i,
+    input  hpdcache_set_t                       dir_refill_set_i,
+    input  hpdcache_dir_entry_t                 dir_refill_entry_i,
+    input  logic                                dir_refill_updt_plru_i,
+    output hpdcache_way_vector_t                dir_victim_way_o,
+
+    input  logic                                dir_cmo_check_i,
+    input  hpdcache_set_t                       dir_cmo_check_set_i,
+    input  hpdcache_tag_t                       dir_cmo_check_tag_i,
+    output hpdcache_way_vector_t                dir_cmo_check_hit_way_o,
+
+    // CMO invalidation only clears valid bits (see dir_valid_comb); it does
+    // not issue an access to the directory RAM
+    input  logic                                dir_cmo_inval_i,
+    input  hpdcache_set_t                       dir_cmo_inval_set_i,
+    input  hpdcache_way_vector_t                dir_cmo_inval_way_i,
+
+    // }}}
+
+    // DATA array access interface
+    // {{{
+    input  logic                                data_req_read_i,
+    input  hpdcache_set_t                       data_req_read_set_i,
+    input  hpdcache_req_size_t                  data_req_read_size_i,
+    input  hpdcache_word_t                      data_req_read_word_i,
+    output hpdcache_req_data_t                  data_req_read_data_o,
+
+    input  logic                                data_req_write_i,
+    input  logic                                data_req_write_enable_i,
+    input  hpdcache_set_t                       data_req_write_set_i,
+    input  hpdcache_req_size_t                  data_req_write_size_i,
+    input  hpdcache_word_t                      data_req_write_word_i,
+    input  hpdcache_req_data_t                  data_req_write_data_i,
+    input  hpdcache_req_be_t                    data_req_write_be_i,
+
+    input  logic                                data_amo_write_i,
+    input  logic                                data_amo_write_enable_i,
+    input  hpdcache_set_t                       data_amo_write_set_i,
+    input  hpdcache_req_size_t                  data_amo_write_size_i,
+    input  hpdcache_word_t                      data_amo_write_word_i,
+    input  logic [63:0]                         data_amo_write_data_i,
+    input  logic  [7:0]                         data_amo_write_be_i,
+
+    input  logic                                data_refill_i,
+    input  hpdcache_way_vector_t                data_refill_way_i,
+    input  hpdcache_set_t                       data_refill_set_i,
+    input  hpdcache_word_t                      data_refill_word_i,
+    input  hpdcache_refill_data_t               data_refill_data_i
+    // }}}
+);
+    // }}}
+
+    // Definition of constants
+    // {{{
+    // Total number of data RAM instances (columns x rows)
+    localparam int unsigned HPDCACHE_ALL_CUTS = HPDCACHE_DATA_RAM_X_CUTS*HPDCACHE_DATA_RAM_Y_CUTS;
+    // Number of request-wide words contained in one data RAM access
+    localparam int unsigned HPDCACHE_DATA_REQ_RATIO = HPDCACHE_ACCESS_WORDS/HPDCACHE_REQ_WORDS;
+    // }}}
+
+    // Definition of functions
+    // {{{
+
+    // hpdcache_compute_data_ram_cs
+    //
+    // description: This function computes the chip-select signal for data
+    //              RAMs depending on the request size and the word offset
+    //
+    // The size selects how many consecutive enable bits are set (the number
+    // of words in 64, 128, 256 or 512 bits; sizes 0 to 3 all map to 64 bits),
+    // and the result is shifted left by the low bits of the word index.
+    function automatic hpdcache_data_row_enable_t hpdcache_compute_data_ram_cs(
+        input hpdcache_req_size_t size_i,
+        input hpdcache_word_t     word_i);
+
+        // Number of word-index bits used as the shift amount (at least 1 so
+        // that the part-select below is always legal)
+        localparam hpdcache_uint32 off_width =
+            HPDCACHE_ACCESS_WORDS > 1 ? $clog2(HPDCACHE_ACCESS_WORDS) : 1;
+
+        hpdcache_data_row_enable_t ret;
+        hpdcache_uint32 off;
+
+        case (size_i)
+            3'h0,
+            3'h1,
+            3'h2,
+            3'h3:    ret = hpdcache_data_row_enable_t'({ 64/HPDCACHE_WORD_WIDTH{1'b1}});
+            3'h4:    ret = hpdcache_data_row_enable_t'({128/HPDCACHE_WORD_WIDTH{1'b1}});
+            3'h5:    ret = hpdcache_data_row_enable_t'({256/HPDCACHE_WORD_WIDTH{1'b1}});
+            default: ret = hpdcache_data_row_enable_t'({512/HPDCACHE_WORD_WIDTH{1'b1}});
+        endcase
+
+        // No shift when a RAM access holds a single word
+        off = HPDCACHE_ACCESS_WORDS > 1 ? hpdcache_uint'(word_i[0 +: off_width]) : 0;
+        return hpdcache_data_row_enable_t'(ret << off);
+    endfunction
+
+    // hpdcache_way_to_data_ram_row
+    //
+    // Returns i / HPDCACHE_DATA_WAYS_PER_RAM_WORD for the lowest index i whose
+    // bit is set in the way vector, or 0 when no bit is set.
+    function automatic hpdcache_data_ram_row_idx_t hpdcache_way_to_data_ram_row(
+        input hpdcache_way_vector_t way);
+        for (hpdcache_uint i = 0; i < HPDCACHE_WAYS; i++) begin
+            if (way[i]) return hpdcache_data_ram_row_idx_t'(i / HPDCACHE_DATA_WAYS_PER_RAM_WORD);
+        end
+        return 0;
+    endfunction
+
+    // hpdcache_way_to_data_ram_word
+    //
+    // Returns i % HPDCACHE_DATA_WAYS_PER_RAM_WORD for the lowest index i whose
+    // bit is set in the way vector, or 0 when no bit is set.
+    function automatic hpdcache_data_ram_way_idx_t hpdcache_way_to_data_ram_word(
+        input hpdcache_way_vector_t way);
+        for (hpdcache_uint i = 0; i < HPDCACHE_WAYS; i++) begin
+            if (way[i]) return hpdcache_data_ram_way_idx_t'(i % HPDCACHE_DATA_WAYS_PER_RAM_WORD);
+        end
+        return 0;
+    endfunction
+
+    // hpdcache_set_to_data_ram_addr
+    //
+    // Computes the data RAM address of a (set, word) pair: each set occupies
+    // HPDCACHE_CL_WORDS / HPDCACHE_ACCESS_WORDS consecutive addresses, and the
+    // word index selects one of them.
+    function automatic hpdcache_data_ram_addr_t hpdcache_set_to_data_ram_addr(
+        input hpdcache_set_t set,
+        input hpdcache_word_t word);
+        hpdcache_uint ret;
+
+        ret = (hpdcache_uint'(set)*(HPDCACHE_CL_WORDS / HPDCACHE_ACCESS_WORDS)) +
+              (hpdcache_uint'(word) / HPDCACHE_ACCESS_WORDS);
+
+        return hpdcache_data_ram_addr_t'(ret);
+    endfunction
+    // }}}
+
+    // Definition of internal signals and registers
+    // {{{
+    genvar gen_i, gen_j, gen_k;
+
+    // Directory initialization signals and registers
+    logic                                       init_q,     init_d;
+    hpdcache_dir_addr_t                         init_set_q, init_set_d;
+    hpdcache_way_vector_t                       init_dir_cs;
+    hpdcache_way_vector_t                       init_dir_we;
+    hpdcache_dir_entry_t                        init_dir_wentry;
+
+    // Directory valid bit vector (one bit per set and way)
+    hpdcache_way_vector_t [HPDCACHE_SETS-1:0]   dir_valid_q, dir_valid_d;
+    hpdcache_set_t                              dir_req_set_q, dir_req_set_d;
+    hpdcache_dir_addr_t                         dir_addr;
+    hpdcache_way_vector_t                       dir_cs;
+    hpdcache_way_vector_t                       dir_we;
+    hpdcache_dir_entry_t [HPDCACHE_WAYS-1:0]    dir_wentry;
+    hpdcache_dir_entry_t [HPDCACHE_WAYS-1:0]    dir_rentry;
+
+    hpdcache_data_addr_t                        data_addr;
+    hpdcache_data_enable_t                      data_cs;
+    hpdcache_data_enable_t                      data_we;
+    hpdcache_data_be_entry_t                    data_wbyteenable;
+    hpdcache_data_entry_t                       data_wentry;
+    hpdcache_data_entry_t                       data_rentry;
+
+    // Selected write request (output of data_write_comb)
+    logic                                       data_write;
+    logic                                       data_write_enable;
+    hpdcache_set_t                              data_write_set;
+    hpdcache_req_size_t                         data_write_size;
+    hpdcache_word_t                             data_write_word;
+    hpdcache_refill_data_t                      data_write_data;
+    hpdcache_refill_be_t                        data_write_be;
+
+    // Core and AMO write data/byte-enables widened to the refill width
+    hpdcache_refill_data_t                      data_req_write_data;
+    hpdcache_refill_be_t                        data_req_write_be;
+
+    hpdcache_refill_data_t                      data_amo_write_data;
+    hpdcache_refill_be_t                        data_amo_write_be;
+
+    hpdcache_way_vector_t                       data_way;
+
+    hpdcache_data_ram_row_idx_t                 data_ram_row;
+    hpdcache_data_ram_way_idx_t                 data_ram_word;
+
+    // }}}
+
+    // Init FSM
+    // {{{
+    // While init_q is 0, every way of the directory is written (cs and we all
+    // ones) with a zeroed entry at address init_set_q, which is incremented
+    // each cycle. init_q goes to 1 after the address HPDCACHE_SETS - 1 has
+    // been written, and then stays at 1.
+    always_comb
+    begin : init_comb
+        init_dir_wentry.tag      = '0;
+        init_dir_wentry.reserved = '0;
+        init_dir_cs              = '0;
+        init_dir_we              = '0;
+        init_d                   = init_q;
+        init_set_d               = init_set_q;
+
+        case (init_q)
+            1'b0: begin
+                init_d      = (hpdcache_uint'(init_set_q) == (HPDCACHE_SETS - 1));
+                init_set_d  = init_set_q + 1;
+                init_dir_cs = '1;
+                init_dir_we = '1;
+            end
+
+            1'b1: begin
+                init_d      = 1'b1;
+                init_set_d  = init_set_q;
+            end
+        endcase
+    end
+
+    assign ready_o = init_q;
+
+    // Note: the directory valid bits share this register process, so they are
+    // all cleared by the asynchronous reset
+    always_ff @(posedge clk_i or negedge rst_ni)
+    begin : init_ff
+        if (!rst_ni) begin
+            init_q      <= 1'b0;
+            init_set_q  <= 0;
+            dir_valid_q <= '0;
+        end else begin
+            init_q      <= init_d;
+            init_set_q  <= init_set_d;
+            dir_valid_q <= dir_valid_d;
+        end
+    end
+    // }}}
+
+    // Memory arrays
+    // {{{
+    hpdcache_memarray hpdcache_memarray_i(
+        .clk_i,
+        .rst_ni,
+
+        .dir_addr_i                    (dir_addr),
+        .dir_cs_i                      (dir_cs),
+        .dir_we_i                      (dir_we),
+        .dir_wentry_i                  (dir_wentry),
+        .dir_rentry_o                  (dir_rentry),
+
+        .data_addr_i                   (data_addr),
+        .data_cs_i                     (data_cs),
+        .data_we_i                     (data_we),
+        .data_wbyteenable_i            (data_wbyteenable),
+        .data_wentry_i                 (data_wentry),
+        .data_rentry_o                 (data_rentry)
+    );
+    // }}}
+
+    // Directory RAM request mux
+    // {{{
+    // Priority follows the item order: initialization, match, AMO match,
+    // refill, CMO check. The assertion at the end of the module checks that
+    // the four requesters are not active at the same time.
+    always_comb
+    begin : dir_ctrl_comb
+        case (1'b1)
+            // Cache directory initialization
+            ~init_q: begin
+                dir_addr   = init_set_q;
+                dir_cs     = init_dir_cs;
+                dir_we     = init_dir_we;
+                dir_wentry = {HPDCACHE_WAYS{init_dir_wentry}};
+            end
+
+            // Cache directory match tag -> hit
+            dir_match_i: begin
+                dir_addr   = dir_match_set_i;
+                dir_cs     = '1;
+                dir_we     = '0;
+                dir_wentry = '0;
+            end
+
+            // Cache directory AMO match tag -> hit
+            dir_amo_match_i: begin
+                dir_addr   = dir_amo_match_set_i;
+                dir_cs     = '1;
+                dir_we     = '0;
+                dir_wentry = '0;
+            end
+
+            // Cache directory update
+            // (only the victim way chosen by the PLRU is selected and written;
+            // the refill entry is presented to all ways)
+            dir_refill_i: begin
+                dir_addr   = dir_refill_set_i;
+                dir_cs     = dir_victim_way_o;
+                dir_we     = dir_victim_way_o;
+                dir_wentry = {HPDCACHE_WAYS{dir_refill_entry_i}};
+            end
+
+            // Cache directory CMO match tag
+            dir_cmo_check_i: begin
+                dir_addr   = dir_cmo_check_set_i;
+                dir_cs     = '1;
+                dir_we     = '0;
+                dir_wentry = '0;
+            end
+
+            // Do nothing
+            default: begin
+                dir_addr   = '0;
+                dir_cs     = '0;
+                dir_we     = '0;
+                dir_wentry = '0;
+            end
+        endcase
+    end
+    // }}}
+
+    // Directory valid logic
+    // {{{
+    // A refill sets the valid bit of the victim way; a CMO invalidation clears
+    // the valid bits of the given ways. The case is "unique", so both events
+    // are expected never to be asserted in the same cycle.
+    always_comb
+    begin : dir_valid_comb
+        dir_valid_d = dir_valid_q;
+
+        unique case (1'b1)
+            // Refill the cache after a miss
+            dir_refill_i: begin
+                dir_valid_d[dir_refill_set_i] = dir_valid_q[dir_refill_set_i] | dir_victim_way_o;
+            end
+            // CMO invalidate a set
+            dir_cmo_inval_i: begin
+                dir_valid_d[dir_cmo_inval_set_i] = dir_valid_q[dir_cmo_inval_set_i] & ~dir_cmo_inval_way_i;
+            end
+            default: begin
+                // do nothing
+            end
+        endcase
+    end
+    // }}}
+
+    // Directory hit logic
+    // {{{
+    // Set of the directory access being issued in this cycle; it is registered
+    // in dir_req_set_q (see req_read_ff) and used for the hit computation
+    assign dir_req_set_d = dir_match_i     ? dir_match_set_i     :
+                           dir_amo_match_i ? dir_amo_match_set_i :
+                           dir_cmo_check_i ? dir_cmo_check_set_i :
+                           dir_req_set_q ;
+
+    generate
+        hpdcache_way_vector_t req_hit;
+        hpdcache_way_vector_t amo_hit;
+        hpdcache_way_vector_t cmo_hit;
+
+        for (gen_i = 0; gen_i < int'(HPDCACHE_WAYS); gen_i++)
+        begin : dir_match_tag_gen
+            // Per-way comparison of the tag read from the directory against
+            // the tag of each requester
+            assign req_hit[gen_i] = (dir_rentry[gen_i].tag == dir_match_tag_i),
+                   amo_hit[gen_i] = (dir_rentry[gen_i].tag == dir_amo_match_tag_i),
+                   cmo_hit[gen_i] = (dir_rentry[gen_i].tag == dir_cmo_check_tag_i);
+
+            // A way hits only when its valid bit is set. The three outputs
+            // share the same registered set (dir_req_set_q).
+            assign dir_hit_way_o          [gen_i] = dir_valid_q[dir_req_set_q][gen_i] & req_hit[gen_i],
+                   dir_amo_hit_way_o      [gen_i] = dir_valid_q[dir_req_set_q][gen_i] & amo_hit[gen_i],
+                   dir_cmo_check_hit_way_o[gen_i] = dir_valid_q[dir_req_set_q][gen_i] & cmo_hit[gen_i];
+        end
+    endgenerate
+    // }}}
+
+    // Directory victim select logic
+    // {{{
+    logic                 plru_updt;
+    hpdcache_way_vector_t plru_updt_way;
+
+    // The PLRU is updated on a regular or an AMO access; the regular hit way
+    // is used whenever dir_update_lru_i is set
+    assign plru_updt     = dir_update_lru_i | dir_amo_update_plru_i,
+           plru_updt_way = dir_update_lru_i ? dir_hit_way_o : dir_amo_hit_way_o;
+
+    hpdcache_plru #(
+        .SETS                (HPDCACHE_SETS),
+        .WAYS                (HPDCACHE_WAYS)
+    ) plru_i (
+        .clk_i,
+        .rst_ni,
+
+        .updt_i              (plru_updt),
+        .updt_set_i          (dir_req_set_q),
+        .updt_way_i          (plru_updt_way),
+
+        .repl_i              (dir_refill_i),
+        .repl_set_i          (dir_refill_set_i),
+        .repl_dir_valid_i    (dir_valid_q[dir_refill_set_i]),
+        .repl_updt_plru_i    (dir_refill_updt_plru_i),
+
+        .victim_way_o        (dir_victim_way_o)
+    );
+    // }}}
+
+    // Data RAM request multiplexor
+    // {{{
+
+    // Upsize the request interface to match the maximum access width of the data RAM
+    generate
+        if (HPDCACHE_DATA_REQ_RATIO > 1) begin : upsize_data_req_write_gen
+            // demux request DATA
+            // (the data is replicated; the byte enables below select which
+            // copy is actually written)
+            assign data_req_write_data = {HPDCACHE_DATA_REQ_RATIO{data_req_write_data_i}};
+
+            // demux request BE
+            hpdcache_demux #(
+                .NOUTPUT     (HPDCACHE_DATA_REQ_RATIO),
+                .DATA_WIDTH  (HPDCACHE_REQ_DATA_WIDTH/8),
+                .ONE_HOT_SEL (1'b0)
+            ) data_req_write_be_demux_i (
+                .data_i      (data_req_write_be_i),
+                .sel_i       (data_req_write_word_i[HPDCACHE_REQ_WORD_INDEX_WIDTH +:
+                                                    $clog2(HPDCACHE_DATA_REQ_RATIO)]),
+                .data_o      (data_req_write_be)
+            );
+        end else begin
+            assign data_req_write_data = data_req_write_data_i,
+                   data_req_write_be   = data_req_write_be_i;
+        end
+    endgenerate
+
+    // Upsize the AMO data interface to match the maximum access width of the data RAM
+    generate
+        // The AMO write interface is 64 bits wide (see the port declarations)
+        localparam hpdcache_uint AMO_DATA_RATIO       = HPDCACHE_DATA_RAM_ACCESS_WIDTH/64;
+        localparam hpdcache_uint AMO_DATA_INDEX_WIDTH = $clog2(AMO_DATA_RATIO);
+
+        if (AMO_DATA_RATIO > 1) begin
+            assign data_amo_write_data = {AMO_DATA_RATIO{data_amo_write_data_i}};
+
+            hpdcache_demux #(
+                .NOUTPUT     (AMO_DATA_RATIO),
+                .DATA_WIDTH  (8),
+                .ONE_HOT_SEL (1'b0)
+            ) amo_be_demux_i (
+                .data_i      (data_amo_write_be_i),
+                .sel_i       (data_amo_write_word_i[0 +: AMO_DATA_INDEX_WIDTH]),
+                .data_o      (data_amo_write_be)
+            );
+        end else begin
+            assign data_amo_write_data = data_amo_write_data_i,
+                   data_amo_write_be   = data_amo_write_be_i;
+        end
+    endgenerate
+
+    // Multiplex between data write requests
+    // (priority by item order: refill, then core write, then AMO write)
+    always_comb
+    begin : data_write_comb
+        case (1'b1)
+            // Refill: always enabled, full RAM access width, all bytes written
+            data_refill_i: begin
+                data_write        = 1'b1;
+                data_write_enable = 1'b1;
+                data_write_set    = data_refill_set_i;
+                data_write_size   = hpdcache_req_size_t'($clog2(HPDCACHE_DATA_RAM_ACCESS_WIDTH/8));
+                data_write_word   = data_refill_word_i;
+                data_write_data   = data_refill_data_i;
+                data_write_be     = '1;
+            end
+
+            data_req_write_i: begin
+                data_write        = 1'b1;
+                data_write_enable = data_req_write_enable_i;
+                data_write_set    = data_req_write_set_i;
+                data_write_size   = data_req_write_size_i;
+                data_write_word   = data_req_write_word_i;
+                data_write_data   = data_req_write_data;
+                data_write_be     = data_req_write_be;
+            end
+
+            data_amo_write_i: begin
+                data_write        = 1'b1;
+                data_write_enable = data_amo_write_enable_i;
+                data_write_set    = data_amo_write_set_i;
+                data_write_size   = data_amo_write_size_i;
+                data_write_word   = data_amo_write_word_i;
+                data_write_data   = data_amo_write_data;
+                data_write_be     = data_amo_write_be;
+            end
+
+            default: begin
+                data_write        = 1'b0;
+                data_write_enable = 1'b0;
+                data_write_set    = '0;
+                data_write_size   = '0;
+                data_write_word   = '0;
+                data_write_data   = '0;
+                data_write_be     = '0;
+            end
+        endcase
+    end
+
+    // Multiplex between read and write access on the data RAM
+    // (way targeted by the write: refill way, AMO hit way, or regular hit way)
+    assign data_way = data_refill_i    ? data_refill_way_i :
+                      data_amo_write_i ? dir_amo_hit_way_o :
+                                         dir_hit_way_o;
+
+    // Decode way index
+    assign data_ram_word = hpdcache_way_to_data_ram_word(data_way),
+           data_ram_row  = hpdcache_way_to_data_ram_row(data_way);
+
+    // Drive the data RAM interface. A read request is listed first and thus
+    // takes precedence over a write selected by data_write_comb.
+    always_comb
+    begin : data_ctrl_comb
+        case (1'b1)
+            // Select data read inputs
+            // (the same address and chip-select pattern is applied to every
+            // RAM row; the way is selected later on the read data)
+            data_req_read_i: begin
+                data_addr = {HPDCACHE_ALL_CUTS{hpdcache_set_to_data_ram_addr(data_req_read_set_i,
+                                                                             data_req_read_word_i)}};
+
+                data_we          = '0;
+                data_wbyteenable = '0;
+                data_wentry      = '0;
+                for (int unsigned i = 0; i < HPDCACHE_DATA_RAM_Y_CUTS; i++) begin
+                    data_cs[i] = hpdcache_compute_data_ram_cs(data_req_read_size_i,
+                                                              data_req_read_word_i);
+                end
+            end
+
+            // Select data write inputs
+            data_write: begin
+                data_addr = {HPDCACHE_ALL_CUTS{hpdcache_set_to_data_ram_addr(data_write_set,
+                                                                             data_write_word)}};
+
+                // Word j of the write data is replicated on every way slot of
+                // column j, in every row
+                for (int unsigned i = 0; i < HPDCACHE_DATA_RAM_Y_CUTS; i++) begin
+                    for (int unsigned j = 0; j < HPDCACHE_DATA_RAM_X_CUTS; j++) begin
+                        data_wentry[i][j] = {HPDCACHE_DATA_WAYS_PER_RAM_WORD{data_write_data[j]}};
+                    end
+                end
+
+                for (int unsigned i = 0; i < HPDCACHE_DATA_RAM_Y_CUTS; i++) begin
+                    data_cs[i] = hpdcache_compute_data_ram_cs(data_write_size, data_write_word);
+
+                    // Write-enable only the row holding the target way, and
+                    // only when the write is enabled
+                    if (i == hpdcache_uint'(data_ram_row)) begin
+                        data_we[i] = data_write_enable ? data_cs[i] : '0;
+                    end else begin
+                        data_we[i] = '0;
+                    end
+
+                    // Build the write mask
+                    // (byte enables are applied to the way slot of the target
+                    // way only; the other slots are masked out)
+                    for (int unsigned j = 0; j < HPDCACHE_ACCESS_WORDS; j++) begin
+                        for (int unsigned k = 0; k < HPDCACHE_DATA_WAYS_PER_RAM_WORD; k++) begin
+                            data_wbyteenable[i][j][k] = (k == hpdcache_uint'(data_ram_word)) ?
+                                    data_write_be[j] : '0;
+                        end
+                    end
+                end
+            end
+
+            // Do nothing
+            default: begin
+                data_addr        = '0;
+                data_cs          = '0;
+                data_we          = '0;
+                data_wbyteenable = '0;
+                data_wentry      = '0;
+            end
+        endcase
+    end
+    // }}}
+
+    // Data RAM read data multiplexor
+    // {{{
+    generate
+        hpdcache_req_data_t [HPDCACHE_DATA_REQ_RATIO-1:0][HPDCACHE_WAYS-1:0] data_read_words;
+        hpdcache_req_data_t                              [HPDCACHE_WAYS-1:0] data_read_req_word;
+
+        // Organize the read data by words (all ways for the same word are contiguous)
+        for (gen_i = 0; gen_i < int'(HPDCACHE_DATA_REQ_RATIO); gen_i++) begin
+            for (gen_j = 0; gen_j < int'(HPDCACHE_WAYS); gen_j++) begin
+                for (gen_k = 0; gen_k < int'(HPDCACHE_REQ_WORDS); gen_k++) begin
+                    // data_rentry is indexed [RAM row][word column][way slot]
+                    assign data_read_words[gen_i][gen_j][gen_k] =
+                            data_rentry[(gen_j / HPDCACHE_DATA_WAYS_PER_RAM_WORD)]
+                                       [(gen_i * HPDCACHE_REQ_WORDS ) + gen_k]
+                                       [(gen_j % HPDCACHE_DATA_WAYS_PER_RAM_WORD)];
+                end
+            end
+        end
+
+        // Mux the data according to the access word
+        if (HPDCACHE_DATA_REQ_RATIO > 1) begin : req_width_lt_ram_width
+            typedef logic [$clog2(HPDCACHE_DATA_REQ_RATIO)-1:0] data_req_word_t;
+            data_req_word_t data_read_req_word_index_q;
+
+            hpdcache_mux #(
+                .NINPUT      (HPDCACHE_DATA_REQ_RATIO),
+                .DATA_WIDTH  (HPDCACHE_REQ_DATA_WIDTH*HPDCACHE_WAYS)
+            ) data_read_req_word_mux_i(
+                .data_i      (data_read_words),
+                .sel_i       (data_read_req_word_index_q),
+                .data_o      (data_read_req_word)
+            );
+
+            // Register the word index of the read request so that it can be
+            // used as the mux select one cycle later (no reset, updated every
+            // cycle)
+            always_ff @(posedge clk_i)
+            begin : data_req_read_word_ff
+                data_read_req_word_index_q <=
+                        data_req_read_word_i[HPDCACHE_REQ_WORD_INDEX_WIDTH +:
+                                             $clog2(HPDCACHE_DATA_REQ_RATIO)];
+            end
+        end
+
+        // Request data interface width is equal to the data RAM width
+        else begin : req_width_eq_ram_width
+            assign data_read_req_word = data_read_words;
+        end
+
+        // Mux the data according to the hit way
+        hpdcache_mux #(
+            .NINPUT      (HPDCACHE_WAYS),
+            .DATA_WIDTH  (HPDCACHE_REQ_DATA_WIDTH),
+            .ONE_HOT_SEL (1'b1)
+        ) data_read_req_word_way_mux_i(
+            .data_i      (data_read_req_word),
+            .sel_i       (dir_hit_way_o),
+            .data_o      (data_req_read_data_o)
+        );
+    endgenerate
+
+
+    // Delay the accessed set for checking the tag from the directory in the
+    // next cycle (hit logic)
+    // (the register has no reset and only changes when a match or check
+    // request is issued)
+    always_ff @(posedge clk_i)
+    begin : req_read_ff
+        if (dir_match_i || dir_amo_match_i || dir_cmo_check_i) begin
+            dir_req_set_q <= dir_req_set_d;
+        end
+    end
+    // }}}
+
+    // Assertions
+    // {{{
+    // At most one requester may access the directory, and at most one may
+    // access the data RAMs, in any given cycle
+    // pragma translate_off
+    concurrent_dir_access_assert: assert property (@(posedge clk_i) disable iff (!rst_ni)
+            $onehot0({dir_match_i, dir_amo_match_i, dir_cmo_check_i, dir_refill_i})) else
+            $error("hpdcache_memctrl: more than one process is accessing the cache directory");
+
+    concurrent_data_access_assert: assert property (@(posedge clk_i) disable iff (!rst_ni)
+            $onehot0({data_req_read_i, data_req_write_i, data_amo_write_i, data_refill_i})) else
+            $error("hpdcache_memctrl: more than one process is accessing the cache data");
+    // pragma translate_on
+    // }}}
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_miss_handler.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_miss_handler.sv
new file mode 100644
index 00000000..97ecf466
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_miss_handler.sv
@@ -0,0 +1,659 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : HPDcache Miss Handler + * History : + */ +module hpdcache_miss_handler +// {{{ +import hpdcache_pkg::*; +// Parameters +// {{{ +#( + parameter int HPDcacheMemIdWidth = 8, + parameter int HPDcacheMemDataWidth = 512, + parameter type hpdcache_mem_req_t = logic, + parameter type hpdcache_mem_resp_r_t = logic, + localparam type hpdcache_mem_id_t = logic [HPDcacheMemIdWidth-1:0] +) +// }}} +// Ports +// {{{ +( + input logic clk_i, + input logic rst_ni, + + // Global control signals + // {{{ + output logic mshr_empty_o, + output logic mshr_full_o, + // }}} + + // Configuration signals + // {{{ + input logic cfg_prefetch_updt_plru_i, + // }}} + + // CHECK interface + // {{{ + input logic mshr_check_i, + input mshr_set_t mshr_check_set_i, + input mshr_tag_t mshr_check_tag_i, + output logic mshr_check_hit_o, + // }}} + + // MISS interface + // {{{ + // MISS request interface + output logic mshr_alloc_ready_o, + input logic mshr_alloc_i, + input logic mshr_alloc_cs_i, + input hpdcache_nline_t mshr_alloc_nline_i, + output logic mshr_alloc_full_o, + input hpdcache_req_tid_t mshr_alloc_tid_i, + input hpdcache_req_sid_t mshr_alloc_sid_i, + input hpdcache_word_t mshr_alloc_word_i, + input logic mshr_alloc_need_rsp_i, + input logic mshr_alloc_is_prefetch_i, + + // REFILL MISS interface + input logic refill_req_ready_i, + output logic refill_req_valid_o, + output logic refill_busy_o, + output logic refill_updt_plru_o, + output hpdcache_set_t refill_set_o, + output hpdcache_dir_entry_t refill_dir_entry_o, + input hpdcache_way_vector_t refill_victim_way_i, + output logic refill_write_dir_o, + output logic refill_write_data_o, + output hpdcache_way_vector_t refill_victim_way_o, + output hpdcache_refill_data_t refill_data_o, + output hpdcache_word_t refill_word_o, + output hpdcache_nline_t 
refill_nline_o, + output logic refill_updt_rtab_o, + + // REFILL core response interface + output logic refill_core_rsp_valid_o, + output hpdcache_rsp_t refill_core_rsp_o, + // }}} + + // MEMORY interface + // {{{ + input logic mem_req_ready_i, + output logic mem_req_valid_o, + output hpdcache_mem_req_t mem_req_o, + + output logic mem_resp_ready_o, + input logic mem_resp_valid_i, + input hpdcache_mem_resp_r_t mem_resp_i + // }}} +); +// }}} + + // Declaration of constants and types + // {{{ + localparam int unsigned REFILL_REQ_RATIO = HPDCACHE_ACCESS_WORDS/HPDCACHE_REQ_WORDS; + + typedef enum logic { + MISS_REQ_IDLE = 1'b0, + MISS_REQ_SEND = 1'b1 + } miss_req_fsm_e; + + typedef enum { + REFILL_IDLE, + REFILL_WRITE, + REFILL_WRITE_DIR + } refill_fsm_e; + + typedef struct packed { + hpdcache_mem_error_e r_error; + hpdcache_mem_id_t r_id; + } mem_resp_metadata_t; + + function automatic mshr_set_t get_ack_mshr_set(hpdcache_mem_id_t id); + return id[0 +: HPDCACHE_MSHR_SET_WIDTH]; + endfunction + + function automatic mshr_way_t get_ack_mshr_way(hpdcache_mem_id_t id); + return id[HPDCACHE_MSHR_SET_WIDTH +: HPDCACHE_MSHR_WAY_WIDTH]; + endfunction + // }}} + + // Declaration of internal signals and registers + // {{{ + miss_req_fsm_e miss_req_fsm_q, miss_req_fsm_d; + mshr_way_t mshr_alloc_way_q, mshr_alloc_way_d; + mshr_set_t mshr_alloc_set_q, mshr_alloc_set_d; + mshr_tag_t mshr_alloc_tag_q, mshr_alloc_tag_d; + + refill_fsm_e refill_fsm_q, refill_fsm_d; + hpdcache_set_t refill_set_q; + hpdcache_tag_t refill_tag_q; + hpdcache_way_vector_t refill_way_q; + hpdcache_req_sid_t refill_sid_q; + hpdcache_req_tid_t refill_tid_q; + hpdcache_word_t refill_cnt_q, refill_cnt_d; + logic refill_need_rsp_q; + logic refill_is_prefetch_q; + hpdcache_word_t refill_core_rsp_word_q; + logic refill_way_bypass; + + mem_resp_metadata_t refill_fifo_resp_meta_wdata, refill_fifo_resp_meta_rdata; + logic refill_fifo_resp_meta_w, refill_fifo_resp_meta_wok; + logic refill_fifo_resp_meta_r, 
refill_fifo_resp_meta_rok; + + logic refill_fifo_resp_data_w, refill_fifo_resp_data_wok; + hpdcache_refill_data_t refill_fifo_resp_data_rdata; + logic refill_fifo_resp_data_r; + + logic refill_core_rsp_valid; + hpdcache_req_data_t refill_core_rsp_rdata; + hpdcache_req_sid_t refill_core_rsp_sid; + hpdcache_req_tid_t refill_core_rsp_tid; + logic refill_core_rsp_error; + hpdcache_word_t refill_core_rsp_word; + hpdcache_rsp_t refill_core_rsp; + + logic refill_is_error; + + logic mshr_alloc; + logic mshr_alloc_cs; + logic mshr_ack; + logic mshr_ack_cs; + mshr_set_t mshr_ack_set; + mshr_way_t mshr_ack_way; + hpdcache_nline_t mshr_ack_nline; + hpdcache_req_sid_t mshr_ack_src_id; + hpdcache_req_tid_t mshr_ack_req_id; + hpdcache_word_t mshr_ack_word; + logic mshr_ack_need_rsp; + logic mshr_ack_is_prefetch; + logic mshr_empty; + // }}} + + // Miss Request FSM + // {{{ + always_comb + begin : miss_req_fsm_comb + mshr_alloc_ready_o = 1'b0; + mshr_alloc = 1'b0; + mshr_alloc_cs = 1'b0; + mem_req_valid_o = 1'b0; + + miss_req_fsm_d = miss_req_fsm_q; + + case (miss_req_fsm_q) + MISS_REQ_IDLE: begin + mshr_alloc_ready_o = 1'b1; + mshr_alloc = mshr_alloc_i; + mshr_alloc_cs = mshr_alloc_cs_i; + if (mshr_alloc_i) begin + miss_req_fsm_d = MISS_REQ_SEND; + end else begin + miss_req_fsm_d = MISS_REQ_IDLE; + end + end + MISS_REQ_SEND: begin + mem_req_valid_o = 1'b1; + if (mem_req_ready_i) begin + miss_req_fsm_d = MISS_REQ_IDLE; + end else begin + miss_req_fsm_d = MISS_REQ_SEND; + end + end + endcase + end + + localparam hpdcache_uint REFILL_REQ_SIZE = $clog2(HPDcacheMemDataWidth/8); + localparam hpdcache_uint REFILL_REQ_LEN = HPDCACHE_CL_WIDTH/HPDcacheMemDataWidth; + + assign mem_req_o.mem_req_addr = {mshr_alloc_tag_q, mshr_alloc_set_q, {HPDCACHE_OFFSET_WIDTH{1'b0}} }, + mem_req_o.mem_req_len = hpdcache_mem_len_t'(REFILL_REQ_LEN-1), + mem_req_o.mem_req_size = hpdcache_mem_size_t'(REFILL_REQ_SIZE), + mem_req_o.mem_req_id = hpdcache_mem_id_t'({mshr_alloc_way_q, mshr_alloc_set_q}), + 
mem_req_o.mem_req_command = HPDCACHE_MEM_READ, + mem_req_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_ADD, + mem_req_o.mem_req_cacheable = 1'b1; + + always_ff @(posedge clk_i) + begin : miss_req_fsm_internal_ff + if (mshr_alloc) begin + mshr_alloc_way_q <= mshr_alloc_way_d; + mshr_alloc_set_q <= mshr_alloc_set_d; + mshr_alloc_tag_q <= mshr_alloc_tag_d; + end + end + + always_ff @(posedge clk_i or negedge rst_ni) + begin : miss_req_fsm_ff + if (!rst_ni) begin + miss_req_fsm_q <= MISS_REQ_IDLE; + end else begin + miss_req_fsm_q <= miss_req_fsm_d; + end + end + // }}} + + // Refill FSM + // {{{ + + // ask permission to the refill arbiter if there is a pending refill + assign refill_req_valid_o = refill_fsm_q == REFILL_IDLE ? refill_fifo_resp_meta_rok : 1'b0; + + // forward the victim way directly from the victim selection logic or + // from the internal register + assign refill_victim_way_o = refill_way_bypass ? refill_victim_way_i : refill_way_q; + + always_comb + begin : miss_resp_fsm_comb + automatic hpdcache_uint REFILL_LAST_CHUNK_WORD; + REFILL_LAST_CHUNK_WORD = HPDCACHE_CL_WORDS - HPDCACHE_ACCESS_WORDS; + + refill_updt_plru_o = 1'b0; + refill_set_o = '0; + refill_write_dir_o = 1'b0; + refill_write_data_o = 1'b0; + refill_updt_rtab_o = 1'b0; + refill_cnt_d = refill_cnt_q; + refill_way_bypass = 1'b0; + + refill_core_rsp_valid = 1'b0; + refill_core_rsp_sid = '0; + refill_core_rsp_tid = '0; + refill_core_rsp_error = 1'b0; + refill_core_rsp_word = 0; + + refill_fifo_resp_meta_r = 1'b0; + refill_fifo_resp_data_r = 1'b0; + + mshr_ack_cs = 1'b0; + mshr_ack = 1'b0; + + refill_fsm_d = refill_fsm_q; + + case (refill_fsm_q) + // Wait for refill responses + // {{{ + REFILL_IDLE: begin + if (refill_fifo_resp_meta_rok) begin + // anticipate the activation of the MSHR independently of the grant signal from + // the refill arbiter. This is to avoid the introduction of unnecessary timing + // paths (however there could be a minor augmentation of the power + // consumption). 
+ mshr_ack_cs = 1'b1; + + // if the permission is granted, start refilling + if (refill_req_ready_i) begin + refill_fsm_d = REFILL_WRITE; + + // read the MSHR and reset the valid bit for the + // corresponding entry + mshr_ack = 1'b1; + + // initialize the counter for refill words + refill_cnt_d = 0; + end + end + end + // }}} + + // Write refill data into the cache + // {{{ + REFILL_WRITE: begin + automatic logic is_prefetch; + + // Respond to the core (when needed) + if (refill_cnt_q == 0) begin + automatic hpdcache_uint _core_rsp_word; + _core_rsp_word = hpdcache_uint'(mshr_ack_word)/HPDCACHE_ACCESS_WORDS; + + if (mshr_ack_need_rsp) begin + refill_core_rsp_valid = (hpdcache_uint'(_core_rsp_word) == 0); + end + + refill_core_rsp_sid = mshr_ack_src_id; + refill_core_rsp_tid = mshr_ack_req_id; + refill_core_rsp_error = refill_is_error; + refill_core_rsp_word = hpdcache_word_t'( + hpdcache_uint'(mshr_ack_word)/HPDCACHE_REQ_WORDS); + end else begin + automatic hpdcache_uint _core_rsp_word; + _core_rsp_word = hpdcache_uint'(refill_core_rsp_word_q)/ + HPDCACHE_ACCESS_WORDS; + + if (refill_need_rsp_q) begin + automatic hpdcache_uint _refill_cnt; + _refill_cnt = hpdcache_uint'(refill_cnt_q)/HPDCACHE_ACCESS_WORDS; + refill_core_rsp_valid = (_core_rsp_word == _refill_cnt); + end + + refill_core_rsp_sid = refill_sid_q; + refill_core_rsp_tid = refill_tid_q; + refill_core_rsp_error = refill_is_error; + refill_core_rsp_word = hpdcache_word_t'( + hpdcache_uint'(refill_core_rsp_word_q)/HPDCACHE_REQ_WORDS); + end + + // Write the the data in the cache data array + if (refill_cnt_q == 0) begin + refill_set_o = mshr_ack_nline[0 +: HPDCACHE_SET_WIDTH]; + refill_way_bypass = 1'b1; + is_prefetch = mshr_ack_is_prefetch; + end else begin + refill_set_o = refill_set_q; + refill_way_bypass = 1'b0; + is_prefetch = refill_is_prefetch_q; + end + refill_write_data_o = ~refill_is_error; + + // Consume chunk of data from the FIFO buffer in the memory interface + refill_fifo_resp_data_r = 1'b1; 
+ + // Update directory on the last chunk of data + refill_cnt_d = refill_cnt_q + hpdcache_word_t'(HPDCACHE_ACCESS_WORDS); + + if (hpdcache_uint'(refill_cnt_q) == REFILL_LAST_CHUNK_WORD) begin + if (REFILL_LAST_CHUNK_WORD == 0) begin + // Special case: if the cache-line data can be written in a single cycle, + // wait an additional cycle to write the directory. This allows to prevent + // a RAM-to-RAM timing path between the MSHR and the DIR. + refill_fsm_d = REFILL_WRITE_DIR; + end else begin + // Write the new entry in the cache directory + refill_write_dir_o = ~refill_is_error; + + // Update the PLRU bits. Only in the following cases: + // - There is no error in response AND + // - It is a prefetch and the cfg_prefetch_updt_plru_i is set OR + // - It is a read miss. + refill_updt_plru_o = ~refill_is_error & + (~is_prefetch | cfg_prefetch_updt_plru_i); + + // Update dependency flags in the retry table + refill_updt_rtab_o = 1'b1; + + // consume the response from the network + refill_fifo_resp_meta_r = 1'b1; + + refill_fsm_d = REFILL_IDLE; + end + end + end + // }}} + + // Write cache directory (this state is only visited when ACCESS_WORDS == CL_WORDS, + // this is when the entire cache-line can be written in a single cycle) + // {{{ + REFILL_WRITE_DIR: begin + automatic logic is_prefetch; + is_prefetch = refill_is_prefetch_q; + + // Select the target set and way + refill_set_o = refill_set_q; + refill_way_bypass = 1'b0; + + // Write the new entry in the cache directory + refill_write_dir_o = ~refill_is_error; + + // Update the PLRU bits. Only in the following cases: + // - There is no error in response AND + // - It is a prefetch and the cfg_prefetch_updt_plru_i is set OR + // - It is a read miss. 
+ refill_updt_plru_o = ~refill_is_error & + (~is_prefetch | cfg_prefetch_updt_plru_i); + + // Update dependency flags in the retry table + refill_updt_rtab_o = 1'b1; + + // consume the response from the network + refill_fifo_resp_meta_r = 1'b1; + + refill_fsm_d = REFILL_IDLE; + end + // }}} + + default: begin + // pragma translate_off + $error("Illegal state"); + // pragma translate_on + end + endcase + end + + assign refill_is_error = (refill_fifo_resp_meta_rdata.r_error == HPDCACHE_MEM_RESP_NOK); + + assign refill_busy_o = (refill_fsm_q != REFILL_IDLE), + refill_nline_o = {refill_tag_q, refill_set_q}, + refill_word_o = refill_cnt_q; + + assign mshr_ack_set = get_ack_mshr_set(refill_fifo_resp_meta_rdata.r_id), + mshr_ack_way = get_ack_mshr_way(refill_fifo_resp_meta_rdata.r_id); + + assign refill_dir_entry_o.tag = refill_tag_q, + refill_dir_entry_o.reserved = '0; + + assign refill_core_rsp.rdata = refill_core_rsp_rdata, + refill_core_rsp.sid = refill_core_rsp_sid, + refill_core_rsp.tid = refill_core_rsp_tid, + refill_core_rsp.error = refill_core_rsp_error, + refill_core_rsp.aborted = 1'b0; + + hpdcache_fifo_reg #( + .FIFO_DEPTH (1), + .FEEDTHROUGH (HPDCACHE_REFILL_CORE_RSP_FEEDTHROUGH), + .fifo_data_t (hpdcache_rsp_t) + ) i_refill_core_rsp_buf( + .clk_i, + .rst_ni, + .w_i (refill_core_rsp_valid), + .wok_o (/*unused*/), + .wdata_i (refill_core_rsp), + .r_i (1'b1), // core shall always be ready to consume a response + .rok_o (refill_core_rsp_valid_o), + .rdata_o (refill_core_rsp_o) + ); + + generate + // refill's width is bigger than the width of the core's interface + if (REFILL_REQ_RATIO > 1) begin : core_rsp_data_mux_gen + hpdcache_mux #( + .NINPUT (REFILL_REQ_RATIO), + .DATA_WIDTH (HPDCACHE_REQ_DATA_WIDTH) + ) data_read_rsp_mux_i( + .data_i (refill_data_o), + .sel_i (refill_core_rsp_word[0 +: $clog2(REFILL_REQ_RATIO)]), + .data_o (refill_core_rsp_rdata) + ); + end + + // refill's width is equal to the width of the core's interface + else begin + assign 
refill_core_rsp_rdata = refill_data_o; + end + endgenerate + + /* FIXME: when multiple chunks, in case of error, the error bit is not + * necessarily set on all chunks */ + assign refill_fifo_resp_meta_wdata = '{ + r_error: mem_resp_i.mem_resp_r_error, + r_id : mem_resp_i.mem_resp_r_id + }; + + hpdcache_fifo_reg #( + .FIFO_DEPTH (2), + .fifo_data_t (mem_resp_metadata_t) + ) i_r_metadata_fifo ( + .clk_i, + .rst_ni, + + .w_i (refill_fifo_resp_meta_w), + .wok_o (refill_fifo_resp_meta_wok), + .wdata_i(refill_fifo_resp_meta_wdata), + + .r_i (refill_fifo_resp_meta_r), + .rok_o (refill_fifo_resp_meta_rok), + .rdata_o(refill_fifo_resp_meta_rdata) + ); + + generate + if (HPDcacheMemDataWidth < HPDCACHE_REFILL_DATA_WIDTH) begin + hpdcache_data_upsize #( + .WR_WIDTH(HPDcacheMemDataWidth), + .RD_WIDTH(HPDCACHE_REFILL_DATA_WIDTH), + .DEPTH(2*(HPDCACHE_CL_WIDTH/HPDCACHE_REFILL_DATA_WIDTH)) + ) i_rdata_upsize ( + .clk_i, + .rst_ni, + + .w_i (refill_fifo_resp_data_w), + .wlast_i (mem_resp_i.mem_resp_r_last), + .wok_o (refill_fifo_resp_data_wok), + .wdata_i (mem_resp_i.mem_resp_r_data), + + .r_i (refill_fifo_resp_data_r), + .rok_o (/* unused */), + .rdata_o (refill_fifo_resp_data_rdata) + ); + end else if (HPDcacheMemDataWidth > HPDCACHE_REFILL_DATA_WIDTH) begin + hpdcache_data_downsize #( + .WR_WIDTH(HPDcacheMemDataWidth), + .RD_WIDTH(HPDCACHE_REFILL_DATA_WIDTH), + .DEPTH(2*(HPDCACHE_CL_WIDTH/HPDcacheMemDataWidth)) + ) i_rdata_downsize ( + .clk_i, + .rst_ni, + + .w_i (refill_fifo_resp_data_w), + .wok_o (refill_fifo_resp_data_wok), + .wdata_i (mem_resp_i.mem_resp_r_data), + + .r_i (refill_fifo_resp_data_r), + .rok_o (/* unused */), + .rdata_o (refill_fifo_resp_data_rdata) + ); + end else begin + hpdcache_fifo_reg #( + .FIFO_DEPTH (2), + .fifo_data_t (hpdcache_refill_data_t) + ) i_rdata_fifo ( + .clk_i, + .rst_ni, + + .w_i (refill_fifo_resp_data_w), + .wok_o (refill_fifo_resp_data_wok), + .wdata_i (mem_resp_i.mem_resp_r_data), + + .r_i (refill_fifo_resp_data_r), + .rok_o (/* unused 
*/), + .rdata_o (refill_fifo_resp_data_rdata) + ); + end + endgenerate + + assign refill_data_o = refill_fifo_resp_data_rdata; + + assign refill_fifo_resp_data_w = mem_resp_valid_i & + (refill_fifo_resp_meta_wok | ~mem_resp_i.mem_resp_r_last), + refill_fifo_resp_meta_w = mem_resp_valid_i & + (refill_fifo_resp_data_wok & mem_resp_i.mem_resp_r_last), + mem_resp_ready_o = refill_fifo_resp_data_wok & + (refill_fifo_resp_meta_wok | ~mem_resp_i.mem_resp_r_last); + + always_ff @(posedge clk_i or negedge rst_ni) + begin : miss_resp_fsm_ff + if (!rst_ni) begin + refill_fsm_q <= REFILL_IDLE; + end else begin + refill_fsm_q <= refill_fsm_d; + end + end + + always_ff @(posedge clk_i) + begin : miss_resp_fsm_internal_ff + if ((refill_fsm_q == REFILL_WRITE) && (refill_cnt_q == 0)) begin + refill_set_q <= mshr_ack_nline[0 +: HPDCACHE_SET_WIDTH]; + refill_tag_q <= mshr_ack_nline[HPDCACHE_SET_WIDTH +: HPDCACHE_TAG_WIDTH];; + refill_way_q <= refill_victim_way_i; + refill_sid_q <= mshr_ack_src_id; + refill_tid_q <= mshr_ack_req_id; + refill_need_rsp_q <= mshr_ack_need_rsp; + refill_is_prefetch_q <= mshr_ack_is_prefetch; + refill_core_rsp_word_q <= mshr_ack_word; + end + refill_cnt_q <= refill_cnt_d; + end + // }}} + + // Miss Status Holding Register component + // {{{ + hpdcache_mshr hpdcache_mshr_i ( + .clk_i, + .rst_ni, + + .empty_o (mshr_empty), + .full_o (mshr_full_o), + + .check_i (mshr_check_i), + .check_set_i (mshr_check_set_i), + .check_tag_i (mshr_check_tag_i), + .hit_o (mshr_check_hit_o), + .alloc_i (mshr_alloc), + .alloc_cs_i (mshr_alloc_cs), + .alloc_nline_i (mshr_alloc_nline_i), + .alloc_req_id_i (mshr_alloc_tid_i), + .alloc_src_id_i (mshr_alloc_sid_i), + .alloc_word_i (mshr_alloc_word_i), + .alloc_need_rsp_i (mshr_alloc_need_rsp_i), + .alloc_is_prefetch_i (mshr_alloc_is_prefetch_i), + .alloc_full_o (mshr_alloc_full_o), + .alloc_set_o (mshr_alloc_set_d), + .alloc_tag_o (mshr_alloc_tag_d), + .alloc_way_o (mshr_alloc_way_d), + + .ack_i (mshr_ack), + .ack_cs_i 
(mshr_ack_cs), + .ack_set_i (mshr_ack_set), + .ack_way_i (mshr_ack_way), + .ack_req_id_o (mshr_ack_req_id), + .ack_src_id_o (mshr_ack_src_id), + .ack_nline_o (mshr_ack_nline), + .ack_word_o (mshr_ack_word), + .ack_need_rsp_o (mshr_ack_need_rsp), + .ack_is_prefetch_o (mshr_ack_is_prefetch) + ); + + // Indicate to the cache controller that there is no pending miss. This + // is, when the MSHR is empty, and the MISS handler has finished of + // processing the last miss response. + assign mshr_empty_o = mshr_empty & ~refill_busy_o; + // }}} + + // Assertions + // {{{ + // pragma translate_off + initial assert (HPDcacheMemIdWidth >= (HPDCACHE_MSHR_SET_WIDTH + HPDCACHE_MSHR_WAY_WIDTH)) else + $error("miss_handler: not enough ID bits in the memory interface"); + // pragma translate_on + // }}} + +endmodule +// }}} diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_mshr.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_mshr.sv new file mode 100644 index 00000000..f63e408f --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_mshr.sv @@ -0,0 +1,385 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : HPDcache Miss Status Holding Register (MSHR) + * History : + */ +module hpdcache_mshr +import hpdcache_pkg::*; + // Ports + // {{{ +( + // Clock and reset signals + input logic clk_i, + input logic rst_ni, + + // Global control signals + output logic empty_o, + output logic full_o, + + // Check and allocation interface + input logic check_i, + input mshr_set_t check_set_i, + input mshr_tag_t check_tag_i, + output logic hit_o, + input logic alloc_i, + input logic alloc_cs_i, + input hpdcache_nline_t alloc_nline_i, + input hpdcache_req_tid_t alloc_req_id_i, + input hpdcache_req_sid_t alloc_src_id_i, + input hpdcache_word_t alloc_word_i, + input logic alloc_need_rsp_i, + input logic alloc_is_prefetch_i, + output logic alloc_full_o, + output mshr_set_t alloc_set_o, + output mshr_tag_t alloc_tag_o, + output mshr_way_t alloc_way_o, + + // Acknowledge interface + input logic ack_i, + input logic ack_cs_i, + input mshr_set_t ack_set_i, + input mshr_way_t ack_way_i, + output hpdcache_req_tid_t ack_req_id_o, + output hpdcache_req_sid_t ack_src_id_o, + output hpdcache_nline_t ack_nline_o, + output hpdcache_word_t ack_word_o, + output logic ack_need_rsp_o, + output logic ack_is_prefetch_o +); + // }}} + + // Definition of constants and types + // {{{ + typedef struct packed { + mshr_tag_t tag; + hpdcache_req_tid_t req_id; + hpdcache_req_sid_t src_id; + hpdcache_word_t word_idx; + logic need_rsp; + logic is_prefetch; + } mshr_entry_t; + + + // Compute the width of MSHR entries depending on the support of write + // bitmask or not (write byte enable) + localparam int unsigned HPDCACHE_MSHR_ENTRY_BITS = $bits(mshr_entry_t); + + localparam int unsigned HPDCACHE_MSHR_RAM_ENTRY_BITS = + HPDCACHE_MSHR_RAM_WBYTEENABLE ? 
+ ((HPDCACHE_MSHR_ENTRY_BITS + 7)/8) * 8 : // align to 8 bits + HPDCACHE_MSHR_ENTRY_BITS; // or use the exact number of bits + + typedef logic [HPDCACHE_MSHR_RAM_ENTRY_BITS-1:0] mshr_sram_data_t; + // }}} + + // Definition of internal wires and registers + // {{{ + logic [HPDCACHE_MSHR_SETS*HPDCACHE_MSHR_WAYS-1:0] mshr_valid_q, mshr_valid_d; + mshr_set_t check_set_q; + mshr_set_t alloc_set; + mshr_tag_t alloc_tag; + hpdcache_set_t alloc_dcache_set; + mshr_way_t ack_way_q; + mshr_set_t ack_set_q; + hpdcache_set_t ack_dcache_set; + hpdcache_tag_t ack_dcache_tag; + + logic [HPDCACHE_MSHR_SETS*HPDCACHE_MSHR_WAYS-1:0] mshr_valid_set, mshr_valid_rst; + mshr_entry_t [HPDCACHE_MSHR_WAYS-1:0] mshr_wentry; + mshr_sram_data_t [HPDCACHE_MSHR_WAYS-1:0] mshr_wdata; + mshr_entry_t [HPDCACHE_MSHR_WAYS-1:0] mshr_rentry; + mshr_sram_data_t [HPDCACHE_MSHR_WAYS-1:0] mshr_rdata; + + logic mshr_we; + logic mshr_cs; + mshr_set_t mshr_addr; + logic check; + // }}} + + // Control part for the allocation and check operations + // {{{ + + // The allocation operation is prioritary with respect to the check operation + assign check = check_i & ~alloc_i; + + assign alloc_set = alloc_nline_i[0 +: HPDCACHE_MSHR_SET_WIDTH], + alloc_tag = alloc_nline_i[HPDCACHE_MSHR_SET_WIDTH +: HPDCACHE_MSHR_TAG_WIDTH], + alloc_dcache_set = alloc_nline_i[0 +: HPDCACHE_SET_WIDTH]; + + // Look for an available way in case of allocation + always_comb + begin + automatic mshr_way_t found_available_way; + + found_available_way = 0; + for (int unsigned i = 0; i < HPDCACHE_MSHR_WAYS; i++) begin + if (!mshr_valid_q[i*HPDCACHE_MSHR_SETS + int'(alloc_set)]) begin + found_available_way = mshr_way_t'(i); + break; + end + end + alloc_way_o = found_available_way; + end + + // Look if the mshr can accept the checked nline (in case of allocation) + always_comb + begin + automatic bit found_available; + + found_available = 1'b0; + for (int unsigned i = 0; i < HPDCACHE_MSHR_WAYS; i++) begin + if (!mshr_valid_q[i*HPDCACHE_MSHR_SETS 
+ int'(check_set_q)]) begin + found_available = 1'b1; + break; + end + end + alloc_full_o = ~found_available; + end + + assign alloc_set_o = alloc_set, + alloc_tag_o = alloc_tag; + + // Write when there is an allocation operation + assign mshr_we = alloc_i; + + // HPDcache SET to MSHR SET translation table + hpdcache_mshr_to_cache_set trlt_i ( + .clk_i, + .write_i (mshr_we), + .write_dcache_set_i (alloc_dcache_set), + .write_mshr_way_i (alloc_way_o), + .read_mshr_set_i (ack_set_q), + .read_mshr_way_i (ack_way_q), + .read_dcache_set_o (ack_dcache_set) + ); + + + // Generate write data and mask depending on the available way + always_comb + begin + for (int unsigned i = 0; i < HPDCACHE_MSHR_WAYS; i++) begin + mshr_wentry[i].tag = alloc_tag; + mshr_wentry[i].req_id = alloc_req_id_i; + mshr_wentry[i].src_id = alloc_src_id_i; + mshr_wentry[i].word_idx = alloc_word_i; + mshr_wentry[i].need_rsp = alloc_need_rsp_i; + mshr_wentry[i].is_prefetch = alloc_is_prefetch_i; + end + end + // }}} + + // Shared control signals + // {{{ + assign mshr_cs = check_i | alloc_cs_i | ack_cs_i; + assign mshr_addr = ack_i ? ack_set_i : + (alloc_i ? alloc_set : check_set_i); + + always_comb + begin : mshr_valid_comb + automatic logic unsigned [HPDCACHE_MSHR_WAY_WIDTH+HPDCACHE_MSHR_SET_WIDTH-1:0] mshr_alloc_slot; + automatic logic unsigned [HPDCACHE_MSHR_WAY_WIDTH+HPDCACHE_MSHR_SET_WIDTH-1:0] mshr_ack_slot; + + mshr_alloc_slot = {alloc_way_o, alloc_set}; + mshr_ack_slot = { ack_way_i, ack_set_i}; + + for (int unsigned i = 0; i < HPDCACHE_MSHR_SETS*HPDCACHE_MSHR_WAYS; i++) begin + mshr_valid_rst[i] = (i == hpdcache_uint'(mshr_ack_slot)) ? ack_i : 1'b0; + mshr_valid_set[i] = (i == hpdcache_uint'(mshr_alloc_slot)) ? 
alloc_i : 1'b0; + end + end + assign mshr_valid_d = (~mshr_valid_q & mshr_valid_set) | (mshr_valid_q & ~mshr_valid_rst); + // }}} + + // Read interface (ack) + // {{{ + generate + // extract HPDcache tag from the MSb of the MSHT TAG + if (HPDCACHE_SETS >= HPDCACHE_MSHR_SETS) begin : ack_dcache_set_ge_mshr_set_gen + assign ack_dcache_tag = mshr_rentry[ack_way_q].tag[ + HPDCACHE_MSHR_TAG_WIDTH - 1 : + HPDCACHE_MSHR_TAG_WIDTH - HPDCACHE_TAG_WIDTH]; + end + + // extract HPDcache tag from MSb of the MSHR set concatenated with the MSHR tag + else begin : ack_dcache_set_lt_mshr_set_gen + assign ack_dcache_tag = { + mshr_rentry[ack_way_q].tag , + ack_set_q[HPDCACHE_MSHR_SET_WIDTH - 1:HPDCACHE_SET_WIDTH]}; + end + endgenerate + + assign ack_req_id_o = mshr_rentry[ack_way_q].req_id, + ack_src_id_o = mshr_rentry[ack_way_q].src_id, + ack_nline_o = {ack_dcache_tag, ack_dcache_set}, + ack_word_o = mshr_rentry[ack_way_q].word_idx, + ack_need_rsp_o = mshr_rentry[ack_way_q].need_rsp, + ack_is_prefetch_o = mshr_rentry[ack_way_q].is_prefetch; + // }}} + + // Global control signals + // {{{ + assign empty_o = ~|mshr_valid_q; + assign full_o = &mshr_valid_q; + + always_comb + begin : hit_comb + automatic bit [HPDCACHE_MSHR_WAYS-1:0] __hit_way; + + for (int unsigned w = 0; w < HPDCACHE_MSHR_WAYS; w++) begin + automatic bit __valid; + automatic bit __match; + __valid = mshr_valid_q[w*HPDCACHE_MSHR_SETS + int'(check_set_q)]; + __match = (mshr_rentry[w].tag == check_tag_i); + __hit_way[w] = (__valid && __match); + end + + hit_o = |__hit_way; + end + // }}} + + // Internal state assignment + // {{{ + always_ff @(posedge clk_i or negedge rst_ni) + begin : mshr_ff_set + if (!rst_ni) begin + mshr_valid_q <= '0; + ack_way_q <= '0; + ack_set_q <= '0; + check_set_q <= '0; + end else begin + mshr_valid_q <= mshr_valid_d; + if (ack_i) begin + ack_way_q <= ack_way_i; + ack_set_q <= ack_set_i; + end + if (check) begin + check_set_q <= check_set_i; + end + end + end + // }}} + + // Internal components 
+ // {{{ + generate + if (HPDCACHE_MSHR_RAM_WBYTEENABLE) begin : mshr_wbyteenable_gen + typedef logic [HPDCACHE_MSHR_RAM_ENTRY_BITS/8-1:0] mshr_sram_wbyteenable_t; + mshr_sram_wbyteenable_t [HPDCACHE_MSHR_WAYS-1:0] mshr_wbyteenable; + + always_comb + begin : mshr_wbyteenable_comb + for (int unsigned i = 0; i < HPDCACHE_MSHR_WAYS; i++) begin + mshr_wbyteenable[i] = (int'(alloc_way_o) == i) ? '1 : '0; + end + end + + if (HPDCACHE_MSHR_USE_REGBANK) begin : mshr_regbank_gen + hpdcache_regbank_wbyteenable_1rw #( + .DATA_SIZE (HPDCACHE_MSHR_WAYS*HPDCACHE_MSHR_RAM_ENTRY_BITS), + .ADDR_SIZE (HPDCACHE_MSHR_SET_WIDTH) + ) mshr_mem( + .clk (clk_i), + .rst_n (rst_ni), + .cs (mshr_cs), + .we (mshr_we), + .addr (mshr_addr), + .wbyteenable (mshr_wbyteenable), + .wdata (mshr_wdata), + .rdata (mshr_rdata) + ); + end else begin : mshr_sram_gen + hpdcache_sram_wbyteenable #( + .DATA_SIZE (HPDCACHE_MSHR_WAYS*HPDCACHE_MSHR_RAM_ENTRY_BITS), + .ADDR_SIZE (HPDCACHE_MSHR_SET_WIDTH) + ) mshr_mem( + .clk (clk_i), + .rst_n (rst_ni), + .cs (mshr_cs), + .we (mshr_we), + .addr (mshr_addr), + .wbyteenable (mshr_wbyteenable), + .wdata (mshr_wdata), + .rdata (mshr_rdata) + ); + end + end else begin : mshr_wmask_gen + typedef logic [HPDCACHE_MSHR_RAM_ENTRY_BITS-1:0] mshr_sram_wmask_t; + mshr_sram_wmask_t [HPDCACHE_MSHR_WAYS-1:0] mshr_wmask; + + always_comb + begin : mshr_wmask_comb + for (int unsigned i = 0; i < HPDCACHE_MSHR_WAYS; i++) begin + mshr_wmask[i] = (int'(alloc_way_o) == i) ? 
'1 : '0; + end + end + + if (HPDCACHE_MSHR_USE_REGBANK) begin : mshr_regbank_gen + hpdcache_regbank_wmask_1rw #( + .DATA_SIZE (HPDCACHE_MSHR_WAYS*HPDCACHE_MSHR_RAM_ENTRY_BITS), + .ADDR_SIZE (HPDCACHE_MSHR_SET_WIDTH) + ) mshr_mem( + .clk (clk_i), + .rst_n (rst_ni), + .cs (mshr_cs), + .we (mshr_we), + .addr (mshr_addr), + .wmask (mshr_wmask), + .wdata (mshr_wdata), + .rdata (mshr_rdata) + ); + end else begin : mshr_sram_gen + hpdcache_sram_wmask #( + .DATA_SIZE (HPDCACHE_MSHR_WAYS*HPDCACHE_MSHR_RAM_ENTRY_BITS), + .ADDR_SIZE (HPDCACHE_MSHR_SET_WIDTH) + ) mshr_mem( + .clk (clk_i), + .rst_n (rst_ni), + .cs (mshr_cs), + .we (mshr_we), + .addr (mshr_addr), + .wmask (mshr_wmask), + .wdata (mshr_wdata), + .rdata (mshr_rdata) + ); + end + end + endgenerate + + always_comb + begin : ram_word_fitting_comb + for (int unsigned i = 0; i < HPDCACHE_MSHR_WAYS; i++) begin + mshr_wdata[i] = mshr_sram_data_t'(mshr_wentry[i]); + mshr_rentry[i] = mshr_entry_t'(mshr_rdata[i][0 +: HPDCACHE_MSHR_ENTRY_BITS]); + end + end + // }}} + + // Assertions + // {{{ + // pragma translate_off + one_command_assert: assert property (@(posedge clk_i) + (ack_i -> !(alloc_i || check_i))) else + $error("MSHR: ack with concurrent alloc or check"); + // pragma translate_on + // }}} +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_mshr_to_cache_set.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_mshr_to_cache_set.sv new file mode 100644 index 00000000..3dc8b73a --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_mshr_to_cache_set.sv @@ -0,0 +1,105 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. 
You may obtain a copy of the
+ * License at
+ *
+ * https://solderpad.org/licenses/SHL-2.1/
+ *
+ * Unless required by applicable law or agreed to in writing, any work
+ * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Authors : Cesar Fuguet
+ * Creation Date : April, 2021
+ * Description : HPDcache MSHR set translation table (MSHR set/way -> HPDcache set)
+ * History :
+ */
+module hpdcache_mshr_to_cache_set
+import hpdcache_pkg::*;
+// Ports
+// {{{
+(
+    // Clock signals
+    input logic clk_i, // the table is written on the rising edge; there is no reset input
+
+    // Write interface
+    input logic write_i, // write enable of the translation table
+    input hpdcache_set_t write_dcache_set_i, // HPDcache set to record; its low bits select the table row
+    input mshr_way_t write_mshr_way_i, // table column (MSHR way) to write
+
+    // Read interface
+    input mshr_way_t read_mshr_way_i, // MSHR way of the entry to translate
+    input mshr_set_t read_mshr_set_i, // MSHR set of the entry to translate
+    output hpdcache_set_t read_dcache_set_o // HPDcache set rebuilt for that (set, way); combinational
+);
+// }}}
+    //
+
+    generate
+        // Number of HPDcache sets is greater than the number of MSHR sets
+        // In this case, a translation table (in flip-flops) is needed
+        // {{{
+        // Only the most significant bits of the HPDcache set are stored; the low
+        // HPDCACHE_MSHR_SET_WIDTH bits are the MSHR set index itself
+        if (HPDCACHE_SETS > HPDCACHE_MSHR_SETS) begin : hpdcache_sets_gt_mshr_sets_gen
+            localparam hpdcache_uint TRLT_TAB_ENTRY_WIDTH =
+                HPDCACHE_SET_WIDTH - HPDCACHE_MSHR_SET_WIDTH; // number of stored (upper) bits per entry
+            typedef logic [TRLT_TAB_ENTRY_WIDTH-1:0] trlt_entry_t;
+
+
+            // Translation table
+            //
+            // This table is used to store the most significant bits of the HPDcache set
+            trlt_entry_t [HPDCACHE_MSHR_SETS-1:0][HPDCACHE_MSHR_WAYS-1:0] tab; // indexed [MSHR set][MSHR way]
+            trlt_entry_t tab_wdata; // upper bits of write_dcache_set_i
+            mshr_set_t write_mshr_set; // lower bits of write_dcache_set_i (row being written)
+
+            // Write operation
+            // {{{
+            // Write most significant bits of the HPDcache set into the
+            // translation table
+            always_ff @(posedge clk_i) // no reset term in this process
+            begin
+                if (write_i) begin
+                    tab[write_mshr_set][write_mshr_way_i] <= tab_wdata;
+                end
+            end
+
+            assign tab_wdata = write_dcache_set_i[HPDCACHE_MSHR_SET_WIDTH +:
+                                                  TRLT_TAB_ENTRY_WIDTH],
+                   write_mshr_set = write_dcache_set_i[0 +: HPDCACHE_MSHR_SET_WIDTH];
+            // }}}
+
+            // Read operation
+            // {{{
+            // Concatenate the mshr set with the most significant bits of the
+            // dcache set stored in the translation table
+            assign read_dcache_set_o = {tab[read_mshr_set_i][read_mshr_way_i], read_mshr_set_i};
+            // }}}
+        end
+        // }}}
+
+        // Number of HPDcache sets is less than or equal to the number of MSHR sets
+        // In this case, no translation table is needed
+        // {{{
+        else begin : hpdcache_sets_le_mshr_sets_gen
+            assign read_dcache_set_o = hpdcache_set_t'(read_mshr_set_i); // size cast only; clk_i and the write ports are unused in this branch
+        end
+        // }}}
+    endgenerate
+
+// Assertions (none are defined between the pragmas below)
+// {{{
+// pragma translate_off
+// pragma translate_on
+// }}}
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_pkg.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_pkg.sv
new file mode 100755
index 00000000..8a96a169
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_pkg.sv
@@ -0,0 +1,623 @@
+/*
+ * Copyright 2023 CEA*
+ * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ * may not use this file except in compliance with the License, or, at your
+ * option, the Apache License version 2.0. You may obtain a copy of the
+ * License at
+ *
+ * https://solderpad.org/licenses/SHL-2.1/
+ *
+ * Unless required by applicable law or agreed to in writing, any work
+ * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Authors : Cesar Fuguet
+ * Creation Date : April, 2021
+ * Description : Write-Through (WT), High-Throughput (HTPUT) HPDcache Package
+ * History :
+ */
+package hpdcache_pkg;
+    // Definition of global constants for the HPDcache data and directory
+    // (all values are taken from hpdcache_params_pkg)
+    // {{{
+
+    // HPDcache physical address width (bits)
+    localparam int unsigned HPDCACHE_PA_WIDTH = hpdcache_params_pkg::PARAM_PA_WIDTH;
+
+    // HPDcache number of sets
+    localparam int unsigned HPDCACHE_SETS = hpdcache_params_pkg::PARAM_SETS;
+
+    // HPDcache number of ways
+    localparam int unsigned HPDCACHE_WAYS = hpdcache_params_pkg::PARAM_WAYS;
+
+    // HPDcache word width (bits)
+    localparam int unsigned HPDCACHE_WORD_WIDTH = hpdcache_params_pkg::PARAM_WORD_WIDTH;
+
+    // HPDcache number of words per cache line (the width in bits is HPDCACHE_CL_WIDTH)
+    localparam int unsigned HPDCACHE_CL_WORDS = hpdcache_params_pkg::PARAM_CL_WORDS;
+
+    // HPDcache number of words in the request data channels (request and response)
+    localparam int unsigned HPDCACHE_REQ_WORDS = hpdcache_params_pkg::PARAM_REQ_WORDS;
+
+    // HPDcache request transaction ID width (bits)
+    localparam int unsigned HPDCACHE_REQ_TRANS_ID_WIDTH = hpdcache_params_pkg::PARAM_REQ_TRANS_ID_WIDTH;
+
+    // HPDcache request source ID width (bits)
+    localparam int unsigned HPDCACHE_REQ_SRC_ID_WIDTH = hpdcache_params_pkg::PARAM_REQ_SRC_ID_WIDTH;
+    // }}}
+
+    // Utility definitions (fixed-width 4-state integer types)
+    // {{{
+    typedef logic unsigned [31:0] hpdcache_uint;
+    typedef logic signed [31:0] hpdcache_int;
+    typedef logic unsigned [31:0] hpdcache_uint32;
+    typedef logic signed [31:0] hpdcache_int32;
+    typedef logic unsigned [63:0] hpdcache_uint64;
+    typedef logic signed [63:0] hpdcache_int64;
+    // }}}
+
+    // Definition of constants and types for HPDcache directory memory
+    // {{{
+    localparam int unsigned HPDCACHE_CL_WIDTH = HPDCACHE_CL_WORDS*HPDCACHE_WORD_WIDTH; // cache-line width (bits)
+    localparam int unsigned HPDCACHE_OFFSET_WIDTH = $clog2(HPDCACHE_CL_WIDTH/8); // byte offset within a cache line
+    localparam int unsigned HPDCACHE_NLINE_WIDTH = HPDCACHE_PA_WIDTH - HPDCACHE_OFFSET_WIDTH; // address bits above the offset
+    localparam int unsigned HPDCACHE_SET_WIDTH = $clog2(HPDCACHE_SETS); // set index bits
+    localparam int unsigned HPDCACHE_TAG_WIDTH = HPDCACHE_NLINE_WIDTH - HPDCACHE_SET_WIDTH; // address bits above the set index
+    localparam int unsigned HPDCACHE_WORD_IDX_WIDTH = $clog2(HPDCACHE_CL_WORDS); // word index within a cache line
+
+    typedef logic unsigned [ HPDCACHE_OFFSET_WIDTH-1:0] hpdcache_offset_t;
+    typedef logic unsigned [ HPDCACHE_NLINE_WIDTH-1:0] hpdcache_nline_t;
+    typedef logic unsigned [ HPDCACHE_SET_WIDTH-1:0] hpdcache_set_t;
+    typedef logic unsigned [ HPDCACHE_TAG_WIDTH-1:0] hpdcache_tag_t;
+    typedef logic unsigned [ $clog2(HPDCACHE_WAYS)-1:0] hpdcache_way_t; // NOTE(review): $clog2(1) is 0, so one way gives range [-1:0] - confirm WAYS > 1
+    typedef logic unsigned [ HPDCACHE_WAYS-1:0] hpdcache_way_vector_t; // one bit per way
+    typedef logic unsigned [HPDCACHE_WORD_IDX_WIDTH-1:0] hpdcache_word_t;
+
+    typedef struct packed {
+        hpdcache_tag_t tag;
+        logic [1:0] reserved;
+    } hpdcache_dir_entry_t;
+
+    localparam int unsigned HPDCACHE_DIR_RAM_WIDTH = $bits(hpdcache_dir_entry_t);
+    localparam int unsigned HPDCACHE_DIR_RAM_DEPTH = HPDCACHE_SETS; // one directory RAM entry per set
+    localparam int unsigned HPDCACHE_DIR_RAM_ADDR_WIDTH = $clog2(HPDCACHE_DIR_RAM_DEPTH);
+
+    typedef logic [HPDCACHE_DIR_RAM_ADDR_WIDTH-1:0] hpdcache_dir_addr_t;
+
+    function automatic hpdcache_way_t hpdcache_way_vector_to_index(input hpdcache_way_vector_t way); // index of the lowest set bit of way
+        for (int unsigned i = 0; i < HPDCACHE_WAYS; i++) begin
+            if (way[i]) return hpdcache_way_t'(i);
+        end
+        return 0; // no bit set: indistinguishable from "bit 0 set"
+    endfunction
+
+    // }}}
+
+    // Definition of constants and types for HPDcache data memory
+    // {{{
+    localparam int unsigned HPDCACHE_DATA_WAYS_PER_RAM_WORD =
+        hpdcache_params_pkg::PARAM_DATA_WAYS_PER_RAM_WORD;
+
+    localparam int unsigned HPDCACHE_DATA_SETS_PER_RAM = /* FIXME this parameter is currently ignored */
+        hpdcache_params_pkg::PARAM_DATA_SETS_PER_RAM;
+
+    // HPDcache DATA RAM implements write byte enable
+    localparam bit HPDCACHE_DATA_RAM_WBYTEENABLE =
+        hpdcache_params_pkg::PARAM_DATA_RAM_WBYTEENABLE;
+
+    // Define the number of contiguous memory words that can be accessed
+    // simultaneously from the cache.
+    // - This limits the maximum width for the data channel from requesters
+    // - This impacts the refill latency
+    localparam int unsigned HPDCACHE_ACCESS_WORDS = hpdcache_params_pkg::PARAM_ACCESS_WORDS;
+
+
+    localparam int unsigned HPDCACHE_DATA_RAM_WIDTH =
+        HPDCACHE_DATA_WAYS_PER_RAM_WORD*HPDCACHE_WORD_WIDTH; // bits per RAM word
+    localparam int unsigned HPDCACHE_DATA_RAM_Y_CUTS = HPDCACHE_WAYS/HPDCACHE_DATA_WAYS_PER_RAM_WORD; // integer division
+    localparam int unsigned HPDCACHE_DATA_RAM_X_CUTS = HPDCACHE_ACCESS_WORDS;
+    localparam int unsigned HPDCACHE_DATA_RAM_ACCESS_WIDTH = HPDCACHE_ACCESS_WORDS*HPDCACHE_WORD_WIDTH; // bits per access
+    localparam int unsigned HPDCACHE_DATA_RAM_ENTR_PER_SET = HPDCACHE_CL_WORDS/HPDCACHE_ACCESS_WORDS; // integer division
+    localparam int unsigned HPDCACHE_DATA_RAM_DEPTH = HPDCACHE_SETS*HPDCACHE_DATA_RAM_ENTR_PER_SET;
+    localparam int unsigned HPDCACHE_DATA_RAM_ADDR_WIDTH = $clog2(HPDCACHE_DATA_RAM_DEPTH);
+
+    typedef logic [ HPDCACHE_WORD_WIDTH-1:0] hpdcache_data_word_t;
+    typedef logic [ HPDCACHE_WORD_WIDTH/8-1:0] hpdcache_data_be_t; // one byte-enable bit per byte of a word
+    typedef logic [ $clog2(HPDCACHE_DATA_RAM_Y_CUTS)-1:0] hpdcache_data_ram_row_idx_t;
+    typedef logic [ $clog2(HPDCACHE_DATA_WAYS_PER_RAM_WORD)-1:0] hpdcache_data_ram_way_idx_t;
+
+    typedef logic [HPDCACHE_DATA_RAM_ADDR_WIDTH-1:0] hpdcache_data_ram_addr_t;
+    typedef hpdcache_data_word_t[HPDCACHE_DATA_WAYS_PER_RAM_WORD-1:0] hpdcache_data_ram_data_t;
+    typedef hpdcache_data_be_t [HPDCACHE_DATA_WAYS_PER_RAM_WORD-1:0] hpdcache_data_ram_be_t;
+
+    typedef hpdcache_data_ram_data_t
+        [HPDCACHE_DATA_RAM_Y_CUTS-1:0]
+        [HPDCACHE_DATA_RAM_X_CUTS-1:0]
+        hpdcache_data_entry_t; // data words, indexed [y cut][x cut]
+
+    typedef hpdcache_data_ram_be_t
+        [HPDCACHE_DATA_RAM_Y_CUTS-1:0]
+        [HPDCACHE_DATA_RAM_X_CUTS-1:0]
+        hpdcache_data_be_entry_t; // byte enables, indexed [y cut][x cut]
+
+    typedef logic
+        [HPDCACHE_DATA_RAM_X_CUTS-1:0]
+        hpdcache_data_row_enable_t; // one bit per x cut
+
+    typedef hpdcache_data_row_enable_t
+        [HPDCACHE_DATA_RAM_Y_CUTS-1:0]
+        hpdcache_data_enable_t; // one row-enable vector per y cut
+
+    typedef hpdcache_data_ram_addr_t
+        [HPDCACHE_DATA_RAM_Y_CUTS-1:0]
+        [HPDCACHE_DATA_RAM_X_CUTS-1:0]
+        hpdcache_data_addr_t; // one RAM address per [y cut][x cut]
+    // }}}
+
+    // Definition of interface with miss handler
+    // {{{
+    localparam int unsigned HPDCACHE_REFILL_DATA_WIDTH = HPDCACHE_DATA_RAM_ACCESS_WIDTH;
+
+    // Use feedthrough FIFOs from the refill handler to the core. This
+    // reduces the latency (by one cycle) but adds an additional timing path
+    localparam bit HPDCACHE_REFILL_CORE_RSP_FEEDTHROUGH =
+        hpdcache_params_pkg::PARAM_REFILL_CORE_RSP_FEEDTHROUGH;
+
+    typedef hpdcache_data_word_t[HPDCACHE_ACCESS_WORDS-1:0] hpdcache_refill_data_t;
+    typedef hpdcache_data_be_t [HPDCACHE_ACCESS_WORDS-1:0] hpdcache_refill_be_t;
+    // }}}
+
+    // Definition of interface with requesters
+    // {{{
+    localparam int unsigned HPDCACHE_REQ_DATA_WIDTH = HPDCACHE_REQ_WORDS*HPDCACHE_WORD_WIDTH;
+    localparam int unsigned HPDCACHE_REQ_DATA_BYTES = HPDCACHE_REQ_DATA_WIDTH/8;
+    localparam int unsigned HPDCACHE_REQ_WORD_INDEX_WIDTH = $clog2(HPDCACHE_REQ_WORDS);
+    localparam int unsigned HPDCACHE_REQ_BYTE_OFFSET_WIDTH = $clog2(HPDCACHE_REQ_DATA_BYTES);
+    localparam int unsigned HPDCACHE_REQ_OFFSET_WIDTH = HPDCACHE_PA_WIDTH - HPDCACHE_TAG_WIDTH; // i.e. line offset bits + set index bits
+
+    typedef logic [HPDCACHE_PA_WIDTH-1:0] hpdcache_req_addr_t;
+    typedef logic [HPDCACHE_REQ_OFFSET_WIDTH-1:0] hpdcache_req_offset_t;
+    typedef hpdcache_data_word_t [HPDCACHE_REQ_WORDS-1:0] hpdcache_req_data_t;
+    typedef hpdcache_data_be_t [HPDCACHE_REQ_WORDS-1:0] hpdcache_req_be_t;
+    typedef logic [2:0] hpdcache_req_size_t;
+    typedef logic [HPDCACHE_REQ_SRC_ID_WIDTH-1:0] hpdcache_req_sid_t;
+    typedef logic [HPDCACHE_REQ_TRANS_ID_WIDTH-1:0] hpdcache_req_tid_t;
+
+    // Definition of operation codes
+    // {{{
+    typedef enum logic [3:0] {
+        HPDCACHE_REQ_LOAD = 4'h0,
+        HPDCACHE_REQ_STORE = 4'h1,
+        // RESERVED = 4'h2,
+        // RESERVED = 4'h3,
+        HPDCACHE_REQ_AMO_LR = 4'h4,
+        HPDCACHE_REQ_AMO_SC = 4'h5,
+        HPDCACHE_REQ_AMO_SWAP = 4'h6,
+        HPDCACHE_REQ_AMO_ADD = 4'h7,
+        HPDCACHE_REQ_AMO_AND = 4'h8,
+        HPDCACHE_REQ_AMO_OR = 4'h9,
+        HPDCACHE_REQ_AMO_XOR = 4'ha,
+        HPDCACHE_REQ_AMO_MAX = 4'hb,
+        HPDCACHE_REQ_AMO_MAXU = 4'hc,
+        HPDCACHE_REQ_AMO_MIN = 4'hd,
+        HPDCACHE_REQ_AMO_MINU = 4'he,
+        HPDCACHE_REQ_CMO = 4'hf
+    } hpdcache_req_op_t;
+    // }}}
+
+    // Definition of CMO codes (the enum base type is the request size type)
+    // {{{
+    typedef enum hpdcache_req_size_t {
+        HPDCACHE_REQ_CMO_FENCE = 3'h0,
+        // RESERVED = 3'h1,
+        HPDCACHE_REQ_CMO_INVAL_NLINE = 3'h2,
+        HPDCACHE_REQ_CMO_INVAL_SET_WAY = 3'h3,
+        HPDCACHE_REQ_CMO_INVAL_ALL = 3'h4,
+        HPDCACHE_REQ_CMO_PREFETCH = 3'h5
+    } hpdcache_req_cmo_t;
+    // }}}
+
+    // Definition of PMA flags
+    // {{{
+    typedef struct packed
+    {
+        logic uncacheable;
+        logic io; // FIXME: for future use
+    } hpdcache_pma_t;
+    // }}}
+
+    // Definition of interfaces
+    // {{{
+    // Request Interface
+    typedef struct packed
+    {
+        hpdcache_req_offset_t addr_offset;
+        hpdcache_req_data_t wdata;
+        hpdcache_req_op_t op;
+        hpdcache_req_be_t be;
+        hpdcache_req_size_t size; // for op == HPDCACHE_REQ_CMO the helpers below compare this field against hpdcache_req_cmo_t codes
+        hpdcache_req_sid_t sid;
+        hpdcache_req_tid_t tid;
+        logic need_rsp;
+
+        // only valid in case of physically indexed requests
+        logic phys_indexed;
+        hpdcache_tag_t addr_tag;
+        hpdcache_pma_t pma;
+    } hpdcache_req_t;
+
+    // Response Interface
+    typedef struct packed
+    {
+        hpdcache_req_data_t rdata;
+        hpdcache_req_sid_t sid;
+        hpdcache_req_tid_t tid;
+        logic error;
+        logic aborted;
+    } hpdcache_rsp_t;
+    // }}}
+
+    // Definition of functions
+    // (each is_<x>(op) predicate below returns 1'b1 only for the matching opcode)
+    // {{{
+    function automatic logic is_load(input hpdcache_req_op_t op);
+        case (op)
+            HPDCACHE_REQ_LOAD: return 1'b1;
+            default: return 1'b0;
+        endcase
+    endfunction
+
+    function automatic logic is_store(input hpdcache_req_op_t op);
+        case (op)
+            HPDCACHE_REQ_STORE: return 1'b1;
+            default: return 1'b0;
+        endcase
+    endfunction
+
+    function automatic logic is_amo(input hpdcache_req_op_t op); // 1'b1 for any of the eleven HPDCACHE_REQ_AMO_* opcodes
+        case (op)
+            HPDCACHE_REQ_AMO_LR,
+            HPDCACHE_REQ_AMO_SC,
+            HPDCACHE_REQ_AMO_SWAP,
+            HPDCACHE_REQ_AMO_ADD,
+            HPDCACHE_REQ_AMO_AND,
+            HPDCACHE_REQ_AMO_OR,
+            HPDCACHE_REQ_AMO_XOR,
+            HPDCACHE_REQ_AMO_MAX,
+            HPDCACHE_REQ_AMO_MAXU,
+            HPDCACHE_REQ_AMO_MIN,
+            HPDCACHE_REQ_AMO_MINU:
+                return 1'b1;
+            default:
+                return 1'b0;
+        endcase
+    endfunction
+
+    function automatic logic is_amo_lr(input hpdcache_req_op_t op);
+        case (op)
+            HPDCACHE_REQ_AMO_LR: return 1'b1;
+            default: return 1'b0;
+        endcase
+    endfunction
+
+    function automatic logic is_amo_sc(input hpdcache_req_op_t op);
+        case (op)
+            HPDCACHE_REQ_AMO_SC: return 1'b1;
+            default: return 1'b0;
+        endcase
+    endfunction
+
+    function automatic logic is_amo_swap(input hpdcache_req_op_t op);
+        case (op)
+            HPDCACHE_REQ_AMO_SWAP: return 1'b1;
+            default: return 1'b0;
+        endcase
+    endfunction
+
+    function automatic logic is_amo_add(input hpdcache_req_op_t op);
+        case (op)
+            HPDCACHE_REQ_AMO_ADD: return 1'b1;
+            default: return 1'b0;
+        endcase
+    endfunction
+
+    function automatic logic is_amo_and(input hpdcache_req_op_t op);
+        case (op)
+            HPDCACHE_REQ_AMO_AND: return 1'b1;
+            default: return 1'b0;
+        endcase
+    endfunction
+
+    function automatic logic is_amo_or(input hpdcache_req_op_t op);
+        case (op)
+            HPDCACHE_REQ_AMO_OR: return 1'b1;
+            default: return 1'b0;
+        endcase
+    endfunction
+
+    function automatic logic is_amo_xor(input hpdcache_req_op_t op);
+        case (op)
+            HPDCACHE_REQ_AMO_XOR: return 1'b1;
+            default: return 1'b0;
+        endcase
+    endfunction
+
+    function automatic logic is_amo_max(input hpdcache_req_op_t op);
+        case (op)
+            HPDCACHE_REQ_AMO_MAX: return 1'b1;
+            default: return 1'b0;
+        endcase
+    endfunction
+
+    function automatic logic is_amo_maxu(input hpdcache_req_op_t op);
+        case (op)
+            HPDCACHE_REQ_AMO_MAXU: return 1'b1;
+            default: return 1'b0;
+        endcase
+    endfunction
+
+    function automatic logic is_amo_min(input hpdcache_req_op_t op);
+        case (op)
+            HPDCACHE_REQ_AMO_MIN: return 1'b1;
+            default: return 1'b0;
+        endcase
+    endfunction
+
+    function automatic logic is_amo_minu(input hpdcache_req_op_t op);
+        case (op)
+            HPDCACHE_REQ_AMO_MINU: return 1'b1;
+            default: return 1'b0;
+        endcase
+    endfunction
+
+    function automatic logic is_cmo_inval( // 1'b1 when op is HPDCACHE_REQ_CMO and sz is one of the three INVAL_* codes
+            input hpdcache_req_op_t op,
+            input hpdcache_req_size_t sz);
+        case (op)
+            HPDCACHE_REQ_CMO:
+                case (sz)
+                    HPDCACHE_REQ_CMO_INVAL_NLINE,
+                    HPDCACHE_REQ_CMO_INVAL_SET_WAY,
+                    HPDCACHE_REQ_CMO_INVAL_ALL: begin
+                        return 1'b1;
+                    end
+                    default: begin
+                        return 1'b0;
+                    end
+                endcase
+            default: begin
+                return 1'b0;
+            end
+        endcase
+    endfunction
+
+    function automatic logic is_cmo_inval_by_nline(input hpdcache_req_size_t sz); // compares sz only; op is not checked here
+        return (sz == HPDCACHE_REQ_CMO_INVAL_NLINE);
+    endfunction
+
+    function automatic logic is_cmo_inval_by_set(input hpdcache_req_size_t sz); // compares sz only; op is not checked here
+        return (sz == HPDCACHE_REQ_CMO_INVAL_SET_WAY);
+    endfunction
+
+    function automatic logic is_cmo_inval_all(input hpdcache_req_size_t sz); // compares sz only; op is not checked here
+        return (sz == HPDCACHE_REQ_CMO_INVAL_ALL);
+    endfunction
+
+    function automatic logic is_cmo_fence( // 1'b1 when op is HPDCACHE_REQ_CMO and sz is the FENCE code
+            input hpdcache_req_op_t op,
+            input hpdcache_req_size_t sz);
+        case (op)
+            HPDCACHE_REQ_CMO: begin
+                return (sz == HPDCACHE_REQ_CMO_FENCE);
+            end
+            default: begin
+                return 1'b0;
+            end
+        endcase
+    endfunction
+
+    function automatic logic is_cmo_prefetch( // 1'b1 when op is HPDCACHE_REQ_CMO and sz is the PREFETCH code
+            input hpdcache_req_op_t op,
+            input hpdcache_req_size_t sz);
+        case (op)
+            HPDCACHE_REQ_CMO: begin
+                return (sz == HPDCACHE_REQ_CMO_PREFETCH);
+            end
+            default: begin
+                return 1'b0;
+            end
+        endcase
+    endfunction
+
+    function automatic hpdcache_tag_t hpdcache_get_req_addr_tag(input hpdcache_req_addr_t addr); // bits above offset + set index
+        return addr[(HPDCACHE_OFFSET_WIDTH + HPDCACHE_SET_WIDTH) +: HPDCACHE_TAG_WIDTH];
+    endfunction
+
+    function automatic hpdcache_set_t hpdcache_get_req_addr_set(input hpdcache_req_addr_t addr); // set index, just above the line offset
+        return addr[HPDCACHE_OFFSET_WIDTH +: HPDCACHE_SET_WIDTH];
+    endfunction
+
+    function automatic hpdcache_word_t hpdcache_get_req_addr_word(input hpdcache_req_addr_t addr); // word index, just above the byte-in-word bits
+        return addr[$clog2(HPDCACHE_WORD_WIDTH/8) +: HPDCACHE_WORD_IDX_WIDTH];
+    endfunction
+
+    function automatic hpdcache_offset_t hpdcache_get_req_addr_offset(input hpdcache_req_addr_t addr); // lowest HPDCACHE_OFFSET_WIDTH bits
+        return addr[0 +: HPDCACHE_OFFSET_WIDTH];
+    endfunction
+
+    function automatic hpdcache_nline_t hpdcache_get_req_addr_nline(input hpdcache_req_addr_t addr); // all bits above the line offset
+        return addr[HPDCACHE_OFFSET_WIDTH +: HPDCACHE_NLINE_WIDTH];
+    endfunction
+
+    function automatic hpdcache_set_t hpdcache_get_req_offset_set(input hpdcache_req_offset_t offset); // same slice as the addr variant, on the offset part
+        return offset[HPDCACHE_OFFSET_WIDTH +: HPDCACHE_SET_WIDTH];
+    endfunction
+
+    function automatic hpdcache_word_t hpdcache_get_req_offset_word(input hpdcache_req_offset_t offset); // same slice as the addr variant, on the offset part
+        return offset[$clog2(HPDCACHE_WORD_WIDTH/8) +: HPDCACHE_WORD_IDX_WIDTH];
+    endfunction
+
+    // }}}
+    // }}}
+
+    // Definition of constants and types for the Miss Status Holding Register (MSHR)
+    // {{{
+
+    // HPDcache MSHR number of sets
+    localparam int unsigned HPDCACHE_MSHR_SETS =
+        hpdcache_params_pkg::PARAM_MSHR_SETS;
+
+    // HPDcache MSHR number of ways
+    localparam int unsigned HPDCACHE_MSHR_WAYS =
+        hpdcache_params_pkg::PARAM_MSHR_WAYS;
+
+    // HPDcache MSHR number of ways in the same SRAM word
+    localparam int unsigned HPDCACHE_MSHR_WAYS_PER_RAM_WORD =
+        hpdcache_params_pkg::PARAM_MSHR_WAYS_PER_RAM_WORD; /* FIXME this parameter is currently ignored */
+
+    // HPDcache MSHR number of sets in the same SRAM
+    localparam int unsigned HPDCACHE_MSHR_SETS_PER_RAM =
+        hpdcache_params_pkg::PARAM_MSHR_SETS_PER_RAM; /* FIXME this parameter is currently ignored */
+
+    // HPDcache MSHR implements write byte enable
+    localparam bit HPDCACHE_MSHR_RAM_WBYTEENABLE =
+        hpdcache_params_pkg::PARAM_MSHR_RAM_WBYTEENABLE;
+    localparam bit HPDCACHE_MSHR_USE_REGBANK = // NOTE(review): presumably selects a register bank instead of SRAM - confirm
+        hpdcache_params_pkg::PARAM_MSHR_USE_REGBANK;
+
+    localparam int unsigned HPDCACHE_MSHR_SET_WIDTH = $clog2(HPDCACHE_MSHR_SETS);
+    localparam int unsigned HPDCACHE_MSHR_WAY_WIDTH = $clog2(HPDCACHE_MSHR_WAYS);
+    localparam int unsigned HPDCACHE_MSHR_TAG_WIDTH = HPDCACHE_NLINE_WIDTH - HPDCACHE_MSHR_SET_WIDTH; // nline bits above the MSHR set index
+
+    typedef logic unsigned [HPDCACHE_MSHR_SET_WIDTH-1:0] mshr_set_t;
+    typedef logic unsigned [HPDCACHE_MSHR_TAG_WIDTH-1:0] mshr_tag_t;
+    typedef logic unsigned [HPDCACHE_MSHR_WAY_WIDTH-1:0] mshr_way_t;
+    // }}}
+
+    // Definition of interface with memory
+    // {{{
+    typedef logic [7:0] hpdcache_mem_len_t;
+    typedef logic [2:0] hpdcache_mem_size_t;
+
+    typedef enum logic [1:0] {
+        HPDCACHE_MEM_RESP_OK = 2'b00,
+        HPDCACHE_MEM_RESP_NOK = 2'b01
+    } hpdcache_mem_error_e;
+
+    typedef enum logic [1:0] {
+        HPDCACHE_MEM_READ = 2'b00,
+        HPDCACHE_MEM_WRITE = 2'b01,
+        HPDCACHE_MEM_ATOMIC = 2'b10
+        // Reserved = 2'b11 - TODO: CMO ?
+    } hpdcache_mem_command_e;
+
+    typedef enum logic [3:0] {
+        HPDCACHE_MEM_ATOMIC_ADD = 4'b0000,
+        HPDCACHE_MEM_ATOMIC_CLR = 4'b0001,
+        HPDCACHE_MEM_ATOMIC_SET = 4'b0010,
+        HPDCACHE_MEM_ATOMIC_EOR = 4'b0011,
+        HPDCACHE_MEM_ATOMIC_SMAX = 4'b0100,
+        HPDCACHE_MEM_ATOMIC_SMIN = 4'b0101,
+        HPDCACHE_MEM_ATOMIC_UMAX = 4'b0110,
+        HPDCACHE_MEM_ATOMIC_UMIN = 4'b0111,
+        HPDCACHE_MEM_ATOMIC_SWAP = 4'b1000,
+        // Reserved = 4'b1001,
+        // Reserved = 4'b1010,
+        // Reserved = 4'b1011,
+        HPDCACHE_MEM_ATOMIC_LDEX = 4'b1100,
+        HPDCACHE_MEM_ATOMIC_STEX = 4'b1101
+        // Reserved = 4'b1110,
+        // Reserved = 4'b1111
+    } hpdcache_mem_atomic_e;
+
+    function automatic hpdcache_mem_size_t get_hpdcache_mem_size(int unsigned bytes); // maps a byte count to a 3-bit size code
+        if (bytes == 0) return 0;
+        else if (bytes <= 2) return 1; // NOTE(review): bytes == 1 also yields 1, same as 2 - confirm this is intended
+        else if (bytes <= 4) return 2;
+        else if (bytes <= 8) return 3;
+        else if (bytes <= 16) return 4;
+        else if (bytes <= 32) return 5;
+        else if (bytes <= 64) return 6;
+        else if (bytes <= 128) return 7;
+        // pragma translate_off
+        else $error("hpdcache: unsupported number of bytes"); // bytes > 128; NOTE(review): no return statement on this path
+        // pragma translate_on
+    endfunction
+    // }}}
+
+    // Definition of constants and types for the Write Buffer (WBUF)
+    // {{{
+    localparam int unsigned HPDCACHE_WBUF_DIR_ENTRIES =
+        hpdcache_params_pkg::PARAM_WBUF_DIR_ENTRIES;
+
+    localparam int unsigned HPDCACHE_WBUF_DATA_ENTRIES =
+        hpdcache_params_pkg::PARAM_WBUF_DATA_ENTRIES;
+
+    localparam int unsigned HPDCACHE_WBUF_WORDS =
+        hpdcache_params_pkg::PARAM_WBUF_WORDS;
+
+    localparam int unsigned HPDCACHE_WBUF_TIMECNT_WIDTH =
+        hpdcache_params_pkg::PARAM_WBUF_TIMECNT_WIDTH;
+
+    // Use feedthrough FIFOs from the write-buffer to the NoC. This reduces
+    // the latency (by one cycle) but adds an additional timing path
+    localparam bit HPDCACHE_WBUF_SEND_FEEDTHROUGH =
+        hpdcache_params_pkg::PARAM_WBUF_SEND_FEEDTHROUGH;
+
+    localparam int unsigned HPDCACHE_WBUF_DATA_WIDTH = HPDCACHE_REQ_DATA_WIDTH*
+                                                       HPDCACHE_WBUF_WORDS; // HPDCACHE_WBUF_WORDS counts request-data-wide units
+    localparam int unsigned HPDCACHE_WBUF_DATA_PTR_WIDTH = $clog2(HPDCACHE_WBUF_DATA_ENTRIES);
+    localparam int unsigned HPDCACHE_WBUF_DIR_PTR_WIDTH = $clog2(HPDCACHE_WBUF_DIR_ENTRIES);
+
+    typedef hpdcache_req_addr_t wbuf_addr_t;
+    typedef hpdcache_nline_t wbuf_match_t;
+    typedef hpdcache_req_data_t wbuf_data_t;
+    typedef hpdcache_req_be_t wbuf_be_t;
+    typedef wbuf_data_t[HPDCACHE_WBUF_WORDS-1:0] wbuf_data_buf_t;
+    typedef wbuf_be_t [HPDCACHE_WBUF_WORDS-1:0] wbuf_be_buf_t;
+    typedef logic unsigned [ HPDCACHE_WBUF_TIMECNT_WIDTH-1:0] wbuf_timecnt_t;
+    typedef logic unsigned [ HPDCACHE_WBUF_DIR_PTR_WIDTH-1:0] wbuf_dir_ptr_t;
+    typedef logic unsigned [HPDCACHE_WBUF_DATA_PTR_WIDTH-1:0] wbuf_data_ptr_t;
+    // }}}
+
+    // Definition of constants and types for the Replay Table (RTAB)
+    // {{{
+    localparam int HPDCACHE_RTAB_ENTRIES = hpdcache_params_pkg::PARAM_RTAB_ENTRIES; // signed int, unlike the other counts in this package
+
+    typedef logic [$clog2(HPDCACHE_RTAB_ENTRIES)-1:0] rtab_ptr_t;
+    // }}}
+
+    // Definition of constants and types for the uncacheable request handler (UC)
+    // {{{
+    typedef struct packed {
+        logic is_ld;
+        logic is_st;
+        logic is_amo_lr;
+        logic is_amo_sc;
+        logic is_amo_swap;
+        logic is_amo_add;
+        logic is_amo_and;
+        logic is_amo_or;
+        logic is_amo_xor;
+        logic is_amo_max;
+        logic is_amo_maxu;
+        logic is_amo_min;
+        logic is_amo_minu;
+    } hpdcache_uc_op_t;
+    // }}}
+
+    // Definition of constants and types for the CMO request handler (CMOH)
+    // {{{
+    typedef struct packed {
+        logic is_inval_by_nline;
+        logic is_inval_by_set;
+        logic is_inval_all;
+        logic is_fence;
+    } hpdcache_cmoh_op_t;
+    // }}}
+endpackage
diff --git
a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_plru.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_plru.sv new file mode 100644 index 00000000..7697737d --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_plru.sv @@ -0,0 +1,138 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */
+/*
+ * Authors : Cesar Fuguet
+ * Creation Date : May, 2021
+ * Description : HPDcache Pseudo-LRU replacement policy (one PLRU bit per way, per set)
+ * History :
+ */
+module hpdcache_plru
+    // Parameters
+    // {{{
+#(
+    parameter int unsigned SETS = 0, // number of sets tracked; the default 0 is a placeholder to be overridden
+    parameter int unsigned WAYS = 0, // number of ways per set; the default 0 is a placeholder to be overridden
+
+    localparam type set_t = logic [$clog2(SETS)-1:0], // set index
+    localparam type way_vector_t = logic [WAYS-1:0] // one bit per way
+)
+    // }}}
+
+    // Ports
+    // {{{
+(
+    input logic clk_i,
+    input logic rst_ni, // asynchronous reset, active low
+
+    // PLRU update interface
+    input logic updt_i, // an access updates the PLRU bits
+    input set_t updt_set_i, // set being accessed
+    input way_vector_t updt_way_i, // way(s) being accessed, as a bit-vector
+
+    // Victim replacement interface
+    input logic repl_i, // a replacement is taking place
+    input set_t repl_set_i, // set in which a victim is chosen
+    input way_vector_t repl_dir_valid_i, // directory valid bit of each way in that set
+    input logic repl_updt_plru_i, // when low, a replacement leaves the PLRU bits unchanged
+
+    output way_vector_t victim_way_o // selected victim; combinational
+);
+    // }}}
+
+    // Internal signals and registers
+    // {{{
+    way_vector_t [SETS-1:0] plru_q, plru_d; // PLRU bits per set: registered value and next value
+    way_vector_t updt_plru; // PLRU bits of updt_set_i after marking updt_way_i
+    way_vector_t repl_plru; // PLRU bits of repl_set_i after marking victim_way_o
+    way_vector_t used_victim_way, unused_victim_way;
+    // }}}
+
+    // Victim way selection
+    // (hpdcache_prio_1hot_encoder is defined elsewhere; presumably it keeps a single set bit of val_i)
+    // {{{
+    hpdcache_prio_1hot_encoder #(.N(WAYS))
+        used_victim_select_i (
+            .val_i (~plru_q[repl_set_i]), // candidates: ways whose PLRU bit is clear
+            .val_o (used_victim_way)
+        );
+
+    hpdcache_prio_1hot_encoder #(.N(WAYS))
+        unused_victim_select_i (
+            .val_i (~repl_dir_valid_i), // candidates: ways that are not valid
+            .val_o (unused_victim_way)
+        );
+
+    // If there is a free entry in the directory (valid == 0), choose it as victim
+    assign victim_way_o = |unused_victim_way ? unused_victim_way : used_victim_way;
+    // }}}
+
+    // Pseudo-LRU update process
+    // {{{
+    assign updt_plru = plru_q[updt_set_i] | updt_way_i;
+    assign repl_plru = plru_q[repl_set_i] | victim_way_o;
+
+    always_comb
+    begin : plru_update_comb
+        plru_d = plru_q; // default: keep every set unchanged
+
+        case (1'b1) // the first matching item wins, so repl_i has priority over updt_i
+            // When replacing a cache-line, set the PLRU bit of the new line
+            repl_i:
+                if (repl_updt_plru_i) begin
+                    // If all PLRU bits of a given set would be set, reset them all
+                    // but the currently accessed way
+                    if (&repl_plru) begin
+                        plru_d[repl_set_i] = victim_way_o;
+                    end else begin
+                        plru_d[repl_set_i] = repl_plru;
+                    end
+                end
+
+            // When accessing a cache-line, set the corresponding PLRU bit
+            updt_i:
+                // If all PLRU bits of a given set would be set, reset them all
+                // but the currently accessed way
+                if (&updt_plru) begin
+                    plru_d[updt_set_i] = updt_way_i;
+                end else begin
+                    plru_d[updt_set_i] = updt_plru;
+                end
+
+            default: begin
+                // do nothing
+            end
+        endcase
+    end
+    // }}}
+
+    // Set state process
+    // {{{
+    always_ff @(posedge clk_i or negedge rst_ni)
+    begin : lru_ff
+        if (!rst_ni) begin
+            plru_q <= '0; // reset clears the PLRU bits of every set
+        end else begin
+            if (updt_i || repl_i) begin // register is only loaded on an update or a replacement
+                plru_q <= plru_d;
+            end
+        end
+    end
+    // }}}
+
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_rtab.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_rtab.sv
new file mode 100755
index 00000000..d7d9d640
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_rtab.sv
@@ -0,0 +1,666 @@
+/*
+ * Copyright 2023 CEA*
+ * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ * may not use this file except in compliance with the License, or, at your
+ * option, the Apache License version 2.0.
You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : September, 2021 + * Description : HPDcache Replay Table + * History : + */ +module hpdcache_rtab +import hpdcache_pkg::*; +// Parameters +// {{{ +#( + parameter type rtab_entry_t = logic +) +// }}} +// Ports +// {{{ +( + // Clock and reset signals + input logic clk_i, + input logic rst_ni, + + // Global control signals + output logic empty_o, // RTAB is empty + output logic full_o, // RTAB is full + + // Check RTAB signals + // This interface allows to check if there is an address-overlapping + // request in the RTAB with respect to the given nline. 
+ input logic check_i, // Check for hit (nline) in the RTAB + input hpdcache_nline_t check_nline_i, + output logic check_hit_o, + + // Allocate signals + // This interface allows to allocate a new request in a new linked list + input logic alloc_i, + input logic alloc_and_link_i, + input rtab_entry_t alloc_req_i, + input logic alloc_mshr_hit_i, + input logic alloc_mshr_full_i, + input logic alloc_mshr_ready_i, + input logic alloc_wbuf_hit_i, + input logic alloc_wbuf_not_ready_i, + + // Pop signals + // This interface allows to read (and remove) a request from the RTAB + output logic pop_try_valid_o, // Request ready to be replayed + input logic pop_try_i, + output rtab_entry_t pop_try_req_o, + output rtab_ptr_t pop_try_ptr_o, + + // Pop Commit signals + // This interface allows to actually remove a popped request + input logic pop_commit_i, + input rtab_ptr_t pop_commit_ptr_i, + + // Pop Rollback signals + // This interface allows to put back a popped request + input logic pop_rback_i, + input rtab_ptr_t pop_rback_ptr_i, + input logic pop_rback_mshr_hit_i, + input logic pop_rback_mshr_full_i, + input logic pop_rback_mshr_ready_i, + input logic pop_rback_wbuf_hit_i, + input logic pop_rback_wbuf_not_ready_i, + + + // Control signals from/to WBUF + output hpdcache_req_addr_t wbuf_addr_o, // address to check against ongoing writes + output logic wbuf_is_read_o, // monitored request is read + input logic wbuf_hit_open_i, // Hit on open entry in the write buf + input logic wbuf_hit_pend_i, // Hit on pend entry in the write buf + input logic wbuf_hit_sent_i, // Hit on sent entry in the write buf + input logic wbuf_not_ready_i, // Write buffer cannot accept the write + + // Control signals from the Miss Handler + input logic miss_ready_i, // Miss Handler is ready + + // Control signals from the Refill Handler + input logic refill_i, // Active refill + input hpdcache_nline_t refill_nline_i, // Cache-line index being refilled + + // Configuration parameters + input logic 
cfg_single_entry_i // Enable only one entry of the table +); +// }}} + +// Definition of constants, types and functions +// {{{ + localparam int N = HPDCACHE_RTAB_ENTRIES; + + function automatic rtab_ptr_t rtab_bv_to_index( + input logic [N-1:0] bv); + for (int i = 0; i < N; i++) begin + if (bv[i]) return rtab_ptr_t'(i); + end + return 0; + endfunction + + function automatic logic [N-1:0] rtab_index_to_bv( + input rtab_ptr_t index); + logic [N-1:0] bv; + + for (int i = 0; i < N; i++) begin + bv[i] = (rtab_ptr_t'(i) == index); + end + return bv; + endfunction + + function automatic bit rtab_mshr_set_equal( + input hpdcache_nline_t x, + input hpdcache_nline_t y); + return (x[0 +: HPDCACHE_MSHR_SET_WIDTH] == y[0 +: HPDCACHE_MSHR_SET_WIDTH]); + endfunction + + function automatic logic [N-1:0] rtab_next(rtab_ptr_t [N-1:0] next, rtab_ptr_t x); + return rtab_index_to_bv(next[x]); + endfunction + + typedef enum { + POP_TRY_HEAD, + POP_TRY_NEXT, + POP_TRY_NEXT_WAIT + } rtab_pop_try_state_e; +// }}} + +// Internal signals and registers +// {{{ + rtab_entry_t [N-1:0] req_q; + rtab_ptr_t [N-1:0] next_q; + + rtab_pop_try_state_e pop_try_state_q, pop_try_state_d; + logic [N-1:0] pop_try_next_q, pop_try_next_d; + + logic [N-1:0] valid_q; + logic [N-1:0] valid_set, valid_rst; + logic [N-1:0] alloc_valid_set; + logic [N-1:0] pop_commit_valid_rst; + + // Bits indicating if the corresponding entry is the head of a linked list + logic [N-1:0] head_q; + logic [N-1:0] head_set, head_rst; + logic [N-1:0] alloc_head_set, alloc_head_rst; + logic [N-1:0] pop_try_head_rst; + logic [N-1:0] pop_commit_head_set; + logic [N-1:0] pop_rback_head_set; + + // Bits indicating if the corresponding entry is the tail of a linked list + logic [N-1:0] tail_q; + logic [N-1:0] tail_set, tail_rst; + logic [N-1:0] alloc_tail_set, alloc_tail_rst; + + // There is a pend ing miss on the target nline + logic [N-1:0] deps_mshr_hit_q; + logic [N-1:0] deps_mshr_hit_set, deps_mshr_hit_rst; + logic [N-1:0] 
alloc_deps_mshr_hit_set; + logic [N-1:0] pop_rback_deps_mshr_hit_set; + + // The MSHR has no available slot for the new miss + logic [N-1:0] deps_mshr_full_q; + logic [N-1:0] deps_mshr_full_set, deps_mshr_full_rst; + logic [N-1:0] alloc_deps_mshr_full_set; + logic [N-1:0] pop_rback_deps_mshr_full_set; + + // The MSHR is not ready to send a new miss requests + logic [N-1:0] deps_mshr_ready_q; + logic [N-1:0] deps_mshr_ready_set, deps_mshr_ready_rst; + logic [N-1:0] alloc_deps_mshr_ready_set; + logic [N-1:0] pop_rback_deps_mshr_ready_set; + + // Hit on an non-e mpty entry of the write buffer + logic [N-1:0] deps_wbuf_hit_q; + logic [N-1:0] deps_wbuf_hit_set, deps_wbuf_hit_rst; + logic [N-1:0] alloc_deps_wbuf_hit_set; + logic [N-1:0] pop_rback_deps_wbuf_hit_set; + + // Hit on a pend entry of the write buffer + logic [N-1:0] deps_wbuf_not_ready_q; + logic [N-1:0] deps_wbuf_not_ready_set, deps_wbuf_not_ready_rst; + logic [N-1:0] alloc_deps_wbuf_not_ready_set; + logic [N-1:0] pop_rback_deps_wbuf_not_ready_set; + + logic [N-1:0] nodeps; + hpdcache_nline_t [N-1:0] nline; + hpdcache_req_addr_t [N-1:0] addr; + logic [N-1:0] is_read; + logic [N-1:0] check_hit; + logic [N-1:0] match_check_nline; + logic [N-1:0] match_check_tail; + logic [N-1:0] match_refill_nline; + logic [N-1:0] match_refill_mshr_set; + + logic [N-1:0] free; + logic [N-1:0] free_alloc; + logic alloc; + + logic [N-1:0] pop_match_next; + logic [N-1:0] pop_rback_ptr_bv; + logic [N-1:0] pop_try_bv; + logic [N-1:0] ready; + + genvar gen_i; +// }}} + +// Compute global control signals +// {{{ + // compute if entries are ready to be replayed + assign nodeps = ~(deps_mshr_hit_q | + deps_mshr_full_q | + deps_mshr_ready_q | + deps_wbuf_hit_q | + deps_wbuf_not_ready_q); + + assign ready = valid_q & head_q & nodeps; + + assign free = ~valid_q; + + // compute the free vector (one-hot signal) + hpdcache_prio_1hot_encoder #( + .N (N) + ) free_encoder_i ( + .val_i (free), + .val_o (free_alloc) + ); + + // full and empty 
signals + assign empty_o = &(~valid_q); + assign full_o = &( valid_q) | (|valid_q & cfg_single_entry_i); +// }}} + +// Check interface +// {{{ + generate + for (gen_i = 0; gen_i < N; gen_i++) begin : check_gen + assign addr[gen_i] = {req_q[gen_i].addr_tag, req_q[gen_i].addr_offset}, + nline[gen_i] = hpdcache_get_req_addr_nline(addr[gen_i]), + match_check_nline[gen_i] = (check_nline_i == nline[gen_i]); + + assign is_read[gen_i] = is_load(req_q[gen_i].op) | + is_cmo_prefetch(req_q[gen_i].op, req_q[gen_i].size); + end + endgenerate + + assign check_hit = valid_q & match_check_nline, + check_hit_o = |check_hit, + match_check_tail = check_hit & tail_q; +// }}} + +// Allocation process +// {{{ + assign alloc = alloc_i | alloc_and_link_i; + + // Set the valid bit-vector of the replay table + assign alloc_valid_set = free_alloc & {N{alloc}}; + + // Set of head and tail bit-vectors during an allocation + // - The head bit is only set when creating a new linked-list + // - The tail bit is always set because new requests are added on the tail. 
+ assign alloc_head_set = free_alloc & {N{alloc_i}}, + alloc_tail_set = alloc_valid_set; + + // Reset of head and tail bit-vectors during an allocation + // - When doing an allocation and link, head bit shall be reset + // - when doing an allocation and link, the "prev" tail shall be reset + assign alloc_head_rst = free_alloc & {N{alloc_and_link_i}}, + alloc_tail_rst = match_check_tail & {N{alloc_and_link_i}}; + + // Set the dependency bits for the allocated entry + assign alloc_deps_mshr_hit_set = alloc_valid_set & {N{ alloc_mshr_hit_i}}, + alloc_deps_mshr_full_set = alloc_valid_set & {N{ alloc_mshr_full_i}}, + alloc_deps_mshr_ready_set = alloc_valid_set & {N{ alloc_mshr_ready_i}}, + alloc_deps_wbuf_hit_set = alloc_valid_set & {N{ alloc_wbuf_hit_i}}, + alloc_deps_wbuf_not_ready_set = alloc_valid_set & {N{alloc_wbuf_not_ready_i}}; +// }}} + +// Update replay table dependencies +// {{{ + // Update write buffer hit dependencies + // {{{ + // Build a bit-vector with HEAD requests waiting for a conflict in the wbuf + logic [N-1:0] wbuf_rd_pending, wbuf_wr_pending; + logic [N-1:0] wbuf_rd_gnt, wbuf_wr_gnt; + logic [ 1:0] wbuf_pending; + logic [ 1:0] wbuf_gnt; + logic wbuf_ready; + logic [N-1:0] wbuf_sel; + + assign wbuf_rd_pending = valid_q & head_q & deps_wbuf_hit_q, + wbuf_wr_pending = valid_q & head_q & deps_wbuf_not_ready_q; + + // Choose in a round-robin manner a ready transaction waiting for a conflict in the wbuf + hpdcache_rrarb #( + .N (N) + ) wbuf_rd_pending_arb_i ( + .clk_i, + .rst_ni, + .req_i (wbuf_rd_pending), + .gnt_o (wbuf_rd_gnt), + .ready_i (wbuf_gnt[0] & wbuf_ready) + ); + + hpdcache_rrarb #( + .N (N) + ) wbuf_wr_pending_arb_i ( + .clk_i, + .rst_ni, + .req_i (wbuf_wr_pending), + .gnt_o (wbuf_wr_gnt), + .ready_i (wbuf_gnt[1] & wbuf_ready) + ); + + assign wbuf_pending = {|wbuf_wr_gnt, |wbuf_rd_gnt}, + wbuf_ready = |(pop_try_bv & (wbuf_rd_gnt | wbuf_wr_gnt)); + + hpdcache_fxarb #( + .N (2) + ) wbuf_pending_arb_i ( + .clk_i, + .rst_ni, + .req_i 
(wbuf_pending), + .gnt_o (wbuf_gnt), + .ready_i (wbuf_ready) + ); + + assign wbuf_sel = wbuf_gnt[0] ? wbuf_rd_gnt : + wbuf_gnt[1] ? wbuf_wr_gnt : '0; + + hpdcache_mux #( + .NINPUT (N), + .DATA_WIDTH ($bits(hpdcache_req_addr_t)), + .ONE_HOT_SEL (1'b1) + ) wbuf_pending_addr_mux_i ( + .data_i (addr), + .sel_i (wbuf_sel), + .data_o (wbuf_addr_o) + ); + + hpdcache_mux #( + .NINPUT (N), + .DATA_WIDTH (1), + .ONE_HOT_SEL (1'b1) + ) wbuf_pending_is_read_mux_i ( + .data_i (is_read), + .sel_i (wbuf_sel), + .data_o (wbuf_is_read_o) + ); + + // reset write buffer dependency bits with the output from the write buffer + assign deps_wbuf_hit_rst = + wbuf_sel & ~{N{wbuf_hit_open_i | wbuf_hit_pend_i | wbuf_hit_sent_i}}; + assign deps_wbuf_not_ready_rst = + wbuf_sel & ~{N{wbuf_not_ready_i}}; + // }}} + + // Update miss handler dependency + // {{{ + assign deps_mshr_ready_rst = {N{miss_ready_i}}; + // }}} + + // Update refill dependencies + // {{{ + generate + for (gen_i = 0; gen_i < N; gen_i++) begin : match_refill_gen + assign match_refill_mshr_set[gen_i] = + rtab_mshr_set_equal(refill_nline_i, nline[gen_i]); + assign match_refill_nline[gen_i] = + (refill_nline_i == nline[gen_i]); + end + endgenerate + + assign deps_mshr_full_rst = {N{refill_i}} & match_refill_mshr_set; + assign deps_mshr_hit_rst = {N{refill_i}} & match_refill_nline; + // }}} +// }}} + +// Pop interface +// {{{ + logic [N-1:0] pop_sel; + logic [N-1:0] pop_commit_bv; + + assign pop_commit_bv = rtab_index_to_bv(pop_commit_ptr_i); + + // Pop try process + // {{{ + logic [N-1:0] pop_gnt; + logic pop_head; + + hpdcache_rrarb #( + .N (N) + ) pop_arb_i ( + .clk_i, + .rst_ni, + .req_i (ready), + .gnt_o (pop_gnt), + .ready_i (pop_head) + ); + + always_comb + begin : req_valid_comb + case(pop_try_state_q) + POP_TRY_HEAD : pop_try_valid_o = |ready; + POP_TRY_NEXT : pop_try_valid_o = 1'b1; + POP_TRY_NEXT_WAIT: pop_try_valid_o = 1'b1; + default : pop_try_valid_o = 1'b0; + endcase + end + + always_comb + begin : 
pop_entry_sel_comb + pop_try_state_d = pop_try_state_q; + pop_try_next_d = pop_try_next_q; + pop_head = 1'b0; + pop_sel = '0; + + case (pop_try_state_q) + POP_TRY_HEAD: begin + // This FSM may be in this state after forwarding the tail of + // a list. In that case, a rollback may arrive in this cycle. + pop_sel = pop_gnt; + if (!pop_rback_i && pop_try_valid_o) begin + if (pop_try_i) begin + // If the request interface accepts the request, go to the next request + // in the list (if the current request is not the tail). Otherwise, stay in + // the same state to forward a request from a new list + pop_head = 1'b1; + if ((pop_gnt & ~tail_q) != 0) begin + pop_try_state_d = POP_TRY_NEXT; + pop_try_next_d = rtab_next(next_q, pop_try_ptr_o); + end + end + end + end + POP_TRY_NEXT: begin + pop_sel = pop_try_next_q; + if (pop_rback_i) begin + pop_try_state_d = POP_TRY_HEAD; + end else begin + if (pop_try_i) begin + // If the request interface accepts the new request, go to the next request + // in the list (if the current request is not the tail). Otherwise, return + // to the POP_TRY_HEAD state to forward a request from a new list + if ((pop_try_next_q & ~tail_q) != 0) begin + pop_try_state_d = POP_TRY_NEXT; + pop_try_next_d = rtab_next(next_q, pop_try_ptr_o); + end else begin + pop_try_state_d = POP_TRY_HEAD; + end + end else begin + // If the request interface is not ready to consume the new request, wait + // until it is + pop_try_state_d = POP_TRY_NEXT_WAIT; + end + end + end + POP_TRY_NEXT_WAIT: begin + // Wait for the current request to be accepted. 
Then go to the next request in the + // list or to a new list + pop_sel = pop_try_next_q; + if (pop_try_i) begin + if ((pop_try_next_q & ~tail_q) != 0) begin + pop_try_state_d = POP_TRY_NEXT; + pop_try_next_d = rtab_next(next_q, pop_try_ptr_o); + end else begin + pop_try_state_d = POP_TRY_HEAD; + end + end + end + default: begin + end + endcase + end + + assign pop_commit_head_set = '0; + + hpdcache_mux #( + .NINPUT (N), + .DATA_WIDTH ($bits(rtab_entry_t)), + .ONE_HOT_SEL (1'b1) + ) pop_mux_i ( + .data_i (req_q), + .sel_i (pop_sel), + .data_o (pop_try_req_o) + ); + + // Temporarily unset the head bit of the popped request to prevent it to be rescheduled + assign pop_try_bv = pop_sel & {N{pop_try_i}}, + pop_try_head_rst = pop_try_bv; + + + // Forward the index of the entry being popped. This is used later by the + // commit or rollback operations + assign pop_try_ptr_o = rtab_bv_to_index(pop_sel); + + // }}} + + // Pop commit process + // {{{ + // Invalidate the entry being popped (head of the linked list) + assign pop_commit_valid_rst = {N{pop_commit_i}} & rtab_index_to_bv(pop_commit_ptr_i); + // }}} + + // Pop rollback process + // {{{ + // Set again the head bit of the rolled-back request + assign pop_rback_ptr_bv = rtab_index_to_bv(pop_rback_ptr_i); + + assign pop_rback_head_set = {N{pop_rback_i}} & pop_rback_ptr_bv; + + assign pop_rback_deps_mshr_hit_set = {N{pop_rback_i}} & pop_rback_ptr_bv & {N{pop_rback_mshr_hit_i}}, + pop_rback_deps_mshr_full_set = {N{pop_rback_i}} & pop_rback_ptr_bv & {N{pop_rback_mshr_full_i}}, + pop_rback_deps_mshr_ready_set = {N{pop_rback_i}} & pop_rback_ptr_bv & {N{pop_rback_mshr_ready_i}}, + pop_rback_deps_wbuf_hit_set = {N{pop_rback_i}} & pop_rback_ptr_bv & {N{pop_rback_wbuf_hit_i}}, + pop_rback_deps_wbuf_not_ready_set = {N{pop_rback_i}} & pop_rback_ptr_bv & {N{pop_rback_wbuf_not_ready_i}}; + // }}} +// }}} + +// Internal state assignment +// {{{ + assign head_set = alloc_head_set | pop_commit_head_set | pop_rback_head_set, + 
head_rst = alloc_head_rst | pop_try_head_rst; + + assign tail_set = alloc_tail_set, + tail_rst = alloc_tail_rst; + + assign valid_set = alloc_valid_set, + valid_rst = pop_commit_valid_rst; + + assign deps_mshr_hit_set = alloc_deps_mshr_hit_set | pop_rback_deps_mshr_hit_set, + deps_mshr_full_set = alloc_deps_mshr_full_set | pop_rback_deps_mshr_full_set, + deps_mshr_ready_set = alloc_deps_mshr_ready_set | pop_rback_deps_mshr_ready_set, + deps_wbuf_hit_set = alloc_deps_wbuf_hit_set | pop_rback_deps_wbuf_hit_set, + deps_wbuf_not_ready_set = alloc_deps_wbuf_not_ready_set | pop_rback_deps_wbuf_not_ready_set; + + always_ff @(posedge clk_i or negedge rst_ni) + begin : rtab_valid_ff + if (!rst_ni) begin + valid_q <= '0; + head_q <= '0; + tail_q <= '0; + deps_mshr_hit_q <= '0; + deps_mshr_full_q <= '0; + deps_mshr_ready_q <= '0; + deps_wbuf_hit_q <= '0; + deps_wbuf_not_ready_q <= '0; + next_q <= '0; + end else begin + valid_q <= (~valid_q & valid_set) | + ( valid_q & ~valid_rst); + + // update head and tail flags + head_q <= (~head_q & head_set) | + ( head_q & ~head_rst); + + tail_q <= (~tail_q & tail_set) | + ( tail_q & ~tail_rst); + + // update dependency flags + deps_mshr_hit_q <= (~deps_mshr_hit_q & deps_mshr_hit_set) | + ( deps_mshr_hit_q & ~deps_mshr_hit_rst); + deps_mshr_full_q <= (~deps_mshr_full_q & deps_mshr_full_set) | + ( deps_mshr_full_q & ~deps_mshr_full_rst); + deps_mshr_ready_q <= (~deps_mshr_ready_q & deps_mshr_ready_set) | + ( deps_mshr_ready_q & ~deps_mshr_ready_rst); + deps_wbuf_hit_q <= (~deps_wbuf_hit_q & deps_wbuf_hit_set) | + ( deps_wbuf_hit_q & ~deps_wbuf_hit_rst); + deps_wbuf_not_ready_q <= (~deps_wbuf_not_ready_q & deps_wbuf_not_ready_set) | + ( deps_wbuf_not_ready_q & ~deps_wbuf_not_ready_rst); + + // update the next pointers + for (int i = 0; i < N; i++) begin + if (alloc_and_link_i && match_check_tail[i]) begin + next_q[i] <= rtab_bv_to_index(free_alloc); + end + end + end + end + + always_ff @(posedge clk_i or negedge rst_ni) + begin : 
pop_try_ff + if (!rst_ni) begin + pop_try_state_q <= POP_TRY_HEAD; + pop_try_next_q <= '0; + end else begin + pop_try_state_q <= pop_try_state_d; + pop_try_next_q <= pop_try_next_d; + end + end + + always_ff @(posedge clk_i) + begin : rtab_ff + for (int i = 0; i < N; i++) begin + // update the request array + if (valid_set[i]) begin + req_q[i] <= alloc_req_i; + end + end + end +// }}} + +// Assertions +// {{{ +// pragma translate_off + assert property (@(posedge clk_i) disable iff (!rst_ni) + check_i |-> $onehot0(match_check_tail)) else + $error("rtab: more than one entry matching"); + + assert property (@(posedge clk_i) disable iff (!rst_ni) + alloc_and_link_i |-> (check_i & check_hit_o)) else + $error("rtab: alloc and link shall be performed in case of check hit"); + + assert property (@(posedge clk_i) disable iff (!rst_ni) + alloc_and_link_i |-> + ({alloc_req_i.addr_tag, hpdcache_get_req_offset_set(alloc_req_i.addr_offset)} == + check_nline_i)) else + $error("rtab: nline for alloc and link shall match the one being checked"); + + assert property (@(posedge clk_i) disable iff (!rst_ni) + alloc_i |-> !alloc_and_link_i) else + $error("rtab: only one allocation per cycle is allowed"); + +`ifndef VERILATOR + assert property (@(posedge clk_i) disable iff (!rst_ni) + pop_try_i |-> ##1 (pop_commit_i | pop_rback_i)) else + $error("rtab: a pop try shall be followed by a commit or rollback"); +`endif + + assert property (@(posedge clk_i) disable iff (!rst_ni) + pop_commit_i |-> valid_q[pop_commit_ptr_i]) else + $error("rtab: commiting an invalid entry"); + + assert property (@(posedge clk_i) disable iff (!rst_ni) + pop_rback_i |-> valid_q[pop_rback_ptr_i]) else + $error("rtab: rolling-back an invalid entry"); + + assert property (@(posedge clk_i) disable iff (!rst_ni) + pop_rback_i |-> !pop_try_i) else + $error("rtab: cache shall not accept a new request while rolling back"); + + assert property (@(posedge clk_i) disable iff (!rst_ni) + alloc |-> ~full_o) else + 
$error("rtab: trying to allocate while the table is full"); + + assert property (@(posedge clk_i) disable iff (!rst_ni) + alloc_and_link_i |-> ~cfg_single_entry_i) else + $error("rtab: trying to link a request in single entry mode"); +// pragma translate_on +// }}} +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_uncached.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_uncached.sv new file mode 100644 index 00000000..17519e65 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_uncached.sv @@ -0,0 +1,965 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : May, 2021 + * Description : HPDcache uncached and AMO request handler + * History : + */ +module hpdcache_uncached +import hpdcache_pkg::*; + // Parameters + // {{{ +#( + parameter int HPDcacheMemIdWidth = 8, + parameter int HPDcacheMemDataWidth = 512, + parameter type hpdcache_mem_req_t = logic, + parameter type hpdcache_mem_req_w_t = logic, + parameter type hpdcache_mem_resp_r_t = logic, + parameter type hpdcache_mem_resp_w_t = logic, + + localparam type hpdcache_mem_id_t = logic [HPDcacheMemIdWidth-1:0] +) + // }}} +// Ports +// {{{ +( + input logic clk_i, + input logic rst_ni, + + // Global control signals + // {{{ + input logic wbuf_empty_i, + input logic mshr_empty_i, + input logic rtab_empty_i, + input logic ctrl_empty_i, + // }}} + + // Cache-side request interface + // {{{ + input logic req_valid_i, + output logic req_ready_o, + input hpdcache_uc_op_t req_op_i, + input hpdcache_req_addr_t req_addr_i, + input hpdcache_req_size_t req_size_i, + input hpdcache_req_data_t req_data_i, + input hpdcache_req_be_t req_be_i, + input logic req_uc_i, + input hpdcache_req_sid_t req_sid_i, + input hpdcache_req_tid_t req_tid_i, + input logic req_need_rsp_i, + // }}} + + // Write buffer interface + // {{{ + output logic wbuf_flush_all_o, + // }}} + + // AMO Cache Interface + // {{{ + output logic dir_amo_match_o, + output hpdcache_set_t dir_amo_match_set_o, + output hpdcache_tag_t dir_amo_match_tag_o, + output logic dir_amo_update_plru_o, + input hpdcache_way_vector_t dir_amo_hit_way_i, + + output logic data_amo_write_o, + output logic data_amo_write_enable_o, + output hpdcache_set_t data_amo_write_set_o, + output hpdcache_req_size_t data_amo_write_size_o, + output hpdcache_word_t data_amo_write_word_o, + output logic [63:0] data_amo_write_data_o, + output logic [7:0] data_amo_write_be_o, + // }}} + + // LR/SC reservation buffer + // {{{ + input logic lrsc_snoop_i, + input hpdcache_req_addr_t lrsc_snoop_addr_i, + 
input hpdcache_req_size_t lrsc_snoop_size_i, + // }}} + + // Core response interface + // {{{ + input logic core_rsp_ready_i, + output logic core_rsp_valid_o, + output hpdcache_rsp_t core_rsp_o, + // }}} + + // MEMORY interfaces + // {{{ + // Memory request unique identifier + input hpdcache_mem_id_t mem_read_id_i, + input hpdcache_mem_id_t mem_write_id_i, + + // Read interface + input logic mem_req_read_ready_i, + output logic mem_req_read_valid_o, + output hpdcache_mem_req_t mem_req_read_o, + + output logic mem_resp_read_ready_o, + input logic mem_resp_read_valid_i, + input hpdcache_mem_resp_r_t mem_resp_read_i, + + // Write interface + input logic mem_req_write_ready_i, + output logic mem_req_write_valid_o, + output hpdcache_mem_req_t mem_req_write_o, + + input logic mem_req_write_data_ready_i, + output logic mem_req_write_data_valid_o, + output hpdcache_mem_req_w_t mem_req_write_data_o, + + output logic mem_resp_write_ready_o, + input logic mem_resp_write_valid_i, + input hpdcache_mem_resp_w_t mem_resp_write_i, + // }}} + + // Configuration interface + // {{{ + input logic cfg_error_on_cacheable_amo_i + // }}} +); +// }}} + +// Definition of constants and types +// {{{ + localparam hpdcache_uint MEM_REQ_RATIO = HPDcacheMemDataWidth/HPDCACHE_REQ_DATA_WIDTH; + localparam hpdcache_uint MEM_REQ_WORD_INDEX_WIDTH = $clog2(MEM_REQ_RATIO); + + typedef enum { + UC_IDLE, + UC_WAIT_PENDING, + UC_MEM_REQ, + UC_MEM_W_REQ, + UC_MEM_WDATA_REQ, + UC_MEM_WAIT_RSP, + UC_CORE_RSP, + UC_AMO_READ_DIR, + UC_AMO_WRITE_DATA + } hpdcache_uc_fsm_t; + + localparam logic AMO_SC_SUCCESS = 1'b0; + localparam logic AMO_SC_FAILURE = 1'b1; + + function automatic logic [63:0] prepare_amo_data_operand( + input logic [63:0] data_i, + input hpdcache_req_size_t size_i, + input hpdcache_req_addr_t addr_i, + input logic sign_extend_i + ); + // 64-bits AMOs are already aligned, thus do nothing + if (size_i == hpdcache_req_size_t'(3)) begin + return data_i; + end + + // 32-bits AMOs + else begin + if 
(addr_i[2] == 1'b1) begin + if (sign_extend_i) begin + return {{32{data_i[63]}}, data_i[63:32]}; + end else begin + return {{32{ 1'b0}}, data_i[63:32]}; + end + end else begin + if (sign_extend_i) begin + return {{32{data_i[31]}}, data_i[31: 0]}; + end else begin + return {{32{ 1'b0}}, data_i[31: 0]}; + end + end + end + endfunction; + + function automatic logic [63:0] prepare_amo_data_result( + input logic [63:0] data_i, + input hpdcache_req_size_t size_i + ); + // 64-bits AMOs are already aligned, thus do nothing + if (size_i == hpdcache_req_size_t'(3)) begin + return data_i; + end + + // 32-bits AMOs + else begin + return {2{data_i[31:0]}}; + end + endfunction; + + function automatic logic amo_need_sign_extend(hpdcache_uc_op_t op); + unique case (1'b1) + op.is_amo_add, + op.is_amo_max, + op.is_amo_min: return 1'b1; + default : return 1'b0; + endcase; + endfunction +// }}} + +// Internal signals and registers +// {{{ + hpdcache_uc_fsm_t uc_fsm_q, uc_fsm_d; + hpdcache_uc_op_t req_op_q; + hpdcache_req_addr_t req_addr_q; + hpdcache_req_size_t req_size_q; + hpdcache_req_data_t req_data_q; + hpdcache_req_be_t req_be_q; + logic req_uc_q; + hpdcache_req_sid_t req_sid_q; + hpdcache_req_tid_t req_tid_q; + logic req_need_rsp_q; + + logic uc_sc_retcode_q, uc_sc_retcode_d; + + hpdcache_req_data_t rsp_rdata_q, rsp_rdata_d; + logic rsp_error_set, rsp_error_rst; + logic rsp_error_q; + logic mem_resp_write_valid_q, mem_resp_write_valid_d; + logic mem_resp_read_valid_q, mem_resp_read_valid_d; + + hpdcache_req_data_t mem_req_write_data; + logic [63:0] amo_req_ld_data; + logic [63:0] amo_ld_data; + logic [63:0] amo_req_st_data; + logic [63:0] amo_st_data; + logic [ 7:0] amo_st_be; + logic [63:0] amo_result; +// }}} + +// LR/SC reservation buffer logic +// {{{ + logic lrsc_rsrv_valid_q; + hpdcache_req_addr_t lrsc_rsrv_addr_q, lrsc_rsrv_addr_d; + hpdcache_nline_t lrsc_rsrv_nline; + hpdcache_offset_t lrsc_rsrv_word; + + hpdcache_offset_t lrsc_snoop_words; + hpdcache_nline_t 
lrsc_snoop_nline; + hpdcache_offset_t lrsc_snoop_base, lrsc_snoop_end; + logic lrsc_snoop_hit; + logic lrsc_snoop_reset; + + hpdcache_nline_t lrsc_uc_nline; + hpdcache_offset_t lrsc_uc_word; + logic lrsc_uc_hit; + logic lrsc_uc_set, lrsc_uc_reset; + + // NOTE: Reservation set for LR instruction is always 8-bytes in this + // implementation. + assign lrsc_rsrv_nline = hpdcache_get_req_addr_nline(lrsc_rsrv_addr_q), + lrsc_rsrv_word = hpdcache_get_req_addr_offset(lrsc_rsrv_addr_q) >> 3; + + // Check hit on LR/SC reservation for snoop port (normal write accesses) + assign lrsc_snoop_words = (lrsc_snoop_size_i < 3) ? 1 : hpdcache_offset_t'((8'h1 << lrsc_snoop_size_i) >> 3), + lrsc_snoop_nline = hpdcache_get_req_addr_nline(lrsc_snoop_addr_i), + lrsc_snoop_base = hpdcache_get_req_addr_offset(lrsc_snoop_addr_i) >> 3, + lrsc_snoop_end = lrsc_snoop_base + lrsc_snoop_words; + + assign lrsc_snoop_hit = lrsc_rsrv_valid_q & (lrsc_rsrv_nline == lrsc_snoop_nline) & + (lrsc_rsrv_word >= lrsc_snoop_base) & + (lrsc_rsrv_word < lrsc_snoop_end ); + + assign lrsc_snoop_reset = lrsc_snoop_i & lrsc_snoop_hit; + + // Check hit on LR/SC reservation for AMOs and SC + assign lrsc_uc_nline = hpdcache_get_req_addr_nline(req_addr_i), + lrsc_uc_word = hpdcache_get_req_addr_offset(req_addr_i) >> 3; + + assign lrsc_uc_hit = lrsc_rsrv_valid_q & (lrsc_rsrv_nline == lrsc_uc_nline) & + (lrsc_rsrv_word == lrsc_uc_word); +// }}} + +// Uncacheable request FSM +// {{{ + always_comb + begin : uc_fsm_comb + mem_resp_write_valid_d = mem_resp_write_valid_q; + mem_resp_read_valid_d = mem_resp_read_valid_q; + rsp_error_set = 1'b0; + rsp_error_rst = 1'b0; + lrsc_rsrv_addr_d = lrsc_rsrv_addr_q; + uc_sc_retcode_d = uc_sc_retcode_q; + wbuf_flush_all_o = 1'b0; + lrsc_uc_set = 1'b0; + lrsc_uc_reset = 1'b0; + + uc_fsm_d = uc_fsm_q; + + case (uc_fsm_q) + // Wait for a request + // {{{ + UC_IDLE: begin + + if (req_valid_i) begin + wbuf_flush_all_o = 1'b1; + + unique case (1'b1) + req_op_i.is_ld, + req_op_i.is_st: begin + 
if (wbuf_empty_i && mshr_empty_i && rtab_empty_i && ctrl_empty_i) begin + uc_fsm_d = UC_MEM_REQ; + end else begin + uc_fsm_d = UC_WAIT_PENDING; + end + end + + req_op_i.is_amo_swap, + req_op_i.is_amo_add, + req_op_i.is_amo_and, + req_op_i.is_amo_or, + req_op_i.is_amo_xor, + req_op_i.is_amo_max, + req_op_i.is_amo_maxu, + req_op_i.is_amo_min, + req_op_i.is_amo_minu, + req_op_i.is_amo_lr: begin + // Reset LR/SC reservation if AMO matches its address + lrsc_uc_reset = ~req_op_i.is_amo_lr & lrsc_uc_hit; + + if (!req_uc_i && cfg_error_on_cacheable_amo_i) begin + rsp_error_set = 1'b1; + uc_fsm_d = UC_CORE_RSP; + end else begin + if (wbuf_empty_i && mshr_empty_i && rtab_empty_i && ctrl_empty_i) begin + uc_fsm_d = UC_MEM_REQ; + end else begin + uc_fsm_d = UC_WAIT_PENDING; + end + end + end + + req_op_i.is_amo_sc: begin + if (!req_uc_i && cfg_error_on_cacheable_amo_i) begin + rsp_error_set = 1'b1; + uc_fsm_d = UC_CORE_RSP; + end else begin + // Reset previous reservation (if any) + lrsc_uc_reset = 1'b1; + + // SC with valid reservation + if (lrsc_uc_hit) begin + if (wbuf_empty_i && mshr_empty_i && rtab_empty_i && ctrl_empty_i) begin + uc_fsm_d = UC_MEM_REQ; + end else begin + uc_fsm_d = UC_WAIT_PENDING; + end + end + // SC with no valid reservation, thus respond with the failure code + else begin + uc_sc_retcode_d = AMO_SC_FAILURE; + uc_fsm_d = UC_CORE_RSP; + end + end + end + + default: begin + if (req_need_rsp_i) begin + rsp_error_set = 1'b1; + uc_fsm_d = UC_CORE_RSP; + end + end + endcase + end + end + // }}} + + // Wait for the write buffer to be empty + // {{{ + UC_WAIT_PENDING: begin + if (wbuf_empty_i && mshr_empty_i && rtab_empty_i && ctrl_empty_i) begin + uc_fsm_d = UC_MEM_REQ; + end else begin + uc_fsm_d = UC_WAIT_PENDING; + end + end + // }}} + + // Send request to memory + // {{{ + UC_MEM_REQ: begin + uc_fsm_d = UC_MEM_REQ; + + mem_resp_write_valid_d = 1'b0; + mem_resp_read_valid_d = 1'b0; + + case (1'b1) + req_op_q.is_ld, + req_op_q.is_amo_lr: begin + if 
(mem_req_read_ready_i) begin + uc_fsm_d = UC_MEM_WAIT_RSP; + end + end + + req_op_q.is_st, + req_op_q.is_amo_sc, + req_op_q.is_amo_swap, + req_op_q.is_amo_add, + req_op_q.is_amo_and, + req_op_q.is_amo_or, + req_op_q.is_amo_xor, + req_op_q.is_amo_max, + req_op_q.is_amo_maxu, + req_op_q.is_amo_min, + req_op_q.is_amo_minu: begin + if (mem_req_write_ready_i && mem_req_write_data_ready_i) begin + uc_fsm_d = UC_MEM_WAIT_RSP; + end else if (mem_req_write_ready_i) begin + uc_fsm_d = UC_MEM_WDATA_REQ; + end else if (mem_req_write_data_ready_i) begin + uc_fsm_d = UC_MEM_W_REQ; + end + end + endcase + end + // }}} + + // Send write address + // {{{ + UC_MEM_W_REQ: begin + mem_resp_write_valid_d = mem_resp_write_valid_q | mem_resp_write_valid_i; + mem_resp_read_valid_d = mem_resp_read_valid_q | mem_resp_read_valid_i; + + if (mem_req_write_ready_i) begin + uc_fsm_d = UC_MEM_WAIT_RSP; + end else begin + uc_fsm_d = UC_MEM_W_REQ; + end + end + // }}} + + // Send write data + // {{{ + UC_MEM_WDATA_REQ: begin + mem_resp_write_valid_d = mem_resp_write_valid_q | mem_resp_write_valid_i; + mem_resp_read_valid_d = mem_resp_read_valid_q | mem_resp_read_valid_i; + + if (mem_req_write_data_ready_i) begin + uc_fsm_d = UC_MEM_WAIT_RSP; + end else begin + uc_fsm_d = UC_MEM_WDATA_REQ; + end + end + // }}} + + // Wait for the response from the memory + // {{{ + UC_MEM_WAIT_RSP: begin + automatic bit rd_error; + automatic bit wr_error; + + uc_fsm_d = UC_MEM_WAIT_RSP; + mem_resp_write_valid_d = mem_resp_write_valid_q | mem_resp_write_valid_i; + mem_resp_read_valid_d = mem_resp_read_valid_q | mem_resp_read_valid_i; + + rd_error = mem_resp_read_valid_i && + ( mem_resp_read_i.mem_resp_r_error == HPDCACHE_MEM_RESP_NOK); + wr_error = mem_resp_write_valid_i && + (mem_resp_write_i.mem_resp_w_error == HPDCACHE_MEM_RESP_NOK); + rsp_error_set = req_need_rsp_q & (rd_error | wr_error); + + case (1'b1) + req_op_q.is_ld: begin + if (mem_resp_read_valid_i) begin + if (req_need_rsp_q) begin + uc_fsm_d = 
UC_CORE_RSP; + end else begin + uc_fsm_d = UC_IDLE; + end + end + end + req_op_q.is_st: begin + if (mem_resp_write_valid_i) begin + if (req_need_rsp_q) begin + uc_fsm_d = UC_CORE_RSP; + end else begin + uc_fsm_d = UC_IDLE; + end + end + end + req_op_q.is_amo_lr: begin + if (mem_resp_read_valid_i) begin + // set a new reservation + if (!rd_error) + begin + lrsc_uc_set = 1'b1; + lrsc_rsrv_addr_d = req_addr_q; + end + // in case of a memory error, do not make the reservation and + // invalidate an existing one (if valid) + else begin + lrsc_uc_reset = 1'b1; + end + + if (req_uc_q || rd_error) begin + uc_fsm_d = UC_CORE_RSP; + end else begin + uc_fsm_d = UC_AMO_READ_DIR; + end + end + end + req_op_q.is_amo_sc: begin + if (mem_resp_write_valid_i) begin + automatic bit is_atomic; + + is_atomic = mem_resp_write_i.mem_resp_w_is_atomic && !wr_error; + uc_sc_retcode_d = is_atomic ? AMO_SC_SUCCESS : AMO_SC_FAILURE; + + if (req_uc_q || !is_atomic) begin + uc_fsm_d = UC_CORE_RSP; + end else begin + uc_fsm_d = UC_AMO_READ_DIR; + end + end + end + req_op_q.is_amo_swap, + req_op_q.is_amo_add, + req_op_q.is_amo_and, + req_op_q.is_amo_or, + req_op_q.is_amo_xor, + req_op_q.is_amo_max, + req_op_q.is_amo_maxu, + req_op_q.is_amo_min, + req_op_q.is_amo_minu: begin + // wait until both the old data and the write acknowledgement have been received + if ((mem_resp_read_valid_i && mem_resp_write_valid_i) || + (mem_resp_read_valid_i && mem_resp_write_valid_q) || + (mem_resp_read_valid_q && mem_resp_write_valid_i)) + begin + if (req_uc_q || rsp_error_q || rd_error || wr_error) begin + uc_fsm_d = UC_CORE_RSP; + end else begin + uc_fsm_d = UC_AMO_READ_DIR; + end + end + end + endcase + end + // }}} + + // Send the response to the requester + // {{{ + UC_CORE_RSP: begin + if (core_rsp_ready_i) begin + rsp_error_rst = 1'b1; + uc_fsm_d = UC_IDLE; + end else begin + uc_fsm_d = UC_CORE_RSP; + end + end + // }}} + + // Check for a cache hit on the AMO target address + // {{{ + UC_AMO_READ_DIR: begin + uc_fsm_d = 
UC_AMO_WRITE_DATA; + end + // }}} + + // Write the locally computed AMO result in the cache + // {{{ + UC_AMO_WRITE_DATA: begin + uc_fsm_d = UC_CORE_RSP; + end + // }}} + endcase + end +// }}} + +// AMO unit +// {{{ + localparam hpdcache_uint AMO_WORD_INDEX_WIDTH = $clog2(HPDCACHE_REQ_DATA_WIDTH/64); + + generate + if (AMO_WORD_INDEX_WIDTH > 0) begin : amo_operand_mux_gen + hpdcache_mux #( + .NINPUT (HPDCACHE_REQ_DATA_WIDTH/64), + .DATA_WIDTH (64), + .ONE_HOT_SEL (1'b0) + ) amo_ld_data_mux_i ( + .data_i (rsp_rdata_q), + .sel_i (req_addr_q[3 +: AMO_WORD_INDEX_WIDTH]), + .data_o (amo_req_ld_data) + ); + + hpdcache_mux #( + .NINPUT (HPDCACHE_REQ_DATA_WIDTH/64), + .DATA_WIDTH (64), + .ONE_HOT_SEL (1'b0) + ) amo_st_data_mux_i ( + .data_i (req_data_q), + .sel_i (req_addr_q[3 +: AMO_WORD_INDEX_WIDTH]), + .data_o (amo_req_st_data) + ); + + hpdcache_mux #( + .NINPUT (HPDCACHE_REQ_DATA_WIDTH/64), + .DATA_WIDTH (8), + .ONE_HOT_SEL (1'b0) + ) amo_st_be_mux_i ( + .data_i (req_be_q), + .sel_i (req_addr_q[3 +: AMO_WORD_INDEX_WIDTH]), + .data_o (amo_st_be) + ); + + end else begin + assign amo_req_ld_data = rsp_rdata_q; + assign amo_req_st_data = req_data_q; + assign amo_st_be = req_be_q; + end + endgenerate + + assign amo_ld_data = prepare_amo_data_operand(amo_req_ld_data, req_size_q, + req_addr_q, amo_need_sign_extend(req_op_q)); + assign amo_st_data = prepare_amo_data_operand(amo_req_st_data, req_size_q, + req_addr_q, amo_need_sign_extend(req_op_q)); + + hpdcache_amo amo_unit_i ( + .ld_data_i (amo_ld_data), + .st_data_i (amo_st_data), + .op_i (req_op_q), + .result_o (amo_result) + ); + + assign dir_amo_match_o = (uc_fsm_q == UC_AMO_READ_DIR), + dir_amo_match_set_o = hpdcache_get_req_addr_set(req_addr_q), + dir_amo_match_tag_o = hpdcache_get_req_addr_tag(req_addr_q), + dir_amo_update_plru_o = dir_amo_match_o; + + assign data_amo_write_o = (uc_fsm_q == UC_AMO_WRITE_DATA), + data_amo_write_enable_o = |dir_amo_hit_way_i, + data_amo_write_set_o = 
hpdcache_get_req_addr_set(req_addr_q), + data_amo_write_size_o = req_size_q, + data_amo_write_word_o = hpdcache_get_req_addr_word(req_addr_q), + data_amo_write_data_o = prepare_amo_data_result(amo_result, req_size_q), + data_amo_write_be_o = amo_st_be; +// }}} + +// Core response outputs +// {{{ + assign req_ready_o = (uc_fsm_q == UC_IDLE), + core_rsp_valid_o = (uc_fsm_q == UC_CORE_RSP); +// }}} + +// Memory read request outputs +// {{{ + always_comb + begin : mem_req_read_comb + mem_req_read_o.mem_req_addr = req_addr_q; + mem_req_read_o.mem_req_len = 0; + mem_req_read_o.mem_req_size = req_size_q; + mem_req_read_o.mem_req_id = mem_read_id_i; + mem_req_read_o.mem_req_cacheable = 1'b0; + mem_req_read_o.mem_req_command = HPDCACHE_MEM_READ; + mem_req_read_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_ADD; + + unique case (1'b1) + req_op_q.is_ld: begin + mem_req_read_valid_o = (uc_fsm_q == UC_MEM_REQ); + end + req_op_q.is_amo_lr: begin + mem_req_read_o.mem_req_command = HPDCACHE_MEM_ATOMIC; + mem_req_read_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_LDEX; + mem_req_read_valid_o = (uc_fsm_q == UC_MEM_REQ); + end + default: begin + mem_req_read_valid_o = 1'b0; + end + endcase + end +// }}} + +// Memory write request outputs +// {{{ + always_comb + begin : mem_req_write_comb + mem_req_write_data = req_data_q; + mem_req_write_o.mem_req_addr = req_addr_q; + mem_req_write_o.mem_req_len = 0; + mem_req_write_o.mem_req_size = req_size_q; + mem_req_write_o.mem_req_id = mem_write_id_i; + mem_req_write_o.mem_req_cacheable = 1'b0; + unique case (1'b1) + req_op_q.is_amo_sc: begin + mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC; + mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_STEX; + end + req_op_q.is_amo_swap: begin + mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC; + mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_SWAP; + end + req_op_q.is_amo_add: begin + mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC; + mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_ADD; 
+ end + req_op_q.is_amo_and: begin + mem_req_write_data = ~req_data_q; + mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC; + mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_CLR; + end + req_op_q.is_amo_or: begin + mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC; + mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_SET; + end + req_op_q.is_amo_xor: begin + mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC; + mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_EOR; + end + req_op_q.is_amo_max: begin + mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC; + mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_SMAX; + end + req_op_q.is_amo_maxu: begin + mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC; + mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_UMAX; + end + req_op_q.is_amo_min: begin + mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC; + mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_SMIN; + end + req_op_q.is_amo_minu: begin + mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC; + mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_UMIN; + end + default: begin + mem_req_write_o.mem_req_command = HPDCACHE_MEM_WRITE; + mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_ADD; + end + endcase + + unique case (uc_fsm_q) + UC_MEM_REQ: begin + unique case (1'b1) + req_op_q.is_st, + req_op_q.is_amo_sc, + req_op_q.is_amo_swap, + req_op_q.is_amo_add, + req_op_q.is_amo_and, + req_op_q.is_amo_or, + req_op_q.is_amo_xor, + req_op_q.is_amo_max, + req_op_q.is_amo_maxu, + req_op_q.is_amo_min, + req_op_q.is_amo_minu: begin + mem_req_write_data_valid_o = 1'b1; + mem_req_write_valid_o = 1'b1; + end + + default: begin + mem_req_write_data_valid_o = 1'b0; + mem_req_write_valid_o = 1'b0; + end + endcase + end + + UC_MEM_W_REQ: begin + mem_req_write_valid_o = 1'b1; + mem_req_write_data_valid_o = 1'b0; + end + + UC_MEM_WDATA_REQ: begin + mem_req_write_valid_o = 1'b0; + mem_req_write_data_valid_o = 1'b1; + end + + default: begin + 
mem_req_write_valid_o = 1'b0; + mem_req_write_data_valid_o = 1'b0; + end + endcase + end + + generate + // memory data width is bigger than the width of the core's interface + if (MEM_REQ_RATIO > 1) begin : mem_req_data_gen + // replicate data + assign mem_req_write_data_o.mem_req_w_data = {MEM_REQ_RATIO{mem_req_write_data}}; + + // demultiplex the byte-enable + hpdcache_demux #( + .NOUTPUT (MEM_REQ_RATIO), + .DATA_WIDTH (HPDCACHE_REQ_DATA_WIDTH/8) + ) mem_write_be_demux_i ( + .data_i (req_be_q), + .sel_i (req_addr_q[HPDCACHE_REQ_BYTE_OFFSET_WIDTH +: MEM_REQ_WORD_INDEX_WIDTH]), + .data_o (mem_req_write_data_o.mem_req_w_be) + ); + end + + // memory data width is equal to the width of the core's interface + else begin + assign mem_req_write_data_o.mem_req_w_data = mem_req_write_data; + assign mem_req_write_data_o.mem_req_w_be = req_be_q; + end + + assign mem_req_write_data_o.mem_req_w_last = 1'b1; + endgenerate +// }}} + +// Response handling +// {{{ + logic [63:0] sc_retcode; + logic [63:0] sc_rdata; + + assign sc_retcode = {{63{1'b0}}, uc_sc_retcode_q}, + sc_rdata = prepare_amo_data_result(sc_retcode, req_size_q); + + assign core_rsp_o.rdata = req_op_q.is_amo_sc ? 
{HPDCACHE_REQ_WORDS{sc_rdata}} : rsp_rdata_q, + core_rsp_o.sid = req_sid_q, + core_rsp_o.tid = req_tid_q, + core_rsp_o.error = rsp_error_q, + core_rsp_o.aborted = 1'b0; + + // Resize the memory response data to the core response width + generate + // memory data width is bigger than the width of the core's interface + if (MEM_REQ_RATIO > 1) begin : core_rsp_data_gen + hpdcache_mux #( + .NINPUT (MEM_REQ_RATIO), + .DATA_WIDTH (HPDCACHE_REQ_DATA_WIDTH) + ) data_read_rsp_mux_i( + .data_i (mem_resp_read_i.mem_resp_r_data), + .sel_i (req_addr_q[HPDCACHE_REQ_BYTE_OFFSET_WIDTH +: MEM_REQ_WORD_INDEX_WIDTH]), + .data_o (rsp_rdata_d) + ); + end + + // memory data width is equal to the width of the core's interface + else begin + assign rsp_rdata_d = mem_resp_read_i.mem_resp_r_data; + end + endgenerate + + // This FSM is always ready to accept the response + assign mem_resp_read_ready_o = 1'b1, + mem_resp_write_ready_o = 1'b1; +// }}} + +// Set cache request registers +// {{{ + always_ff @(posedge clk_i) + begin : req_ff + if (req_valid_i && req_ready_o) begin + req_op_q <= req_op_i; + req_addr_q <= req_addr_i; + req_size_q <= req_size_i; + req_data_q <= req_data_i; + req_be_q <= req_be_i; + req_uc_q <= req_uc_i; + req_sid_q <= req_sid_i; + req_tid_q <= req_tid_i; + req_need_rsp_q <= req_need_rsp_i; + end + end +// }}} + +// Uncacheable request FSM set state +// {{{ + logic lrsc_rsrv_valid_set, lrsc_rsrv_valid_reset; + + assign lrsc_rsrv_valid_set = lrsc_uc_set, + lrsc_rsrv_valid_reset = lrsc_uc_reset | lrsc_snoop_reset; + + always_ff @(posedge clk_i or negedge rst_ni) + begin : uc_fsm_ff + if (!rst_ni) begin + uc_fsm_q <= UC_IDLE; + lrsc_rsrv_valid_q <= 1'b0; + end else begin + uc_fsm_q <= uc_fsm_d; + lrsc_rsrv_valid_q <= (~lrsc_rsrv_valid_q & lrsc_rsrv_valid_set ) | + ( lrsc_rsrv_valid_q & ~lrsc_rsrv_valid_reset); + end + end + + always_ff @(posedge clk_i) + begin : uc_amo_ff + lrsc_rsrv_addr_q <= lrsc_rsrv_addr_d; + uc_sc_retcode_q <= uc_sc_retcode_d; + end +// }}} + +// 
Response registers +// {{{ + always_ff @(posedge clk_i) + begin + if (mem_resp_read_valid_i) begin + rsp_rdata_q <= rsp_rdata_d; + end + mem_resp_write_valid_q <= mem_resp_write_valid_d; + mem_resp_read_valid_q <= mem_resp_read_valid_d; + end + + always_ff @(posedge clk_i or negedge rst_ni) + begin + if (!rst_ni) begin + rsp_error_q <= 1'b0; + end else begin + rsp_error_q <= (~rsp_error_q & rsp_error_set) | + ( rsp_error_q & ~rsp_error_rst); + end + end +// }}} + +// Assertions +// {{{ +// pragma translate_off + assert property (@(posedge clk_i) disable iff (!rst_ni) + (req_valid_i && req_op_i.is_ld) -> req_uc_i) else + $error("uc_handler: unexpected load request on cacheable region"); + + assert property (@(posedge clk_i) disable iff (!rst_ni) + (req_valid_i && req_op_i.is_st) -> req_uc_i) else + $error("uc_handler: unexpected store request on cacheable region"); + + assert property (@(posedge clk_i) disable iff (!rst_ni) + (req_valid_i && (req_op_i.is_amo_lr || + req_op_i.is_amo_sc || + req_op_i.is_amo_swap || + req_op_i.is_amo_add || + req_op_i.is_amo_and || + req_op_i.is_amo_or || + req_op_i.is_amo_xor || + req_op_i.is_amo_max || + req_op_i.is_amo_maxu || + req_op_i.is_amo_min || + req_op_i.is_amo_minu )) -> req_need_rsp_i) else + $error("uc_handler: amo requests shall need a response"); + + assert property (@(posedge clk_i) disable iff (!rst_ni) + (req_valid_i && (req_op_i.is_amo_lr || + req_op_i.is_amo_sc || + req_op_i.is_amo_swap || + req_op_i.is_amo_add || + req_op_i.is_amo_and || + req_op_i.is_amo_or || + req_op_i.is_amo_xor || + req_op_i.is_amo_max || + req_op_i.is_amo_maxu || + req_op_i.is_amo_min || + req_op_i.is_amo_minu )) -> (req_size_i inside {2,3})) else + $error("uc_handler: amo requests shall be 4 or 8 bytes wide"); + + assert property (@(posedge clk_i) disable iff (!rst_ni) + (mem_resp_write_valid_i || mem_resp_read_valid_i) -> (uc_fsm_q == UC_MEM_WAIT_RSP)) else + $error("uc_handler: unexpected response from memory"); +// pragma translate_on 
+// }}} + +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_wbuf.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_wbuf.sv new file mode 100644 index 00000000..0607440f --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_wbuf.sv @@ -0,0 +1,678 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : HPDcache Write Buffer + * History : + */ +module hpdcache_wbuf + // Parameters + // {{{ +#( + // Number of entries in the directory part of the Write Buffer + parameter int unsigned WBUF_DIR_ENTRIES = 0, + // Number of entries in the data part of the Write Buffer + parameter int unsigned WBUF_DATA_ENTRIES = 0, + // Width in bits of the write words + parameter int unsigned WBUF_WORD_WIDTH = 0, + // Number of words per line in the write buffer + parameter int unsigned WBUF_WORDS = 0, + // Width in bits of the physical address + parameter int unsigned WBUF_PA_WIDTH = 0, + // Maximum value of the time counter + parameter int unsigned WBUF_TIMECNT_MAX = 8, + // Number of most significant bits to check for read conflicts + parameter int unsigned WBUF_READ_MATCH_WIDTH = 0, + // Use a feedthrough FIFO on the send interface + parameter bit WBUF_SEND_FEEDTHROUGH = 0, + + localparam int unsigned WBUF_OFFSET_WIDTH = $clog2((WBUF_WORD_WIDTH*WBUF_WORDS)/8), + localparam int unsigned WBUF_TAG_WIDTH = WBUF_PA_WIDTH - WBUF_OFFSET_WIDTH, + localparam int unsigned WBUF_WORD_OFFSET = $clog2(WBUF_WORD_WIDTH/8), + localparam int unsigned WBUF_DATA_PTR_WIDTH = $clog2(WBUF_DATA_ENTRIES), + localparam int unsigned WBUF_DIR_PTR_WIDTH = $clog2(WBUF_DIR_ENTRIES), + localparam int unsigned WBUF_TIMECNT_WIDTH = $clog2(WBUF_TIMECNT_MAX), + localparam type wbuf_addr_t = logic unsigned [ WBUF_PA_WIDTH-1:0], + localparam type wbuf_dir_ptr_t = logic unsigned [ WBUF_DIR_PTR_WIDTH-1:0], + localparam type wbuf_data_ptr_t = logic unsigned [ WBUF_DATA_PTR_WIDTH-1:0], + localparam type wbuf_data_t = logic [ WBUF_WORD_WIDTH-1:0], + localparam type wbuf_be_t = logic [ WBUF_WORD_WIDTH/8-1:0], + localparam type wbuf_data_buf_t = wbuf_data_t [ WBUF_WORDS-1:0], + localparam type wbuf_be_buf_t = wbuf_be_t [ WBUF_WORDS-1:0], + localparam type wbuf_tag_t = logic unsigned [ WBUF_TAG_WIDTH-1:0], + localparam type wbuf_match_t = 
logic unsigned [WBUF_READ_MATCH_WIDTH-1:0], + localparam type wbuf_timecnt_t = logic unsigned [ WBUF_TIMECNT_WIDTH-1:0] +) + // }}} + // Ports + // {{{ +( + // Clock and reset signals + input logic clk_i, + input logic rst_ni, + + // Global control signals + output logic empty_o, + output logic full_o, + input logic flush_all_i, + + // Configuration signals + // Timer threshold + input wbuf_timecnt_t cfg_threshold_i, + // Reset timer on write + input logic cfg_reset_timecnt_on_write_i, + // Sequentialize write-after-write hazards + input logic cfg_sequential_waw_i, + // Inhibit write coalescing + input logic cfg_inhibit_write_coalescing_i, + + // Write interface + input logic write_i, + output logic write_ready_o, + input wbuf_addr_t write_addr_i, + input wbuf_data_t write_data_i, + input wbuf_be_t write_be_i, // byte-enable + input logic write_uc_i, // uncacheable write + + // Read hit interface + input wbuf_addr_t read_addr_i, + output logic read_hit_o, + input logic read_flush_hit_i, + + // Replay hit interface + input wbuf_addr_t replay_addr_i, + input logic replay_is_read_i, + output logic replay_open_hit_o, + output logic replay_pend_hit_o, + output logic replay_sent_hit_o, + output logic replay_not_ready_o, + + // Send interface + input logic send_meta_ready_i, + output logic send_meta_valid_o, + output wbuf_addr_t send_addr_o, + output wbuf_dir_ptr_t send_id_o, + output logic send_uc_o, + + input logic send_data_ready_i, + output logic send_data_valid_o, + output wbuf_addr_t send_data_tag_o, + output wbuf_data_buf_t send_data_o, + output wbuf_be_buf_t send_be_o, + + // Acknowledge interface + input logic ack_i, + input wbuf_dir_ptr_t ack_id_i, + input logic ack_error_i +); + // }}} + + // Definition of constants, types and functions + // {{{ + localparam int WBUF_SEND_FIFO_DEPTH = WBUF_DATA_ENTRIES; + + typedef logic unsigned [31:0] wbuf_uint; + + typedef enum logic [1:0] { + WBUF_FREE = 2'b00, // unused/free slot + WBUF_OPEN = 2'b01, // there are pending 
writes in this slot + WBUF_PEND = 2'b10, // the slot is waiting to be sent + WBUF_SENT = 2'b11 // the slot is sent and waits for the memory acknowledge + } wbuf_state_e; + + typedef struct packed { + wbuf_data_ptr_t ptr; + wbuf_timecnt_t cnt; + wbuf_tag_t tag; + logic uc; + } wbuf_dir_entry_t; + + typedef struct packed { + wbuf_data_buf_t data; + wbuf_be_buf_t be; + } wbuf_data_entry_t; + + typedef struct packed { + wbuf_data_ptr_t send_data_ptr; + wbuf_tag_t send_data_tag; + } wbuf_send_data_t; + + typedef struct packed { + wbuf_tag_t send_meta_tag; + wbuf_dir_ptr_t send_meta_id; + logic send_meta_uc; + } wbuf_send_meta_t; + + function automatic wbuf_dir_ptr_t wbuf_dir_find_next( + input wbuf_dir_ptr_t curr_ptr, + input wbuf_state_e [WBUF_DIR_ENTRIES-1:0] dir_state, + input wbuf_state_e state); + automatic wbuf_dir_ptr_t next_ptr; + for (int unsigned i = 0; i < WBUF_DIR_ENTRIES; i++) begin + next_ptr = wbuf_dir_ptr_t'((i + int'(curr_ptr) + 1) % WBUF_DIR_ENTRIES); + if (dir_state[next_ptr] == state) begin + return next_ptr; + end + end + return curr_ptr; + endfunction + + function automatic wbuf_data_ptr_t wbuf_data_find_next( + input wbuf_data_ptr_t curr_ptr, + input logic [WBUF_DATA_ENTRIES-1:0] data_valid, + input logic state); + automatic wbuf_data_ptr_t next_ptr; + for (int unsigned i = 0; i < WBUF_DATA_ENTRIES; i++) begin + next_ptr = wbuf_data_ptr_t'((i + int'(curr_ptr) + 1) % WBUF_DATA_ENTRIES); + if (data_valid[next_ptr] == state) begin + return next_ptr; + end + end + return curr_ptr; + endfunction + + function automatic void wbuf_data_write( + output wbuf_data_buf_t wbuf_ret_data, + output wbuf_be_buf_t wbuf_ret_be, + input wbuf_data_buf_t wbuf_old_data, + input wbuf_be_buf_t wbuf_old_be, + input wbuf_data_buf_t wbuf_new_data, + input wbuf_be_buf_t wbuf_new_be); + for (int unsigned w = 0; w < WBUF_WORDS; w++) begin + for (int unsigned b = 0; b < WBUF_WORD_WIDTH/8; b++) begin + wbuf_ret_data[w][b*8 +: 8] = wbuf_new_be[w][b] ? 
+ wbuf_new_data[w][b*8 +: 8] : + wbuf_old_data[w][b*8 +: 8]; + end + wbuf_ret_be[w] = wbuf_old_be[w] | wbuf_new_be[w]; + end + endfunction + + function automatic wbuf_match_t wbuf_tag_to_match_addr(wbuf_tag_t tag); + return tag[WBUF_TAG_WIDTH - 1:WBUF_TAG_WIDTH - WBUF_READ_MATCH_WIDTH]; + endfunction + // }}} + + // Definition of internal wires and registers + // {{{ + wbuf_state_e [ WBUF_DIR_ENTRIES-1:0] wbuf_dir_state_q, wbuf_dir_state_d; + wbuf_dir_entry_t [ WBUF_DIR_ENTRIES-1:0] wbuf_dir_q, wbuf_dir_d; + logic [WBUF_DATA_ENTRIES-1:0] wbuf_data_valid_q, wbuf_data_valid_d; + wbuf_data_entry_t [WBUF_DATA_ENTRIES-1:0] wbuf_data_q, wbuf_data_d; + + wbuf_dir_ptr_t wbuf_dir_free_ptr_q, wbuf_dir_free_ptr_d; + logic wbuf_dir_free; + wbuf_dir_ptr_t wbuf_dir_send_ptr_q, wbuf_dir_send_ptr_d; + wbuf_data_ptr_t wbuf_data_free_ptr_q, wbuf_data_free_ptr_d; + logic wbuf_data_free; + + logic wbuf_write_free; + logic wbuf_write_hit_open; + logic wbuf_write_hit_pend; + logic wbuf_write_hit_sent; + wbuf_dir_ptr_t wbuf_write_hit_open_dir_ptr; + wbuf_dir_ptr_t wbuf_write_hit_pend_dir_ptr; + + logic send_meta_valid; + logic send_meta_ready; + wbuf_send_meta_t send_meta_wdata, send_meta_rdata; + + logic send_data_wok; + logic send_data_w; + wbuf_send_data_t send_data_d; + wbuf_send_data_t send_data_q; + + wbuf_tag_t write_tag; + wbuf_data_buf_t write_data; + wbuf_be_buf_t write_be; + + logic [WBUF_DIR_ENTRIES-1:0] replay_match; + logic [WBUF_DIR_ENTRIES-1:0] replay_open_hit; + logic [WBUF_DIR_ENTRIES-1:0] replay_pend_hit; + logic [WBUF_DIR_ENTRIES-1:0] replay_sent_hit; + + genvar gen_i; + // }}} + + // Global control signals + // {{{ + always_comb + begin : empty_comb + empty_o = 1'b1; + for (int unsigned i = 0; i < WBUF_DIR_ENTRIES; i++) begin + empty_o &= (wbuf_dir_state_q[i] == WBUF_FREE); + end + end + + always_comb + begin : full_comb + full_o = 1'b1; + for (int unsigned i = 0; i < WBUF_DIR_ENTRIES; i++) begin + full_o &= (wbuf_dir_state_q[i] != WBUF_FREE); + end + end + // }}} + 
+ // Write control + // {{{ + assign write_tag = write_addr_i[WBUF_PA_WIDTH-1:WBUF_OFFSET_WIDTH]; + + always_comb + begin : wbuf_write_data_comb + for (int unsigned w = 0; w < WBUF_WORDS; w++) begin + write_data[w] = write_data_i; + end + end + + generate + if (WBUF_OFFSET_WIDTH > WBUF_WORD_OFFSET) begin : wbuf_write_be_gt_gen + always_comb + begin : wbuf_write_be_comb + for (int unsigned w = 0; w < WBUF_WORDS; w++) begin + if (w == int'(write_addr_i[WBUF_OFFSET_WIDTH-1:WBUF_WORD_OFFSET])) begin + write_be[w] = write_be_i; + end else begin + write_be[w] = '0; + end + end + end + end else begin : wbuf_write_be_le_gen + always_comb + begin : wbuf_write_be_comb + for (int unsigned w = 0; w < WBUF_WORDS; w++) begin + write_be[w] = write_be_i; + end + end + end + endgenerate + + always_comb + begin : wbuf_free_comb + wbuf_dir_free_ptr_d = wbuf_dir_free_ptr_q; + if (ack_i) begin + wbuf_dir_free_ptr_d = ack_id_i; + end else if (write_i && wbuf_write_free) begin + wbuf_dir_free_ptr_d = wbuf_dir_find_next(wbuf_dir_free_ptr_q, wbuf_dir_state_q, WBUF_FREE); + end + + wbuf_data_free_ptr_d = wbuf_data_free_ptr_q; + if (send_data_valid_o && send_data_ready_i) begin + wbuf_data_free_ptr_d = send_data_q.send_data_ptr; + end else if (write_i && wbuf_write_free) begin + wbuf_data_free_ptr_d = wbuf_data_find_next(wbuf_data_free_ptr_q, wbuf_data_valid_q, 1'b0); + end + end + + assign wbuf_dir_free = (wbuf_dir_state_q[wbuf_dir_free_ptr_q] == WBUF_FREE); + assign wbuf_data_free = ~wbuf_data_valid_q[wbuf_data_free_ptr_q]; + + always_comb + begin : wbuf_write_hit_comb + wbuf_write_hit_open = 1'b0; + wbuf_write_hit_pend = 1'b0; + wbuf_write_hit_sent = 1'b0; + + wbuf_write_hit_open_dir_ptr = 0; + wbuf_write_hit_pend_dir_ptr = 0; + for (int unsigned i = 0; i < WBUF_DIR_ENTRIES; i++) begin + if (wbuf_dir_q[i].tag == write_tag) begin + unique case (wbuf_dir_state_q[i]) + WBUF_OPEN: begin + wbuf_write_hit_open = 1'b1; + wbuf_write_hit_open_dir_ptr = wbuf_dir_ptr_t'(i); + end + WBUF_PEND: begin 
+ wbuf_write_hit_pend = 1'b1; + wbuf_write_hit_pend_dir_ptr = wbuf_dir_ptr_t'(i); + end + WBUF_SENT: begin + wbuf_write_hit_sent = 1'b1; + end + default: begin + /* do nothing */ + end + endcase + end + end + end + + // Check if there is a match between the read address and the tag of one + // of the used slots in the write buffer directory + always_comb + begin : read_hit_comb + automatic logic [WBUF_DIR_ENTRIES-1:0] read_hit; + + for (int unsigned i = 0; i < WBUF_DIR_ENTRIES; i++) begin + read_hit[i] = 1'b0; + unique case (wbuf_dir_state_q[i]) + WBUF_OPEN, WBUF_PEND, WBUF_SENT: begin + automatic wbuf_addr_t wbuf_addr; + automatic wbuf_match_t wbuf_tag; + automatic wbuf_match_t read_tag; + + wbuf_addr = wbuf_addr_t'(wbuf_dir_q[i].tag) << WBUF_OFFSET_WIDTH; + read_tag = read_addr_i[WBUF_PA_WIDTH-1:WBUF_PA_WIDTH - WBUF_READ_MATCH_WIDTH]; + wbuf_tag = wbuf_addr [WBUF_PA_WIDTH-1:WBUF_PA_WIDTH - WBUF_READ_MATCH_WIDTH]; + read_hit[i] = (read_tag == wbuf_tag) ? 1'b1 : 1'b0; + end + default: begin + /* do nothing */ + end + endcase + end + + read_hit_o = |read_hit; + end + + // Check if there is a match between the replay address and the tag of one + // of the used slots in the write buffer directory + generate + for (gen_i = 0; gen_i < WBUF_DIR_ENTRIES; gen_i++) begin : replay_match_gen + assign replay_match[gen_i] = replay_is_read_i ? + /* replay is read: compare address block tag (e.g. 
cache line) */ + (wbuf_tag_to_match_addr(wbuf_dir_q[gen_i].tag) == + replay_addr_i[WBUF_PA_WIDTH - 1:WBUF_PA_WIDTH - WBUF_READ_MATCH_WIDTH]) : + /* replay is write: compare wbuf tag */ + (wbuf_dir_q[gen_i].tag == + replay_addr_i[WBUF_PA_WIDTH - 1:WBUF_PA_WIDTH - WBUF_TAG_WIDTH]); + + assign replay_open_hit[gen_i] = + replay_match[gen_i] && (wbuf_dir_state_q[gen_i] == WBUF_OPEN); + assign replay_pend_hit[gen_i] = + replay_match[gen_i] && (wbuf_dir_state_q[gen_i] == WBUF_PEND); + assign replay_sent_hit[gen_i] = + replay_match[gen_i] && (wbuf_dir_state_q[gen_i] == WBUF_SENT); + end + endgenerate + + assign replay_open_hit_o = |replay_open_hit, + replay_pend_hit_o = |replay_pend_hit, + replay_sent_hit_o = |replay_sent_hit; + + always_comb + begin : replay_wbuf_not_ready_comb + replay_not_ready_o = 1'b0; + if (replay_pend_hit_o) begin + replay_not_ready_o = 1'b1; + end else if (replay_sent_hit_o && cfg_sequential_waw_i) begin + replay_not_ready_o = 1'b1; + end else if (!replay_open_hit_o && (!wbuf_dir_free || !wbuf_data_free)) begin + replay_not_ready_o = 1'b1; + end + end + + assign wbuf_write_free = + wbuf_dir_free + & wbuf_data_free + & ~wbuf_write_hit_open + & ~wbuf_write_hit_pend + & ~(wbuf_write_hit_sent & cfg_sequential_waw_i); + + assign write_ready_o = wbuf_write_free + | ((wbuf_write_hit_open | wbuf_write_hit_pend) + & ~cfg_inhibit_write_coalescing_i); + // }}} + + // Update control + // {{{ + always_comb + begin : wbuf_update_comb + automatic bit timeout; + automatic bit write_hit; + automatic bit read_hit; + automatic bit match_open_ptr; + automatic bit match_pend_ptr; + automatic bit match_free; + automatic bit send; + + timeout = 1'b0; + write_hit = 1'b0; + read_hit = 1'b0; + match_open_ptr = 1'b0; + match_pend_ptr = 1'b0; + match_free = 1'b0; + send = 1'b0; + + wbuf_dir_state_d = wbuf_dir_state_q; + wbuf_dir_d = wbuf_dir_q; + wbuf_data_d = wbuf_data_q; + + send_data_w = 1'b0; + send_meta_valid = 1'b0; + + for (int unsigned i = 0; i < WBUF_DIR_ENTRIES; 
i++) begin + case (wbuf_dir_state_q[i]) + WBUF_FREE: begin + match_free = wbuf_write_free && (i == int'(wbuf_dir_free_ptr_q)); + + if (write_i && match_free) begin + send = (cfg_threshold_i == 0) + | write_uc_i + | flush_all_i + | cfg_inhibit_write_coalescing_i; + + wbuf_dir_state_d[i] = send ? WBUF_PEND : WBUF_OPEN; + wbuf_dir_d[i].tag = write_tag; + wbuf_dir_d[i].cnt = 0; + wbuf_dir_d[i].ptr = wbuf_data_free_ptr_q; + wbuf_dir_d[i].uc = write_uc_i; + + wbuf_data_write( + wbuf_data_d[wbuf_data_free_ptr_q].data, + wbuf_data_d[wbuf_data_free_ptr_q].be, + '0, + '0, + write_data, + write_be + ); + end + end + + WBUF_OPEN: begin + match_open_ptr = (i == int'(wbuf_write_hit_open_dir_ptr)); + timeout = (wbuf_dir_q[i].cnt == (cfg_threshold_i - 1)); + read_hit = read_flush_hit_i & wbuf_write_hit_open & match_open_ptr; + write_hit = write_i + & wbuf_write_hit_open + & match_open_ptr + & ~cfg_inhibit_write_coalescing_i; + + if (!flush_all_i) begin + if (write_hit && cfg_reset_timecnt_on_write_i) begin + timeout = 1'b0; + wbuf_dir_d[i].cnt = 0; + end else if (!timeout) begin + wbuf_dir_d[i].cnt = wbuf_dir_q[i].cnt + 1; + end + + if (read_hit | timeout | cfg_inhibit_write_coalescing_i) begin + wbuf_dir_state_d[i] = WBUF_PEND; + end + end else begin + wbuf_dir_state_d[i] = WBUF_PEND; + end + + if (write_hit) begin + wbuf_data_write( + wbuf_data_d[wbuf_dir_q[i].ptr].data, + wbuf_data_d[wbuf_dir_q[i].ptr].be, + wbuf_data_q[wbuf_dir_q[i].ptr].data, + wbuf_data_q[wbuf_dir_q[i].ptr].be, + write_data, + write_be + ); + end + end + + WBUF_PEND: begin + match_pend_ptr = (i == int'(wbuf_write_hit_pend_dir_ptr)); + write_hit = write_i + & wbuf_write_hit_pend + & match_pend_ptr + & ~cfg_inhibit_write_coalescing_i; + + if (write_hit) begin + wbuf_data_write( + wbuf_data_d[wbuf_dir_q[i].ptr].data, + wbuf_data_d[wbuf_dir_q[i].ptr].be, + wbuf_data_q[wbuf_dir_q[i].ptr].data, + wbuf_data_q[wbuf_dir_q[i].ptr].be, + write_data, + write_be + ); + end + + if (i == int'(wbuf_dir_send_ptr_q)) begin + 
send_data_w = send_meta_ready; + send_meta_valid = send_data_wok; + if (send_meta_ready && send_data_wok) begin + wbuf_dir_state_d[i] = WBUF_SENT; + end + end + end + + WBUF_SENT: begin + if (ack_i && (i == int'(ack_id_i))) begin + wbuf_dir_state_d[i] = WBUF_FREE; + end + end + endcase + end + end + + always_comb + begin : wbuf_data_valid_comb + wbuf_data_valid_d = wbuf_data_valid_q; + + // allocate a free data buffer on new write + if (write_i && wbuf_write_free) begin + wbuf_data_valid_d[wbuf_data_free_ptr_q] = 1'b1; + end + + // de-allocate a data buffer as soon as it is send + if (send_data_valid_o && send_data_ready_i) begin + wbuf_data_valid_d[send_data_q.send_data_ptr] = 1'b0; + end + end + // }}} + + // Send control + // {{{ + // Data channel + hpdcache_fifo_reg #( + .FIFO_DEPTH (WBUF_SEND_FIFO_DEPTH), + .FEEDTHROUGH (WBUF_SEND_FEEDTHROUGH), + .fifo_data_t (wbuf_send_data_t) + ) send_data_ptr_fifo_i ( + .clk_i, + .rst_ni, + .w_i (send_data_w), + .wok_o (send_data_wok), + .wdata_i (send_data_d), + .r_i (send_data_ready_i), + .rok_o (send_data_valid_o), + .rdata_o (send_data_q) + ); + + assign send_data_d.send_data_ptr = wbuf_dir_q[wbuf_dir_send_ptr_q].ptr, + send_data_d.send_data_tag = wbuf_dir_q[wbuf_dir_send_ptr_q].tag; + + assign send_data_tag_o = wbuf_addr_t'(send_data_q.send_data_tag), + send_data_o = wbuf_data_q[send_data_q.send_data_ptr].data, + send_be_o = wbuf_data_q[send_data_q.send_data_ptr].be; + + // Meta-data channel + hpdcache_fifo_reg #( + .FIFO_DEPTH (WBUF_SEND_FIFO_DEPTH), + .FEEDTHROUGH (WBUF_SEND_FEEDTHROUGH), + .fifo_data_t (wbuf_send_meta_t) + ) send_meta_fifo_i ( + .clk_i, + .rst_ni, + .w_i (send_meta_valid), + .wok_o (send_meta_ready), + .wdata_i (send_meta_wdata), + .r_i (send_meta_ready_i), + .rok_o (send_meta_valid_o), + .rdata_o (send_meta_rdata) + ); + + assign send_meta_wdata.send_meta_tag = wbuf_dir_q[wbuf_dir_send_ptr_q].tag, + send_meta_wdata.send_meta_id = wbuf_dir_send_ptr_q, + send_meta_wdata.send_meta_uc = 
wbuf_dir_q[wbuf_dir_send_ptr_q].uc; + + assign send_addr_o = { send_meta_rdata.send_meta_tag, {WBUF_OFFSET_WIDTH{1'b0}} }, + send_id_o = send_meta_rdata.send_meta_id, + send_uc_o = send_meta_rdata.send_meta_uc; + + // Send pointer + always_comb + begin : wbuf_send_comb + wbuf_dir_send_ptr_d = wbuf_dir_find_next(wbuf_dir_send_ptr_q, wbuf_dir_state_q, WBUF_PEND); + if (wbuf_dir_state_q[wbuf_dir_send_ptr_q] == WBUF_PEND) begin + if (!send_meta_valid || !send_meta_ready) begin + wbuf_dir_send_ptr_d = wbuf_dir_send_ptr_q; + end + end + end + // }}} + + // Internal state assignment + // {{{ + always_ff @(posedge clk_i) wbuf_data_q <= wbuf_data_d; + + always_ff @(posedge clk_i or negedge rst_ni) + begin : wbuf_state_ff + if (!rst_ni) begin + wbuf_dir_q <= '0; + wbuf_dir_state_q <= {WBUF_DIR_ENTRIES{WBUF_FREE}}; + wbuf_data_valid_q <= '0; + wbuf_dir_free_ptr_q <= 0; + wbuf_dir_send_ptr_q <= 0; + wbuf_data_free_ptr_q <= 0; + end else begin + wbuf_dir_q <= wbuf_dir_d; + wbuf_dir_state_q <= wbuf_dir_state_d; + wbuf_data_valid_q <= wbuf_data_valid_d; + wbuf_dir_free_ptr_q <= wbuf_dir_free_ptr_d; + wbuf_dir_send_ptr_q <= wbuf_dir_send_ptr_d; + wbuf_data_free_ptr_q <= wbuf_data_free_ptr_d; + end + end + // }}} + + // Assertions + // {{{ + // pragma translate_off + initial assert(WBUF_WORDS inside {1, 2, 4, 8, 16}) else + $error("WBUF: width of data buffers must be a power of 2"); + ack_sent_assert: assert property (@(posedge clk_i) disable iff (!rst_ni) + (ack_i -> (wbuf_dir_state_q[ack_id_i] == WBUF_SENT))) else + $error("WBUF: acknowledging a not SENT slot"); + send_pend_assert: assert property (@(posedge clk_i) disable iff (!rst_ni) + (send_meta_valid -> (wbuf_dir_state_q[wbuf_dir_send_ptr_q] == WBUF_PEND))) else + $error("WBUF: sending a not PEND slot"); + send_valid_data_assert: assert property (@(posedge clk_i) disable iff (!rst_ni) + (send_data_valid_o -> (wbuf_data_valid_q[send_data_q.send_data_ptr] == 1'b1))) else + $error("WBUF: sending a not valid data"); + // pragma 
translate_on + // }}} +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_wbuf_wrapper.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_wbuf_wrapper.sv new file mode 100644 index 00000000..1792ff47 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hpdcache_wbuf_wrapper.sv @@ -0,0 +1,228 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : HPDcache Write Buffer Wrapper + * History : + */ +/* This wrapper adapts the send interface of the write buffer to the memory + * interface of the cache. 
+ */ +module hpdcache_wbuf_wrapper +import hpdcache_pkg::*; + // Parameters + // {{{ +#( + parameter int HPDcacheMemIdWidth = 8, + parameter int HPDcacheMemDataWidth = 512, + parameter type hpdcache_mem_req_t = logic, + parameter type hpdcache_mem_req_w_t = logic, + parameter type hpdcache_mem_resp_w_t = logic, + + localparam type hpdcache_mem_id_t = logic [HPDcacheMemIdWidth-1:0] +) + // }}} + // Ports + // {{{ +( + // Clock and reset signals + input logic clk_i, + input logic rst_ni, + + // Global control signals + output logic empty_o, + output logic full_o, + input logic flush_all_i, + + // Configuration signals + // Timer threshold + input wbuf_timecnt_t cfg_threshold_i, + // Reset timer on write + input logic cfg_reset_timecnt_on_write_i, + // Sequentialize write-after-write hazards + input logic cfg_sequential_waw_i, + // Inhibit write coalescing + input logic cfg_inhibit_write_coalescing_i, + + // Write interface + input logic write_i, + output logic write_ready_o, + input wbuf_addr_t write_addr_i, + input wbuf_data_t write_data_i, + input wbuf_be_t write_be_i, // byte-enable + input logic write_uc_i, // uncacheable write + + // Read hit interface + input wbuf_addr_t read_addr_i, + output logic read_hit_o, + input logic read_flush_hit_i, + + // Replay hit interface + input wbuf_addr_t replay_addr_i, + input logic replay_is_read_i, + output logic replay_open_hit_o, + output logic replay_pend_hit_o, + output logic replay_sent_hit_o, + output logic replay_not_ready_o, + + // Memory interface + input logic mem_req_write_ready_i, + output logic mem_req_write_valid_o, + output hpdcache_mem_req_t mem_req_write_o, + + input logic mem_req_write_data_ready_i, + output logic mem_req_write_data_valid_o, + output hpdcache_mem_req_w_t mem_req_write_data_o, + + output logic mem_resp_write_ready_o, + input logic mem_resp_write_valid_i, + input hpdcache_mem_resp_w_t mem_resp_write_i +); + // }}} + + // Internal signals + // {{{ + wbuf_addr_t send_addr; + wbuf_dir_ptr_t 
send_id; + logic send_uc; + wbuf_addr_t send_data_tag; + wbuf_data_buf_t send_data; + wbuf_be_buf_t send_be; + wbuf_dir_ptr_t ack_id; + logic ack_error; + // }}} + + // Wrapped write buffer + // {{{ + hpdcache_wbuf #( + .WBUF_DIR_ENTRIES (HPDCACHE_WBUF_DIR_ENTRIES), + .WBUF_DATA_ENTRIES (HPDCACHE_WBUF_DATA_ENTRIES), + .WBUF_WORD_WIDTH (HPDCACHE_REQ_DATA_WIDTH), + .WBUF_WORDS (HPDCACHE_WBUF_WORDS), + .WBUF_PA_WIDTH (HPDCACHE_PA_WIDTH), + .WBUF_TIMECNT_MAX ((2**HPDCACHE_WBUF_TIMECNT_WIDTH) - 1), + .WBUF_READ_MATCH_WIDTH (HPDCACHE_NLINE_WIDTH), + .WBUF_SEND_FEEDTHROUGH (HPDCACHE_WBUF_SEND_FEEDTHROUGH) + ) hpdcache_wbuf_i ( + .clk_i, + .rst_ni, + .empty_o, + .full_o, + .flush_all_i, + .cfg_threshold_i, + .cfg_reset_timecnt_on_write_i, + .cfg_sequential_waw_i, + .cfg_inhibit_write_coalescing_i, + .write_i, + .write_ready_o, + .write_addr_i, + .write_data_i, + .write_be_i, + .write_uc_i, + .read_addr_i, + .read_hit_o, + .read_flush_hit_i, + .replay_addr_i, + .replay_is_read_i, + .replay_open_hit_o, + .replay_pend_hit_o, + .replay_sent_hit_o, + .replay_not_ready_o, + .send_meta_ready_i (mem_req_write_ready_i), + .send_meta_valid_o (mem_req_write_valid_o), + .send_addr_o (send_addr), + .send_id_o (send_id), + .send_uc_o (send_uc), + .send_data_ready_i (mem_req_write_data_ready_i), + .send_data_valid_o (mem_req_write_data_valid_o), + .send_data_tag_o (send_data_tag), + .send_data_o (send_data), + .send_be_o (send_be), + .ack_i (mem_resp_write_valid_i), + .ack_id_i (ack_id), + .ack_error_i (ack_error) + ); + // }}} + + // Memory interface + // {{{ + assign mem_req_write_o.mem_req_addr = send_addr, + mem_req_write_o.mem_req_len = 0, + mem_req_write_o.mem_req_size = get_hpdcache_mem_size(HPDCACHE_WBUF_DATA_WIDTH/8), + mem_req_write_o.mem_req_id = hpdcache_mem_id_t'(send_id), + mem_req_write_o.mem_req_command = HPDCACHE_MEM_WRITE, + mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_ADD, + mem_req_write_o.mem_req_cacheable = ~send_uc; + + generate + localparam int unsigned 
WBUF_MEM_DATA_RATIO = HPDcacheMemDataWidth/HPDCACHE_WBUF_DATA_WIDTH; + localparam int unsigned WBUF_MEM_DATA_WORD_INDEX_WIDTH = $clog2(WBUF_MEM_DATA_RATIO); + + assign mem_req_write_data_o.mem_req_w_last = 1'b1; + + if (WBUF_MEM_DATA_RATIO > 1) + begin : wbuf_data_upsizing_gen + logic [HPDCACHE_WBUF_DATA_WIDTH/8-1:0][WBUF_MEM_DATA_RATIO-1:0] mem_req_be; + + // demux send BE + hpdcache_demux #( + .NOUTPUT (WBUF_MEM_DATA_RATIO), + .DATA_WIDTH (HPDCACHE_WBUF_DATA_WIDTH/8), + .ONE_HOT_SEL (1'b0) + ) mem_write_be_demux_i ( + .data_i (send_be), + .sel_i (send_data_tag[0 +: WBUF_MEM_DATA_WORD_INDEX_WIDTH]), + .data_o (mem_req_be) + ); + + assign mem_req_write_data_o.mem_req_w_data = {WBUF_MEM_DATA_RATIO{send_data}}, + mem_req_write_data_o.mem_req_w_be = mem_req_be; + + end else if (WBUF_MEM_DATA_RATIO == 1) + begin : wbuf_data_forwarding_gen + assign mem_req_write_data_o.mem_req_w_data = send_data, + mem_req_write_data_o.mem_req_w_be = send_be; + end + + // Assertions + // {{{ + // pragma translate_off + initial assert(WBUF_MEM_DATA_RATIO > 0) else + $error($sformatf("WBUF: data width of mem interface (%d) shall be g.e. 
to wbuf data width(%d)", + HPDcacheMemDataWidth, HPDCACHE_WBUF_DATA_WIDTH)); + // pragma translate_on + // }}} + endgenerate + + assign mem_resp_write_ready_o = 1'b1, + ack_id = mem_resp_write_i.mem_resp_w_id[0 +: HPDCACHE_WBUF_DIR_PTR_WIDTH], + ack_error = (mem_resp_write_i.mem_resp_w_error != HPDCACHE_MEM_RESP_OK); + // }}} + + // Assertions + // {{{ + // pragma translate_off + initial assert (HPDCACHE_WBUF_DIR_PTR_WIDTH <= HPDcacheMemIdWidth) else + $fatal("HPDcacheMemIdWidth is not wide enough to fit all possible write buffer transactions"); + // pragma translate_on + // }}} + +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride.sv new file mode 100644 index 00000000..dfef92da --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride.sv @@ -0,0 +1,374 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Riccardo Alidori, Cesar Fuguet + * Maintainers(s): Cesar Fuguet + * Creation Date : June, 2021 + * Description : HPDcache Linear Hardware Memory Prefetcher. 
+ * History : + */ +module hwpf_stride +import hwpf_stride_pkg::*; +import hpdcache_pkg::*; +// Parameters +// {{{ +#( + parameter int CACHE_LINE_BYTES = 64 +) +// }}} + +// Ports +// {{{ +( + input logic clk_i, + input logic rst_ni, + + // CSR + input logic csr_base_set_i, + input hwpf_stride_base_t csr_base_i, + input logic csr_param_set_i, + input hwpf_stride_param_t csr_param_i, + input logic csr_throttle_set_i, + input hwpf_stride_throttle_t csr_throttle_i, + + output hwpf_stride_base_t csr_base_o, + output hwpf_stride_param_t csr_param_o, + output hwpf_stride_throttle_t csr_throttle_o, + + // If high, the prefetcher is enabled and active + output logic busy_o, + + // Snooping + // Address to snoop on requests ports + output hpdcache_nline_t snoop_nline_o, + // If set to one, the snoop address matched one of the requests + input snoop_match_i, + + // D-Cache interface + output logic hpdcache_req_valid_o, + input logic hpdcache_req_ready_i, + output hpdcache_req_t hpdcache_req_o, + input logic hpdcache_rsp_valid_i, + input hpdcache_rsp_t hpdcache_rsp_i +); +// }}} + + import hpdcache_pkg::hpdcache_req_addr_t; + + // Definition of constants + // {{{ + localparam int STRIDE_WIDTH = $bits(csr_param_i.stride); + localparam int NBLOCKS_WIDTH = $bits(csr_param_i.nblocks); + localparam int NLINES_WIDTH = $bits(csr_param_i.nlines); + localparam int NWAIT_WIDTH = $bits(csr_throttle_i.nwait); + localparam int INFLIGHT_WIDTH = $bits(csr_throttle_i.ninflight); + localparam int NLINES_CNT_WIDTH = NLINES_WIDTH; + // }}} + + // Internal registers and signals + // {{{ + // FSM + enum { + IDLE, + SNOOP, + SEND_REQ, + WAIT, + DONE, + ABORT + } state_d, state_q; + + logic [NBLOCKS_WIDTH-1:0] nblocks_cnt_d, nblocks_cnt_q; + logic [NLINES_CNT_WIDTH-1:0] nlines_cnt_d, nlines_cnt_q; + logic [NWAIT_WIDTH-1:0] nwait_cnt_d, nwait_cnt_q; + logic [INFLIGHT_WIDTH-1:0] inflight_cnt_d, inflight_cnt_q; + logic inflight_inc, inflight_dec; + + hwpf_stride_base_t csr_base_q; + hwpf_stride_base_t 
shadow_base_q, shadow_base_d; + hwpf_stride_param_t csr_param_q; + hwpf_stride_param_t shadow_param_q, shadow_param_d; + hwpf_stride_throttle_t csr_throttle_q; + hwpf_stride_throttle_t shadow_throttle_q, shadow_throttle_d; + hpdcache_nline_t request_nline_q, request_nline_d; + + hpdcache_set_t hpdcache_req_set; + hpdcache_tag_t hpdcache_req_tag; + + logic csr_base_update; + hpdcache_nline_t increment_stride; + logic is_inflight_max; + + // Default assignment + assign increment_stride = hpdcache_nline_t'(shadow_param_q.stride) + 1'b1; + assign inflight_dec = hpdcache_rsp_valid_i; + assign snoop_nline_o = shadow_base_q.base_cline; + assign is_inflight_max = ( shadow_throttle_q.ninflight == '0 ) ? + 1'b0 : ( inflight_cnt_q >= shadow_throttle_q.ninflight ); + assign csr_base_o = csr_base_q; + assign csr_param_o = csr_param_q; + assign csr_throttle_o = csr_throttle_q; + // }}} + + // Dcache outputs + // {{{ + assign hpdcache_req_set = request_nline_q[0 +: HPDCACHE_SET_WIDTH], + hpdcache_req_tag = request_nline_q[HPDCACHE_SET_WIDTH +: HPDCACHE_TAG_WIDTH]; + + assign hpdcache_req_o.addr_offset = { hpdcache_req_set, {HPDCACHE_OFFSET_WIDTH{1'b0}} }, + hpdcache_req_o.wdata = '0, + hpdcache_req_o.op = HPDCACHE_REQ_CMO, + hpdcache_req_o.be = '1, + hpdcache_req_o.size = HPDCACHE_REQ_CMO_PREFETCH, + hpdcache_req_o.sid = '0, // this is set when connecting to the dcache + hpdcache_req_o.tid = '0, // this is set by the wrapper of the prefetcher + hpdcache_req_o.need_rsp = 1'b1, + hpdcache_req_o.phys_indexed = 1'b1, + hpdcache_req_o.addr_tag = hpdcache_req_tag, + hpdcache_req_o.pma.uncacheable = 1'b0, + hpdcache_req_o.pma.io = 1'b0; + // }}} + + // Set state of internal registers + // {{{ + always_ff @(posedge clk_i or negedge rst_ni) + begin + if (!rst_ni) begin + csr_base_q <= '0; + csr_param_q <= '0; + shadow_base_q <= '0; + shadow_param_q <= '0; + shadow_throttle_q <= '0; + request_nline_q <= '0; + state_q <= IDLE; + end else begin + if (csr_base_set_i) csr_base_q <= csr_base_i; 
+ else if (csr_base_update) csr_base_q <= shadow_base_d; + if (csr_param_set_i) csr_param_q <= csr_param_i; + if (csr_throttle_set_i) csr_throttle_q <= csr_throttle_i; + shadow_base_q <= shadow_base_d; + shadow_param_q <= shadow_param_d; + shadow_throttle_q <= shadow_throttle_d; + request_nline_q <= request_nline_d; + state_q <= state_d; + end + end + // }}} + + // Update internal counters + // {{{ + always_comb begin : inflight_cnt + inflight_cnt_d = inflight_cnt_q; + + // Every time we send a dcache request, increment the counter + if ( inflight_inc ) begin + inflight_cnt_d++; + end + + // Every time we got a response from the cache, decrement the counter + if ( inflight_dec && ( inflight_cnt_q > 0 )) begin + inflight_cnt_d--; + end + end + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + nblocks_cnt_q <= '0; + nlines_cnt_q <= '0; + nwait_cnt_q <= '0; + inflight_cnt_q <= '0; + end else begin + nblocks_cnt_q <= nblocks_cnt_d; + nlines_cnt_q <= nlines_cnt_d; + nwait_cnt_q <= nwait_cnt_d; + inflight_cnt_q <= inflight_cnt_d; + end + end + // }}} + + // FSM + // {{{ + always_comb begin : fsm_control + // default assignments + hpdcache_req_valid_o = 1'b0; + nblocks_cnt_d = nblocks_cnt_q; + nlines_cnt_d = nlines_cnt_q; + nwait_cnt_d = nwait_cnt_q; + inflight_inc = 1'b0; + busy_o = 1'b0; + csr_base_update = 1'b0; + + shadow_base_d = shadow_base_q; + shadow_param_d = shadow_param_q; + shadow_throttle_d = shadow_throttle_q; + request_nline_d = request_nline_q; + state_d = state_q; + + case ( state_q ) + + IDLE: begin + // If enabled, go snooping the dcache ports + if ( csr_base_q.enable ) begin + shadow_base_d = csr_base_q; + if (( csr_param_q.nlines > 0 ) || ( csr_param_q.nblocks > 0 )) begin + shadow_param_d = csr_param_q; + shadow_throttle_d = csr_throttle_q; + state_d = SNOOP; + end else begin + // no prefetch needed, disarm immediately + shadow_base_d.enable = 1'b0; + csr_base_update = 1'b1; + end + end + end + + + SNOOP: begin + if ( 
csr_base_q.enable ) begin + // If a snooper matched an address, send the request + if ( snoop_match_i ) begin + state_d = SEND_REQ; + + if ( shadow_param_q.nlines == 0 ) begin + // skip the first block + request_nline_d = shadow_base_q.base_cline + + hpdcache_nline_t'(increment_stride); + nblocks_cnt_d = ( shadow_param_q.nblocks > 0 ) ? + shadow_param_q.nblocks - 1 : 0; + nlines_cnt_d = 0; + + // update the base cacheline to the first one of the next block + shadow_base_d.base_cline = request_nline_d; + end else begin + // skip the first cacheline (of the first block) + request_nline_d = shadow_base_q.base_cline + 1'b1; + nblocks_cnt_d = shadow_param_q.nblocks; + nlines_cnt_d = shadow_param_q.nlines - 1; + end + end + end else begin + state_d = IDLE; + end + end + + + SEND_REQ: begin + busy_o = 1'b1; + + // make the prefetch request to memory + hpdcache_req_valid_o = 1'b1; + + // we've got a grant, so we can move to the next request + if ( hpdcache_req_ready_i ) begin + inflight_inc = 1'b1; + + if ( nlines_cnt_q == 0 ) begin + // go to the first cacheline of the next block + request_nline_d = shadow_base_q.base_cline + + hpdcache_nline_t'(increment_stride); + nblocks_cnt_d = ( nblocks_cnt_q > 0 ) ? nblocks_cnt_q - 1 : 0; + nlines_cnt_d = shadow_param_q.nlines; + + // update the base cacheline to the first one of the next block + shadow_base_d.base_cline = request_nline_d; + end else begin + // go to the next cacheline (within the same block) + request_nline_d = request_nline_q + 1'b1; + nlines_cnt_d = nlines_cnt_q - 1; + end + + // if the NWAIT parameter is equal 0, we can issue a request every cycle + if (( nblocks_cnt_q == 0 ) && ( nlines_cnt_q == 0 )) begin + state_d = DONE; + end else if ( shadow_throttle_q.nwait == 0 ) begin + // Wait if the number of inflight requests is greater than + // the maximum indicated. Otherwise, send the next request + state_d = is_inflight_max ? 
WAIT : SEND_REQ; + end else begin + // Wait the indicated cycles before sending the next request + nwait_cnt_d = shadow_throttle_q.nwait; + state_d = WAIT; + end + + if ( !csr_base_q.enable ) state_d = ABORT; + end + end + + + WAIT: begin + // Wait until: + // - the indicated number of wait cycles between requests is reached (nwait) + // - the number of inflight requests is below the indicated maximum (ninflight) + busy_o = 1'b1; + if ( csr_base_q.enable ) begin + if ( !is_inflight_max && ( nwait_cnt_q == 0 )) begin + state_d = SEND_REQ; + end + + if ( nwait_cnt_q > 0 ) begin + nwait_cnt_d = nwait_cnt_q - 1; + end + end else begin + state_d = ABORT; + end + end + + + DONE: begin + busy_o = 1'b1; + if ( csr_base_q.enable ) begin + if (( inflight_cnt_q == 0 ) && !is_inflight_max && ( nwait_cnt_q == 0 )) begin + // Copy back shadow base register into the user visible one + csr_base_update = 1'b1; + + // Check the rearm bit + if ( shadow_base_q.rearm ) begin + state_d = SNOOP; + end else begin + state_d = IDLE; + + // disarm the prefetcher + shadow_base_d.enable = 1'b0; + end + + // Check the cycle bit + if ( shadow_base_q.cycle ) begin + // restore the base address + shadow_base_d.base_cline = csr_base_q.base_cline; + end + end + + if ( nwait_cnt_q > 0 ) begin + nwait_cnt_d = nwait_cnt_q - 1; + end + end else begin + state_d = ABORT; + end + end + + ABORT: begin + busy_o = 1'b1; + if ( inflight_cnt_q == 0 ) begin + state_d = IDLE; + end + end + endcase + end + // }}} +endmodule diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_arb.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_arb.sv new file mode 100644 index 00000000..1aa9df48 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_arb.sv @@ -0,0 +1,117 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 
WITH SHL-2.1
 *
 * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
 * may not use this file except in compliance with the License, or, at your
 * option, the Apache License version 2.0. You may obtain a copy of the
 * License at
 *
 * https://solderpad.org/licenses/SHL-2.1/
 *
 * Unless required by applicable law or agreed to in writing, any work
 * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */
/*
 *  Author(s)     : Riccardo Alidori, Cesar Fuguet
 *  Creation Date : June, 2021
 *  Description   : Hw prefetchers arbiter
 *  History       :
 */
//
//  hwpf_stride_arb: shares one cache request port between NUM_HW_PREFETCH
//  prefetcher engines.
//
//  Requests are arbitrated by an hpdcache_rrarb instance and the granted one
//  is forwarded through a one-hot multiplexer. Responses are routed back to
//  the engine whose index equals the TID field of the response.
//
module hwpf_stride_arb
import hpdcache_pkg::*;
//  Parameters
//  {{{
#(
    parameter NUM_HW_PREFETCH = 4
)
//  }}}

//  Ports
//  {{{
(
    input  logic                                 clk_i,
    input  logic                                 rst_ni,

    //  Dcache input interface (one slot per engine)
    input  logic          [NUM_HW_PREFETCH-1:0]  hwpf_stride_req_valid_i,
    output logic          [NUM_HW_PREFETCH-1:0]  hwpf_stride_req_ready_o,
    input  hpdcache_req_t [NUM_HW_PREFETCH-1:0]  hwpf_stride_req_i,
    output logic          [NUM_HW_PREFETCH-1:0]  hwpf_stride_rsp_valid_o,
    output hpdcache_rsp_t [NUM_HW_PREFETCH-1:0]  hwpf_stride_rsp_o, // Not used

    //  Dcache output interface (shared port)
    output logic                                 hpdcache_req_valid_o,
    input  logic                                 hpdcache_req_ready_i,
    output hpdcache_req_t                        hpdcache_req_o,
    input  logic                                 hpdcache_rsp_valid_i,
    input  hpdcache_rsp_t                        hpdcache_rsp_i // Not used
);
//  }}}

    //  Internal signals
    //  {{{
    logic          [NUM_HW_PREFETCH-1:0] hwpf_stride_req_valid;
    hpdcache_req_t [NUM_HW_PREFETCH-1:0] hwpf_stride_req;
    logic          [NUM_HW_PREFETCH-1:0] arb_req_gnt;
    //  }}}

    //  Requesters arbiter
    //  {{{
    //  Per-engine packing of the request ports. An engine sees ready only
    //  when it holds the grant and the cache accepts the request.
    generate
        for (genvar k = 0; k < NUM_HW_PREFETCH; k++) begin : gen_hwpf_stride_req
            assign hwpf_stride_req_valid[k]   = hwpf_stride_req_valid_i[k];
            assign hwpf_stride_req[k]         = hwpf_stride_req_i[k];
            assign hwpf_stride_req_ready_o[k] = arb_req_gnt[k] & hpdcache_req_ready_i;
        end
    endgenerate

    //  Arbiter: grant vector is used as the one-hot select of the mux below
    hpdcache_rrarb #(
        .N                  (NUM_HW_PREFETCH)
    ) hwpf_stride_req_arbiter_i (
        .clk_i,
        .rst_ni,
        .req_i              (hwpf_stride_req_valid),
        .gnt_o              (arb_req_gnt),
        .ready_i            (hpdcache_req_ready_i)
    );

    //  Request Multiplexor
    hpdcache_mux #(
        .NINPUT             (NUM_HW_PREFETCH),
        .DATA_WIDTH         ($bits(hpdcache_req_t)),
        .ONE_HOT_SEL        (1'b1)
    ) hwpf_stride_req_mux_i (
        .data_i             (hwpf_stride_req),
        .sel_i              (arb_req_gnt),
        .data_o             (hpdcache_req_o)
    );

    //  A request is presented to the cache whenever any grant is active
    assign hpdcache_req_valid_o = |arb_req_gnt;
    //  }}}

    //  Response demultiplexor
    //  {{{
    //  As the HW prefetcher does not need the TID field in the request, we
    //  use it to transport the identifier of the specific hardware
    //  prefetcher.
    //  This way we share the same SID for all HW prefetchers. Using
    //  different SIDs means that we need different ports to the cache and
    //  we actually want to reduce those.
    always_comb
    begin : resp_demux
        for (int unsigned pf = 0; pf < NUM_HW_PREFETCH; pf++) begin
            //  The response payload is broadcast; only the valid is steered
            hwpf_stride_rsp_o[pf]       = hpdcache_rsp_i;
            hwpf_stride_rsp_valid_o[pf] = hpdcache_rsp_valid_i &&
                                          (pf == int'(hpdcache_rsp_i.tid));
        end
    end
    //  }}}
endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_pkg.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_pkg.sv new file mode 100644 index 00000000..3470b786 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_pkg.sv @@ -0,0 +1,68 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License.
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : January, 2023 + * Description : High-Performance, Data-cache (HPDcache) HW memory + * prefetcher package + * History : + */ +package hwpf_stride_pkg; + // Base address configuration register of the hardware memory prefetcher + // {{{ + typedef struct packed { + logic [63:6] base_cline; + logic [5:3] unused; + logic cycle; + logic rearm; + logic enable; + } hwpf_stride_base_t; + // }}} + + // Parameters configuration register of the hardware memory prefetcher + // {{{ + typedef struct packed { + logic [63:48] nblocks; + logic [47:32] nlines; + logic [31:0] stride; + } hwpf_stride_param_t; + // }}} + + // Throttle configuration register of the hardware memory prefetcher + // {{{ + typedef struct packed { + logic [31:16] ninflight; + logic [15:0] nwait; + } hwpf_stride_throttle_t; + // }}} + + // Status register of the hardware memory prefetcher + // {{{ + typedef struct packed { + logic [63:48] unused1; + logic [47:32] busy; + logic free; + logic [30:20] unused0; + logic [19:16] free_index; + logic [15:0] enabled; + } hwpf_stride_status_t; + // }}} + +endpackage diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_snooper.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_snooper.sv new file mode 100644 index 00000000..ba995b50 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_snooper.sv @@ -0,0 +1,38 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. 
You may obtain a copy of the
 * License at
 *
 * https://solderpad.org/licenses/SHL-2.1/
 *
 * Unless required by applicable law or agreed to in writing, any work
 * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */
/*
 *  Authors       : Riccardo Alidori, Cesar Fuguet
 *  Creation Date : June, 2021
 *  Description   : Snooper used by the hardware memory prefetcher
 *  History       :
 */
//
//  hwpf_stride_snooper: purely combinational address comparator, gated by an
//  enable input.
//
module hwpf_stride_snooper
import hpdcache_pkg::*;
(
    input  logic            en_i,          // Snooper enable bit.
    input  hpdcache_nline_t base_nline_i,  // Address to check
    input  hpdcache_nline_t snoop_addr_i,  // Input address to snoop
    output                  snoop_match_o  // High when enabled and both addresses are equal
);

    //  Equality of the snooped address and the reference address
    logic nline_equal;

    assign nline_equal   = ( snoop_addr_i == base_nline_i );

    //  The snooper matches only if it is enabled and the addresses are equal
    assign snoop_match_o = nline_equal && en_i;

endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_wrapper.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_wrapper.sv new file mode 100644 index 00000000..fa1cfa4f --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_wrapper.sv @@ -0,0 +1,265 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0.
You may obtain a copy of the
 * License at
 *
 * https://solderpad.org/licenses/SHL-2.1/
 *
 * Unless required by applicable law or agreed to in writing, any work
 * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */
/*
 *  Authors       : Riccardo Alidori, Cesar Fuguet
 *  Creation Date : June, 2021
 *  Description   : Linear Hardware Memory Prefetcher wrapper.
 *  History       :
 */
//
//  hwpf_stride_wrapper: instantiates NUM_HW_PREFETCH hwpf_stride engines,
//  computes their snoop-match inputs from NUM_SNOOP_PORTS snoop ports,
//  builds the status register, and funnels the engines' requests to a
//  single cache port through hwpf_stride_arb.
//
//  Each engine's request is re-stamped before arbitration: SID comes from
//  hpdcache_req_sid_i and TID is the engine index (the arbiter uses the TID
//  of the response to route it back).
//
module hwpf_stride_wrapper
import hwpf_stride_pkg::*;
import hpdcache_pkg::*;
//  Parameters
//  {{{
#(
    parameter NUM_HW_PREFETCH = 4,
    parameter NUM_SNOOP_PORTS = 1
)
//  }}}

//  Ports
//  {{{
(
    input  logic                                        clk_i,
    input  logic                                        rst_ni,

    //  CSR
    //  {{{
    input  logic                  [NUM_HW_PREFETCH-1:0] hwpf_stride_base_set_i,
    input  hwpf_stride_base_t     [NUM_HW_PREFETCH-1:0] hwpf_stride_base_i,
    output hwpf_stride_base_t     [NUM_HW_PREFETCH-1:0] hwpf_stride_base_o,

    input  logic                  [NUM_HW_PREFETCH-1:0] hwpf_stride_param_set_i,
    input  hwpf_stride_param_t    [NUM_HW_PREFETCH-1:0] hwpf_stride_param_i,
    output hwpf_stride_param_t    [NUM_HW_PREFETCH-1:0] hwpf_stride_param_o,

    input  logic                  [NUM_HW_PREFETCH-1:0] hwpf_stride_throttle_set_i,
    input  hwpf_stride_throttle_t [NUM_HW_PREFETCH-1:0] hwpf_stride_throttle_i,
    output hwpf_stride_throttle_t [NUM_HW_PREFETCH-1:0] hwpf_stride_throttle_o,

    output hwpf_stride_status_t                         hwpf_stride_status_o,
    //  }}}

    //  Snooping
    //  {{{
    input  logic                  [NUM_SNOOP_PORTS-1:0] snoop_valid_i,
    input  logic                  [NUM_SNOOP_PORTS-1:0] snoop_abort_i,
    input  hpdcache_req_offset_t  [NUM_SNOOP_PORTS-1:0] snoop_addr_offset_i,
    input  hpdcache_tag_t         [NUM_SNOOP_PORTS-1:0] snoop_addr_tag_i,
    input  logic                  [NUM_SNOOP_PORTS-1:0] snoop_phys_indexed_i,
    //  }}}

    //  Dcache interface
    //  {{{
    input  hpdcache_req_sid_t                           hpdcache_req_sid_i,
    output logic                                        hpdcache_req_valid_o,
    input  logic                                        hpdcache_req_ready_i,
    output hpdcache_req_t                               hpdcache_req_o,
    output logic                                        hpdcache_req_abort_o,
    output hpdcache_tag_t                               hpdcache_req_tag_o,
    output hpdcache_pma_t                               hpdcache_req_pma_o,
    input  logic                                        hpdcache_rsp_valid_i,
    input  hpdcache_rsp_t                               hpdcache_rsp_i
    //  }}}
);
//  }}}

    //  Internal registers
    //  {{{
    logic                 [NUM_SNOOP_PORTS-1:0] snoop_valid_q;
    hpdcache_req_offset_t [NUM_SNOOP_PORTS-1:0] snoop_addr_offset_q;
    //  }}}

    //  Internal signals
    //  {{{
    logic            [NUM_HW_PREFETCH-1:0] hwpf_stride_enable;
    logic            [NUM_HW_PREFETCH-1:0] hwpf_stride_free;
    logic            [NUM_HW_PREFETCH-1:0] hwpf_stride_status_busy;
    logic            [3:0]                 hwpf_stride_status_free_idx;

    hpdcache_nline_t [NUM_HW_PREFETCH-1:0] hwpf_snoop_nline;
    logic            [NUM_HW_PREFETCH-1:0] hwpf_snoop_match;

    logic            [NUM_HW_PREFETCH-1:0] hwpf_stride_req_valid;
    logic            [NUM_HW_PREFETCH-1:0] hwpf_stride_req_ready;
    hpdcache_req_t   [NUM_HW_PREFETCH-1:0] hwpf_stride_req;

    logic            [NUM_HW_PREFETCH-1:0] hwpf_stride_arb_in_req_valid;
    logic            [NUM_HW_PREFETCH-1:0] hwpf_stride_arb_in_req_ready;
    hpdcache_req_t   [NUM_HW_PREFETCH-1:0] hwpf_stride_arb_in_req;
    logic            [NUM_HW_PREFETCH-1:0] hwpf_stride_arb_in_rsp_valid;
    hpdcache_rsp_t   [NUM_HW_PREFETCH-1:0] hwpf_stride_arb_in_rsp;
    //  }}}

    //  Assertions
    //  {{{
    //  pragma translate_off
    //  The status fields and the 4-bit free index are sized for 16 engines
    initial
    begin
        max_hwpf_stride_assert: assert (NUM_HW_PREFETCH <= 16) else
                $error("hwpf_stride: maximum number of HW prefetchers is 16");
    end
    //  pragma translate_on
    //  }}}

    //  Compute the status information
    //  {{{
    //  Index of the lowest-numbered free engine (0 when none is free)
    always_comb begin: hwpf_stride_priority_encoder
        hwpf_stride_status_free_idx = '0;
        for (int unsigned i = 0; i < NUM_HW_PREFETCH; i++) begin
            if (hwpf_stride_free[i]) begin
                hwpf_stride_status_free_idx = i;
                break;
            end
        end
    end

    //  Free flag of engines: neither enabled nor busy
    assign hwpf_stride_free            = ~(hwpf_stride_enable | hwpf_stride_status_busy);
    //  Busy flags
    assign hwpf_stride_status_o[63:32] = {{32-NUM_HW_PREFETCH{1'b0}}, hwpf_stride_status_busy};
    //  Global free flag
    assign hwpf_stride_status_o[31]    = |hwpf_stride_free;
    //  Free Index
    assign hwpf_stride_status_o[30:16] = {11'b0, hwpf_stride_status_free_idx};
    //  Enable flags
    assign hwpf_stride_status_o[15:0]  = {{16-NUM_HW_PREFETCH{1'b0}}, hwpf_stride_enable};
    //  }}}

    //  Hardware prefetcher engines
    //  {{{
    generate
        //  Per snoop port: registered copy of the valid and address offset,
        //  refreshed only on cycles where the port is physically indexed
        for (genvar j = 0; j < NUM_SNOOP_PORTS; j++) begin
            always_ff @(posedge clk_i or negedge rst_ni)
            begin : snoop_ff
                if (!rst_ni) begin
                    snoop_valid_q[j]       <= 1'b0;
                    snoop_addr_offset_q[j] <= '0;
                end else begin
                    if (snoop_phys_indexed_i[j]) begin
                        snoop_valid_q[j]       <= snoop_valid_i[j];
                        snoop_addr_offset_q[j] <= snoop_addr_offset_i[j];
                    end
                end
            end
        end

        for (genvar i = 0; i < NUM_HW_PREFETCH; i++) begin
            assign hwpf_stride_enable[i] = hwpf_stride_base_o[i].enable;

            //  Compute snoop match signals
            //  {{{
            //  Engine i matches when any valid, non-aborted snoop port
            //  carries the cacheline the engine is waiting for.
            always_comb
            begin : snoop_comb
                hwpf_snoop_match[i] = 1'b0;
                for (int j = 0; j < NUM_SNOOP_PORTS; j++) begin
                    automatic logic                 snoop_valid;
                    automatic hpdcache_req_offset_t snoop_offset;
                    automatic hpdcache_nline_t      snoop_nline;

                    //  Current-cycle values for physically indexed ports,
                    //  registered values otherwise
                    if (snoop_phys_indexed_i[j]) begin
                        snoop_valid  = snoop_valid_i[j];
                        snoop_offset = snoop_addr_offset_i[j];
                    end else begin
                        snoop_valid  = snoop_valid_q[j];
                        snoop_offset = snoop_addr_offset_q[j];
                    end
                    //  NOTE(review): this concatenates the tag with the whole
                    //  request offset; if hpdcache_req_offset_t also holds
                    //  the intra-line byte offset, the result is wider than
                    //  hpdcache_nline_t and gets truncated - confirm against
                    //  the hpdcache_pkg type definitions.
                    snoop_nline = {snoop_addr_tag_i[j], snoop_offset};
                    hwpf_snoop_match[i] |= (snoop_valid && !snoop_abort_i[j] &&
                                           (hwpf_snoop_nline[i] == snoop_nline));
                end
            end
            //  }}}

            hwpf_stride #(
                .CACHE_LINE_BYTES   ( HPDCACHE_CL_WIDTH/8 )
            ) hwpf_stride_i(
                .clk_i,
                .rst_ni,

                .csr_base_set_i       ( hwpf_stride_base_set_i[i] ),
                .csr_base_i           ( hwpf_stride_base_i[i] ),
                .csr_param_set_i      ( hwpf_stride_param_set_i[i] ),
                .csr_param_i          ( hwpf_stride_param_i[i] ),
                .csr_throttle_set_i   ( hwpf_stride_throttle_set_i[i] ),
                .csr_throttle_i       ( hwpf_stride_throttle_i[i] ),

                .csr_base_o           ( hwpf_stride_base_o[i] ),
                .csr_param_o          ( hwpf_stride_param_o[i] ),
                .csr_throttle_o       ( hwpf_stride_throttle_o[i] ),

                .busy_o               ( hwpf_stride_status_busy[i] ),

                .snoop_nline_o        ( hwpf_snoop_nline[i] ),
                .snoop_match_i        ( hwpf_snoop_match[i] ),

                .hpdcache_req_valid_o ( hwpf_stride_req_valid[i] ),
                .hpdcache_req_ready_i ( hwpf_stride_req_ready[i] ),
                .hpdcache_req_o       ( hwpf_stride_req[i] ),
                .hpdcache_rsp_valid_i ( hwpf_stride_arb_in_rsp_valid[i] ),
                .hpdcache_rsp_i       ( hwpf_stride_arb_in_rsp[i] )
            );

            //  Re-stamp the engine request before arbitration.
            //  The engine marks its requests as physically indexed and puts
            //  the cacheline tag in addr_tag, so the tag is forwarded here
            //  (it was previously overwritten with '0, which dropped the
            //  upper part of the prefetch address).
            assign hwpf_stride_req_ready[i]               = hwpf_stride_arb_in_req_ready[i],
                   hwpf_stride_arb_in_req_valid[i]        = hwpf_stride_req_valid[i],
                   hwpf_stride_arb_in_req[i].addr_offset  = hwpf_stride_req[i].addr_offset,
                   hwpf_stride_arb_in_req[i].wdata        = hwpf_stride_req[i].wdata,
                   hwpf_stride_arb_in_req[i].op           = hwpf_stride_req[i].op,
                   hwpf_stride_arb_in_req[i].be           = hwpf_stride_req[i].be,
                   hwpf_stride_arb_in_req[i].size         = hwpf_stride_req[i].size,
                   hwpf_stride_arb_in_req[i].sid          = hpdcache_req_sid_i,
                   hwpf_stride_arb_in_req[i].tid          = hpdcache_req_tid_t'(i),
                   hwpf_stride_arb_in_req[i].need_rsp     = hwpf_stride_req[i].need_rsp,
                   hwpf_stride_arb_in_req[i].phys_indexed = hwpf_stride_req[i].phys_indexed,
                   hwpf_stride_arb_in_req[i].addr_tag     = hwpf_stride_req[i].addr_tag,
                   hwpf_stride_arb_in_req[i].pma          = '0;
        end
    endgenerate
    //  }}}

    //  Hardware prefetcher arbiter between engines
    //  {{{
    hwpf_stride_arb #(
        .NUM_HW_PREFETCH          ( NUM_HW_PREFETCH )
    ) hwpf_stride_arb_i (
        .clk_i,
        .rst_ni,

        //  DCache input interface
        .hwpf_stride_req_valid_i  ( hwpf_stride_arb_in_req_valid ),
        .hwpf_stride_req_ready_o  ( hwpf_stride_arb_in_req_ready ),
        .hwpf_stride_req_i        ( hwpf_stride_arb_in_req ),
        .hwpf_stride_rsp_valid_o  ( hwpf_stride_arb_in_rsp_valid ),
        .hwpf_stride_rsp_o        ( hwpf_stride_arb_in_rsp ),

        //  DCache output interface
        .hpdcache_req_valid_o,
        .hpdcache_req_ready_i,
        .hpdcache_req_o,
        .hpdcache_rsp_valid_i,
        .hpdcache_rsp_i
    );

    assign hpdcache_req_abort_o = 1'b0, // unused on physically indexed requests
           hpdcache_req_tag_o   = '0,   // unused on physically indexed requests
           hpdcache_req_pma_o   = '0;   // unused on physically indexed requests
    //  }}}

endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_req_read_arbiter.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_req_read_arbiter.sv new file mode 100644 index 00000000..cb32acf5 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_req_read_arbiter.sv @@ -0,0 +1,103 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License.
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : Dcache Memory Read Request Channel Arbiter + * History : + */ +module hpdcache_mem_req_read_arbiter +import hpdcache_pkg::*; +// Parameters +// {{{ +#( + parameter hpdcache_uint N = 0, + parameter type hpdcache_mem_req_t = logic +) +// }}} + +// Ports +// {{{ +( + input logic clk_i, + input logic rst_ni, + + output logic mem_req_read_ready_o [N-1:0], + input logic mem_req_read_valid_i [N-1:0], + input hpdcache_mem_req_t mem_req_read_i [N-1:0], + + input logic mem_req_read_ready_i, + output logic mem_req_read_valid_o, + output hpdcache_mem_req_t mem_req_read_o +); +// }}} + + logic [N-1:0] mem_read_arb_req_valid; + hpdcache_mem_req_t [N-1:0] mem_read_arb_req; + logic [N-1:0] mem_read_arb_req_gnt; + + logic req_valid; + + genvar gen_i; + + + // Pack inputs + generate + for (gen_i = 0; gen_i < int'(N); gen_i++) begin : pack_inputs_gen + assign mem_read_arb_req_valid[gen_i] = mem_req_read_valid_i[gen_i], + mem_read_arb_req [gen_i] = mem_req_read_i[gen_i]; + end + endgenerate + + assign req_valid = |(mem_read_arb_req_gnt & mem_read_arb_req_valid); + + // Fixed-priority arbiter + hpdcache_fxarb #( + .N (N) + ) hpdcache_fxarb_mem_req_write_i ( + .clk_i, + .rst_ni, + .req_i (mem_read_arb_req_valid), + .gnt_o (mem_read_arb_req_gnt), + .ready_i (mem_req_read_ready_i) + ); + + // Demultiplexor for the ready signal + generate + for (gen_i = 0; gen_i < int'(N); gen_i++) begin : req_ready_gen + assign mem_req_read_ready_o[gen_i] = mem_req_read_ready_i & + mem_read_arb_req_gnt[gen_i] & mem_read_arb_req_valid[gen_i]; + end + endgenerate + + assign mem_req_read_valid_o = req_valid; + + // Multiplexor for requests + hpdcache_mux #( + .NINPUT (N), + .DATA_WIDTH ($bits(hpdcache_mem_req_t)), + .ONE_HOT_SEL (1'b1) + ) mem_read_req_mux_i ( + .data_i (mem_read_arb_req), + .sel_i (mem_read_arb_req_gnt), + .data_o (mem_req_read_o) + ); + +endmodule diff --git 
a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_req_write_arbiter.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_req_write_arbiter.sv new file mode 100644 index 00000000..a7916eca --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_req_write_arbiter.sv @@ -0,0 +1,193 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : April, 2021
+ *  Description   : Dcache Memory Write Channels Arbiter: grants one of N requesters and forwards its write request and write data to the single output channels
+ *  History       :
+ */
+module hpdcache_mem_req_write_arbiter
+import hpdcache_pkg::*;
+//  Parameters
+//  {{{
+#(
+    parameter hpdcache_uint N = 0,                    // number of requesters (size of every [N-1:0] vector below)
+    parameter type hpdcache_mem_req_t = logic,        // payload type of the write request channel
+    parameter type hpdcache_mem_req_w_t = logic       // payload type of the write data channel
+)
+//  }}}
+
+//  Ports
+//  {{{
+(
+    input  logic                 clk_i,
+    input  logic                 rst_ni,                        // asynchronous reset, active low
+
+    output logic                 mem_req_write_ready_o      [N-1:0],  // per-requester ready, request channel
+    input  logic                 mem_req_write_valid_i      [N-1:0],  // per-requester valid, request channel
+    input  hpdcache_mem_req_t    mem_req_write_i            [N-1:0],  // per-requester request payload
+
+    output logic                 mem_req_write_data_ready_o [N-1:0],  // per-requester ready, data channel
+    input  logic                 mem_req_write_data_valid_i [N-1:0],  // per-requester valid, data channel
+    input  hpdcache_mem_req_w_t  mem_req_write_data_i       [N-1:0],  // per-requester data payload
+
+    input  logic                 mem_req_write_ready_i,         // ready of the arbitrated request channel
+    output logic                 mem_req_write_valid_o,         // valid of the arbitrated request channel
+    output hpdcache_mem_req_t    mem_req_write_o,               // request payload of the granted requester
+
+    input  logic                 mem_req_write_data_ready_i,    // ready of the arbitrated data channel
+    output logic                 mem_req_write_data_valid_o,    // valid of the arbitrated data channel
+    output hpdcache_mem_req_w_t  mem_req_write_data_o           // data payload of the granted requester
+);
+//  }}}
+
+    typedef enum {
+        REQ_IDLE,        // neither the request nor its data is pending completion
+        REQ_META_SENT,   // request accepted, its data still has to be accepted
+        REQ_DATA_SENT    // data accepted, its request still has to be accepted
+    } req_send_fsm_t;
+
+    req_send_fsm_t               req_send_fsm_q, req_send_fsm_d;
+    logic                        req_valid;                     // granted requester has a valid request
+    logic                        req_data_valid;                // granted requester has valid data
+
+    logic                [N-1:0] mem_write_arb_req_valid;       // packed copies of the unpacked input arrays
+    hpdcache_mem_req_t   [N-1:0] mem_write_arb_req;
+    logic                [N-1:0] mem_write_arb_req_data_valid;
+    hpdcache_mem_req_w_t [N-1:0] mem_write_arb_req_data;
+    logic                [N-1:0] mem_write_arb_req_gnt;         // grant vector, used as one-hot mux select
+    logic                        mem_write_arb_req_ready;       // tells the arbiter the granted transfer is complete
+
+    genvar                       gen_i;
+
+
+    generate
+        for (gen_i = 0; gen_i < int'(N); gen_i++) begin : pack_inputs_gen
+            assign mem_write_arb_req_valid     [gen_i] = mem_req_write_valid_i[gen_i],
+                   mem_write_arb_req           [gen_i] = mem_req_write_i[gen_i],
+                   mem_write_arb_req_data_valid[gen_i] = mem_req_write_data_valid_i[gen_i],
+                   mem_write_arb_req_data      [gen_i] = mem_req_write_data_i[gen_i];
+        end
+    endgenerate
+
+    //  Fixed-priority arbiter
+    hpdcache_fxarb #(
+        .N                   (N)   // must match the [N-1:0] width of req_i/gnt_o; a literal 2 only fits N == 2
+    ) hpdcache_fxarb_mem_req_write_i (
+        .clk_i,
+        .rst_ni,
+        .req_i               (mem_write_arb_req_valid),
+        .gnt_o               (mem_write_arb_req_gnt),
+        .ready_i             (mem_write_arb_req_ready)
+    );
+
+    assign req_valid      = |(mem_write_arb_req_gnt & mem_write_arb_req_valid);
+    assign req_data_valid = |(mem_write_arb_req_gnt & mem_write_arb_req_data_valid);
+
+    //  Request sent FSM
+    //
+    //  This FSM allows to make sure that the request and its corresponding
+    //  data are sent in order. This is, when a requester sends a request, this
+    //  FSM keeps the grant signal on this requester until it has sent the
+    //  corresponding data.
+    //
+    //  {{{
+    always_comb
+    begin : req_send_fsm_comb
+        req_send_fsm_d = req_send_fsm_q;         // default: hold the current state
+        mem_write_arb_req_ready = 1'b0;          // default: keep the current grant
+        case (req_send_fsm_q)
+            REQ_IDLE:
+                if (req_valid && mem_req_write_ready_i) begin
+                    if (req_data_valid) begin    // NOTE(review): with no valid data the state stays REQ_IDLE although the request is accepted - confirm requesters always present data together with the request
+                        if (mem_req_write_data_ready_i) begin
+                            mem_write_arb_req_ready = 1'b1;   // request and data accepted in the same cycle
+                            req_send_fsm_d = REQ_IDLE;
+                        end else begin
+                            req_send_fsm_d = REQ_META_SENT;
+                        end
+                    end
+                end else if (req_data_valid && mem_req_write_data_ready_i) begin
+                    req_send_fsm_d = REQ_DATA_SENT;
+                end
+
+            REQ_META_SENT:
+                if (req_data_valid && mem_req_write_data_ready_i) begin
+                    mem_write_arb_req_ready = 1'b1;           // data accepted: transfer complete
+                    req_send_fsm_d = REQ_IDLE;
+                end
+
+            REQ_DATA_SENT:
+                if (req_valid && mem_req_write_ready_i) begin
+                    mem_write_arb_req_ready = 1'b1;           // request accepted: transfer complete
+                    req_send_fsm_d = REQ_IDLE;
+                end
+        endcase
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni)
+    begin : req_send_fsm_ff
+        if (!rst_ni) begin
+            req_send_fsm_q <= REQ_IDLE;
+        end else begin
+            req_send_fsm_q <= req_send_fsm_d;
+        end
+    end
+    //  }}}
+
+    generate
+        for (gen_i = 0; gen_i < int'(N); gen_i++) begin : req_ready_gen
+            assign mem_req_write_ready_o[gen_i] =
+                    (mem_write_arb_req_gnt[gen_i] & mem_req_write_ready_i) &
+                    (req_send_fsm_q != REQ_META_SENT);   // request channel is blocked once the request has been sent
+
+            assign mem_req_write_data_ready_o[gen_i] =
+                    (mem_write_arb_req_gnt[gen_i] & mem_req_write_data_ready_i) &
+                    (req_send_fsm_q != REQ_DATA_SENT);   // data channel is blocked once the data has been sent
+        end
+    endgenerate
+
+    //  Output assignments
+    //  {{{
+    assign mem_req_write_valid_o      = req_valid      & (req_send_fsm_q != REQ_META_SENT);
+    assign mem_req_write_data_valid_o = req_data_valid & (req_send_fsm_q != REQ_DATA_SENT);
+
+    hpdcache_mux #(
+        .NINPUT              (N),
+        .DATA_WIDTH          ($bits(hpdcache_mem_req_t)),
+        .ONE_HOT_SEL         (1'b1)
+    ) mem_write_req_mux_i (
+        .data_i              (mem_write_arb_req),
+        .sel_i               (mem_write_arb_req_gnt),
+        .data_o              (mem_req_write_o)
+    );
+
+    hpdcache_mux #(
+        .NINPUT              (N),
+        .DATA_WIDTH          ($bits(hpdcache_mem_req_w_t)),
+        .ONE_HOT_SEL         (1'b1)
+    ) mem_write_data_req_mux_i (
+        .data_i              (mem_write_arb_req_data),
+        .sel_i               (mem_write_arb_req_gnt),
+        .data_o              (mem_req_write_data_o)
+    );
+    //  }}}
+
+endmodule
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_resp_demux.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_resp_demux.sv
new file mode 100644
index 00000000..c1502a98
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_resp_demux.sv
@@ -0,0 +1,108 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : June, 2022
+ *  Description   : Dcache Memory Response Demultiplexer: forwards the single input response to one of N output ports chosen through a routing table indexed by the response ID
+ *  History       :
+ */
+module hpdcache_mem_resp_demux
+//  Parameters
+//  {{{
+#(
+    parameter int  N = 0,                 // number of output ports
+    parameter type resp_t = logic,        // response payload type
+    parameter type resp_id_t = logic,     // response ID type, also the element type of the routing table
+
+    localparam int  RT_DEPTH = (1 << $bits(resp_id_t)),      // one routing-table entry per possible ID value
+    localparam type rt_t = resp_id_t [RT_DEPTH-1:0]
+)
+//  }}}
+
+//  Ports
+//  {{{
+(
+    input  logic                  clk_i,    // not referenced in this module body
+    input  logic                  rst_ni,   // not referenced in this module body
+
+    output logic                  mem_resp_ready_o,   // ready of the output port selected for the current response
+    input  logic                  mem_resp_valid_i,
+    input  resp_id_t              mem_resp_id_i,      // index into mem_resp_rt_i
+    input  resp_t                 mem_resp_i,
+
+    input  logic                  mem_resp_ready_i [N-1:0],
+    output logic                  mem_resp_valid_o [N-1:0],
+    output resp_t                 mem_resp_o       [N-1:0],
+
+    input  rt_t                   mem_resp_rt_i       // routing table: entry [id] gives the output port select
+);
+//  }}}
+
+    typedef logic [$clog2(N)-1:0] sel_t;    // binary (not one-hot) output port index
+
+    logic  [N-1:0] mem_resp_demux_valid;    // packed counterparts of the unpacked port arrays
+    resp_t [N-1:0] mem_resp_demux;
+    logic  [N-1:0] mem_resp_demux_ready;
+    sel_t          mem_resp_demux_sel;
+
+    //  Route the response according to the response ID and the routing table
+    assign mem_resp_demux_sel = mem_resp_rt_i[int'(mem_resp_id_i)];   // resp_id_t entry assigned to sel_t: widths may differ - NOTE(review): confirm the implicit resize is intended
+
+    //  Forward the response to the corresponding output port
+    hpdcache_demux #(
+        .NOUTPUT     (N),
+        .DATA_WIDTH  (1),
+        .ONE_HOT_SEL (0)
+    ) i_resp_valid_demux (
+        .data_i      (mem_resp_valid_i),
+        .sel_i       (mem_resp_demux_sel),
+        .data_o      (mem_resp_demux_valid)
+    );
+
+    hpdcache_demux #(
+        .NOUTPUT     (N),
+        .DATA_WIDTH  ($bits(resp_t)),
+        .ONE_HOT_SEL (0)
+    ) i_resp_demux (
+        .data_i      (mem_resp_i),
+        .sel_i       (mem_resp_demux_sel),
+        .data_o      (mem_resp_demux)
+    );
+
+    hpdcache_mux #(
+        .NINPUT      (N),
+        .DATA_WIDTH  (1),
+        .ONE_HOT_SEL (0)
+    ) i_resp_ready_mux (
+        .data_i      (mem_resp_demux_ready),
+        .sel_i       (mem_resp_demux_sel),
+        .data_o      (mem_resp_ready_o)
+    );
+
+    //  Pack/unpack responses
+    generate
+        for (genvar gen_i = 0; gen_i < int'(N); gen_i++) begin : pack_unpack_resp_gen
+            assign mem_resp_valid_o     [gen_i] = mem_resp_demux_valid [gen_i];
+            assign mem_resp_o           [gen_i] = mem_resp_demux       [gen_i];
+            assign mem_resp_demux_ready [gen_i] = mem_resp_ready_i     [gen_i];
+        end
+    endgenerate
+
+endmodule : hpdcache_mem_resp_demux
diff --git a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_to_axi_read.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_to_axi_read.sv
new file mode 100644
index 00000000..ec3fad74
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_to_axi_read.sv
@@ -0,0 +1,95 @@
+/*
+ *  Copyright 2023 CEA*
+ *  *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA)
+ *
+ *  SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+ *
+ *  Licensed under the Solderpad Hardware License v 2.1 (the “License”); you
+ *  may not use this file except in compliance with the License, or, at your
+ *  option, the Apache License version 2.0. You may obtain a copy of the
+ *  License at
+ *
+ *  https://solderpad.org/licenses/SHL-2.1/
+ *
+ *  Unless required by applicable law or agreed to in writing, any work
+ *  distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  License for the specific language governing permissions and limitations
+ *  under the License.
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : April, 2021
+ *  Description   : Dcache memory request to axi read channels (purely combinational mapping onto the AR and R channel signals)
+ *  History       :
+ */
+module hpdcache_mem_to_axi_read
+import hpdcache_pkg::*;
+#(
+    parameter type hpdcache_mem_req_t = logic,      // memory request payload (fields mem_req_* are read below)
+    parameter type hpdcache_mem_resp_r_t = logic,   // memory read response payload (fields mem_resp_r_* are driven below)
+    parameter type ar_chan_t = logic,               // AR channel payload
+    parameter type r_chan_t = logic                 // R channel payload
+)
+(
+    output logic                  req_ready_o,
+    input  logic                  req_valid_i,
+    input  hpdcache_mem_req_t     req_i,
+
+    input  logic                  resp_ready_i,
+    output logic                  resp_valid_o,
+    output hpdcache_mem_resp_r_t  resp_o,
+
+    output logic                  axi_ar_valid_o,
+    output ar_chan_t              axi_ar_o,
+    input  logic                  axi_ar_ready_i,
+
+    input  logic                  axi_r_valid_i,
+    input  r_chan_t               axi_r_i,
+    output logic                  axi_r_ready_o
+);
+
+    logic lock;                   // drives axi_ar_o.lock
+    axi_pkg::cache_t cache;       // drives axi_ar_o.cache
+    hpdcache_mem_error_e resp;    // decoded R-channel response status
+
+    assign lock = (req_i.mem_req_command == HPDCACHE_MEM_ATOMIC) &&   // set only for ATOMIC commands ...
+                  (req_i.mem_req_atomic  == HPDCACHE_MEM_ATOMIC_LDEX);   // ... whose atomic operation is LDEX
+
+    assign cache = req_i.mem_req_cacheable ?    // cacheable requests set all four attribute flags, others send '0
+                   axi_pkg::CACHE_BUFFERABLE |
+                   axi_pkg::CACHE_MODIFIABLE |
+                   axi_pkg::CACHE_RD_ALLOC   |
+                   axi_pkg::CACHE_WR_ALLOC   : '0;
+
+    always_comb
+    begin : resp_decode_comb
+        case (axi_r_i.resp)
+            axi_pkg::RESP_SLVERR,
+            axi_pkg::RESP_DECERR: resp = HPDCACHE_MEM_RESP_NOK;   // both error codes map to NOK
+            default:              resp = HPDCACHE_MEM_RESP_OK;    // every other code maps to OK
+        endcase
+    end
+
+    assign req_ready_o      = axi_ar_ready_i,        // request handshake is passed straight through to AR
+           axi_ar_valid_o   = req_valid_i,
+           axi_ar_o.id      = req_i.mem_req_id,
+           axi_ar_o.addr    = req_i.mem_req_addr,
+           axi_ar_o.len     = req_i.mem_req_len,
+           axi_ar_o.size    = req_i.mem_req_size,
+           axi_ar_o.burst   = axi_pkg::BURST_INCR,   // burst type is always INCR
+           axi_ar_o.lock    = lock,
+           axi_ar_o.cache   = cache,
+           axi_ar_o.prot    = '0,                    // prot, qos, region and user are tied to zero
+           axi_ar_o.qos     = '0,
+           axi_ar_o.region  = '0,
+           axi_ar_o.user    = '0;
+
+    assign axi_r_ready_o           = resp_ready_i,   // response handshake is passed straight through from R
+           resp_valid_o            = axi_r_valid_i,
+           resp_o.mem_resp_r_error = resp,
+           resp_o.mem_resp_r_id    = axi_r_i.id,
+           resp_o.mem_resp_r_data  = axi_r_i.data,
+           resp_o.mem_resp_r_last  = axi_r_i.last;
+
+endmodule
diff --git
a/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_to_axi_write.sv b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_to_axi_write.sv new file mode 100644 index 00000000..8d8eb9f6 --- /dev/null +++ b/test/type_param/core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_to_axi_write.sv @@ -0,0 +1,148 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */
+/*
+ *  Authors       : Cesar Fuguet
+ *  Creation Date : April, 2021
+ *  Description   : Dcache memory request to axi write channels (purely combinational mapping onto the AW, W and B channel signals)
+ *  History       :
+ */
+module hpdcache_mem_to_axi_write
+import hpdcache_pkg::*;
+#(
+    parameter type hpdcache_mem_req_t = logic,      // memory request payload (fields mem_req_* are read below)
+    parameter type hpdcache_mem_req_w_t = logic,    // memory write data payload (fields mem_req_w_* are read below)
+    parameter type hpdcache_mem_resp_w_t = logic,   // memory write response payload (fields mem_resp_w_* are driven below)
+    parameter type aw_chan_t = logic,               // AW channel payload
+    parameter type w_chan_t = logic,                // W channel payload
+    parameter type b_chan_t = logic                 // B channel payload
+)
+(
+    output logic                  req_ready_o,
+    input  logic                  req_valid_i,
+    input  hpdcache_mem_req_t     req_i,
+
+    output logic                  req_data_ready_o,
+    input  logic                  req_data_valid_i,
+    input  hpdcache_mem_req_w_t   req_data_i,
+
+    input  logic                  resp_ready_i,
+    output logic                  resp_valid_o,
+    output hpdcache_mem_resp_w_t  resp_o,
+
+    output logic                  axi_aw_valid_o,
+    output aw_chan_t              axi_aw_o,
+    input  logic                  axi_aw_ready_i,
+
+    output logic                  axi_w_valid_o,
+    output w_chan_t               axi_w_o,
+    input  logic                  axi_w_ready_i,
+
+    input  logic                  axi_b_valid_i,
+    input  b_chan_t               axi_b_i,
+    output logic                  axi_b_ready_o
+);
+
+    logic lock;                   // drives axi_aw_o.lock
+    axi_pkg::atop_t atop;         // drives axi_aw_o.atop
+    axi_pkg::cache_t cache;       // drives axi_aw_o.cache
+    hpdcache_mem_error_e resp;    // decoded B-channel response status
+
+    always_comb
+    begin : atop_comb
+        lock = 1'b0;              // defaults cover non-atomic commands and atomic operations not listed below
+        atop = '0;
+        case (req_i.mem_req_command)
+            HPDCACHE_MEM_ATOMIC: begin
+                case (req_i.mem_req_atomic)
+                    HPDCACHE_MEM_ATOMIC_STEX: lock = 1'b1;    // STEX only sets lock and leaves atop at '0
+                    HPDCACHE_MEM_ATOMIC_ADD : atop = {axi_pkg::ATOP_ATOMICLOAD,
+                                                      axi_pkg::ATOP_LITTLE_END,
+                                                      axi_pkg::ATOP_ADD};
+                    HPDCACHE_MEM_ATOMIC_CLR : atop = {axi_pkg::ATOP_ATOMICLOAD,
+                                                      axi_pkg::ATOP_LITTLE_END,
+                                                      axi_pkg::ATOP_CLR};
+                    HPDCACHE_MEM_ATOMIC_SET : atop = {axi_pkg::ATOP_ATOMICLOAD,
+                                                      axi_pkg::ATOP_LITTLE_END,
+                                                      axi_pkg::ATOP_SET};
+                    HPDCACHE_MEM_ATOMIC_EOR : atop = {axi_pkg::ATOP_ATOMICLOAD,
+                                                      axi_pkg::ATOP_LITTLE_END,
+                                                      axi_pkg::ATOP_EOR};
+                    HPDCACHE_MEM_ATOMIC_SMAX: atop = {axi_pkg::ATOP_ATOMICLOAD,
+                                                      axi_pkg::ATOP_LITTLE_END,
+                                                      axi_pkg::ATOP_SMAX};
+                    HPDCACHE_MEM_ATOMIC_SMIN: atop = {axi_pkg::ATOP_ATOMICLOAD,
+                                                      axi_pkg::ATOP_LITTLE_END,
+                                                      axi_pkg::ATOP_SMIN};
+                    HPDCACHE_MEM_ATOMIC_UMAX: atop = {axi_pkg::ATOP_ATOMICLOAD,
+                                                      axi_pkg::ATOP_LITTLE_END,
+                                                      axi_pkg::ATOP_UMAX};
+                    HPDCACHE_MEM_ATOMIC_UMIN: atop = {axi_pkg::ATOP_ATOMICLOAD,
+                                                      axi_pkg::ATOP_LITTLE_END,
+                                                      axi_pkg::ATOP_UMIN};
+                    HPDCACHE_MEM_ATOMIC_SWAP: atop = axi_pkg::ATOP_ATOMICSWAP;
+                endcase
+            end
+        endcase
+    end
+
+    assign cache = (req_i.mem_req_cacheable && !lock) ?    // attributes are forced to '0 whenever lock is set
+                   axi_pkg::CACHE_BUFFERABLE |
+                   axi_pkg::CACHE_MODIFIABLE |
+                   axi_pkg::CACHE_RD_ALLOC   |
+                   axi_pkg::CACHE_WR_ALLOC   : '0;
+
+    always_comb
+    begin : resp_decode_comb
+        case (axi_b_i.resp)
+            axi_pkg::RESP_SLVERR,
+            axi_pkg::RESP_DECERR: resp = HPDCACHE_MEM_RESP_NOK;   // both error codes map to NOK
+            default:              resp = HPDCACHE_MEM_RESP_OK;    // every other code maps to OK
+        endcase
+    end
+
+    assign req_ready_o      = axi_aw_ready_i,        // request handshake is passed straight through to AW
+           axi_aw_valid_o   = req_valid_i,
+           axi_aw_o.id      = req_i.mem_req_id,
+           axi_aw_o.addr    = req_i.mem_req_addr,
+           axi_aw_o.len     = req_i.mem_req_len,
+           axi_aw_o.size    = req_i.mem_req_size,
+           axi_aw_o.burst   = axi_pkg::BURST_INCR,   // burst type is always INCR
+           axi_aw_o.lock    = lock,
+           axi_aw_o.cache   = cache,
+           axi_aw_o.prot    = '0,                    // prot, qos, region and user are tied to zero
+           axi_aw_o.qos     = '0,
+           axi_aw_o.region  = '0,
+           axi_aw_o.atop    = atop,
+           axi_aw_o.user    = '0;
+
+    assign req_data_ready_o = axi_w_ready_i,         // data handshake is passed straight through to W
+           axi_w_valid_o    = req_data_valid_i,
+           axi_w_o.data     = req_data_i.mem_req_w_data,
+           axi_w_o.strb     = req_data_i.mem_req_w_be,
+           axi_w_o.last     = req_data_i.mem_req_w_last,
+           axi_w_o.user     = '0;
+
+    assign axi_b_ready_o               = resp_ready_i,   // response handshake is passed straight through from B
+           resp_valid_o                = axi_b_valid_i,
+           resp_o.mem_resp_w_error     = resp,
+           resp_o.mem_resp_w_id        = axi_b_i.id,
+           resp_o.mem_resp_w_is_atomic = (axi_b_i.resp == axi_pkg::RESP_EXOKAY);   // set exactly when the B response is EXOKAY
+
+endmodule
diff --git a/test/type_param/core/cache_subsystem/miss_handler.sv b/test/type_param/core/cache_subsystem/miss_handler.sv
new file mode 100644
index 00000000..4755d0b2
--- /dev/null
+++ b/test/type_param/core/cache_subsystem/miss_handler.sv
@@ -0,0 +1,826 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 12.11.2017
+// Description: Handles cache misses: one FSM covers cache-line refill and write-back, cache flush, post-reset initialization and AMOs; bypass requests go through a separate arbiter and AXI adapter.
+
+// --------------
+// MISS Handler
+// --------------
+
+module miss_handler
+  import ariane_pkg::*;
+  import std_cache_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    parameter int unsigned NR_PORTS = 4,  // number of miss request ports
+    parameter type axi_req_t = logic,
+    parameter type axi_rsp_t = logic
+) (
+    input logic clk_i,
+    input logic rst_ni,
+    input logic flush_i,  // flush request
+    output logic flush_ack_o,  // acknowledge successful flush
+    output logic miss_o,
+    input logic busy_i,  // dcache is busy with something
+    // Bypass or miss
+    input logic [NR_PORTS-1:0][$bits(miss_req_t)-1:0] miss_req_i,  // each entry is cast to miss_req_t in the struct split at the end of the module
+    // Bypass handling
+    output logic [NR_PORTS-1:0] bypass_gnt_o,
+    output logic [NR_PORTS-1:0] bypass_valid_o,
+    output logic [NR_PORTS-1:0][63:0] bypass_data_o,
+
+    // AXI port
+    output axi_req_t axi_bypass_o,
+    input axi_rsp_t axi_bypass_i,
+
+    // Miss handling (~> cacheline refill)
+    output logic [NR_PORTS-1:0] miss_gnt_o,
+    output logic [NR_PORTS-1:0] active_serving_o,
+
+    output logic [63:0] critical_word_o,
+    output logic critical_word_valid_o,
+    output axi_req_t axi_data_o,
+    input axi_rsp_t axi_data_i,
+
+    input logic [NR_PORTS-1:0][55:0] mshr_addr_i,
+    output logic [NR_PORTS-1:0] mshr_addr_matches_o,
+    output logic [NR_PORTS-1:0] mshr_index_matches_o,
+    // AMO
+    input amo_req_t amo_req_i,
+    output amo_resp_t amo_resp_o,
+    // Port to SRAMs, for refill and eviction
+    output logic [DCACHE_SET_ASSOC-1:0] req_o,
+    output logic [DCACHE_INDEX_WIDTH-1:0] addr_o,  // address into cache array
+    output cache_line_t data_o,
+    output cl_be_t be_o,
+    input cache_line_t [DCACHE_SET_ASSOC-1:0] data_i,
+    output logic we_o
+);
+
+  // Three MSHR ports + AMO port
+  parameter NR_BYPASS_PORTS = NR_PORTS + 1;
+
+  // FSM states
+  enum logic [3:0] {
+    IDLE,  // 0
+    FLUSHING,  // 1
+    FLUSH,  // 2 (no case item below handles this state)
+    WB_CACHELINE_FLUSH,  // 3
+    FLUSH_REQ_STATUS,  // 4
+    WB_CACHELINE_MISS,  // 5
+    WAIT_GNT_SRAM,  // 6 (no case item below handles this state)
+    MISS,  // 7
+    REQ_CACHELINE,  // 8
+    MISS_REPL,  // 9
+    SAVE_CACHELINE,  // A
+    INIT,  // B
+    AMO_REQ,  // C
+    AMO_WAIT_RESP  // D
+  }
+      state_d, state_q;
+
+  // Registers
+  mshr_t mshr_d, mshr_q;
+  logic [DCACHE_INDEX_WIDTH-1:0] cnt_d, cnt_q;
+  logic [DCACHE_SET_ASSOC-1:0] evict_way_d, evict_way_q;
+  // cache line to evict
+  cache_line_t evict_cl_d, evict_cl_q;
+
+  logic serve_amo_d, serve_amo_q;
+  // Request from one FSM
+  logic [                  NR_PORTS-1:0]       miss_req_valid;
+  logic [                  NR_PORTS-1:0]       miss_req_bypass;
+  logic [                  NR_PORTS-1:0][63:0] miss_req_addr;
+  logic [                  NR_PORTS-1:0][63:0] miss_req_wdata;
+  logic [                  NR_PORTS-1:0]       miss_req_we;
+  logic [                  NR_PORTS-1:0][ 7:0] miss_req_be;
+  logic [                  NR_PORTS-1:0][ 1:0] miss_req_size;
+
+  // Bypass AMO port
+  bypass_req_t                                 amo_bypass_req;
+  bypass_rsp_t                                 amo_bypass_rsp;
+
+  // Bypass ports <-> Arbiter
+  bypass_req_t [           NR_BYPASS_PORTS-1:0] bypass_ports_req;
+  bypass_rsp_t [           NR_BYPASS_PORTS-1:0] bypass_ports_rsp;
+
+  // Arbiter <-> Bypass AXI adapter
+  bypass_req_t                                 bypass_adapter_req;
+  bypass_rsp_t                                 bypass_adapter_rsp;
+
+  // Cache Line Refill <-> AXI
+  logic                                        req_fsm_miss_valid;
+  logic [                          63:0]       req_fsm_miss_addr;
+  logic [         DCACHE_LINE_WIDTH-1:0]       req_fsm_miss_wdata;
+  logic                                        req_fsm_miss_we;
+  logic [     (DCACHE_LINE_WIDTH/8)-1:0]       req_fsm_miss_be;
+  ariane_pkg::ad_req_t                         req_fsm_miss_req;
+  logic [                           1:0]       req_fsm_miss_size;
+
+  logic                                        gnt_miss_fsm;
+  logic                                        valid_miss_fsm;
+  logic [    (DCACHE_LINE_WIDTH/64)-1:0][63:0] data_miss_fsm;
+
+  // Cache Management <-> LFSR
+  logic                                        lfsr_enable;
+  logic [          DCACHE_SET_ASSOC-1:0]       lfsr_oh;
+  logic [$clog2(DCACHE_SET_ASSOC-1)-1:0]       lfsr_bin;  // NOTE(review): width is $clog2(DCACHE_SET_ASSOC-1), not $clog2(DCACHE_SET_ASSOC) - confirm against the lfsr_8bit port
+  // AMOs
+  ariane_pkg::amo_t                            amo_op;  // declared but not referenced elsewhere in this module
+  logic [                          63:0]       amo_operand_b;
+
+  // ------------------------------
+  // Cache Management
+  // ------------------------------
+  always_comb begin : cache_management
+    automatic logic [DCACHE_SET_ASSOC-1:0] evict_way, valid_way;  // per-way flags: valid-and-dirty, valid
+
+    for (int unsigned i = 0; i < DCACHE_SET_ASSOC; i++) begin
+      evict_way[i] = data_i[i].valid & data_i[i].dirty;
+      valid_way[i] = data_i[i].valid;
+    end
+    // ----------------------
+    // Default Assignments
+    // ----------------------
+    // memory array
+    req_o = '0;
+    addr_o = '0;
+    data_o = '0;
+    be_o = '0;
+    we_o = '0;
+    // Cache controller
+    miss_gnt_o = '0;
+    active_serving_o = '0;
+    // LFSR replacement unit
+    lfsr_enable = 1'b0;
+    // to AXI refill
+    req_fsm_miss_valid = 1'b0;
+    req_fsm_miss_addr = '0;
+    req_fsm_miss_wdata = '0;
+    req_fsm_miss_we = 1'b0;
+    req_fsm_miss_be = '0;
+    req_fsm_miss_req = ariane_pkg::CACHE_LINE_REQ;
+    req_fsm_miss_size = 2'b11;
+    // to AXI bypass
+    amo_bypass_req.req = 1'b0;
+    amo_bypass_req.reqtype = ariane_pkg::SINGLE_REQ;
+    amo_bypass_req.amo = ariane_pkg::AMO_NONE;
+    amo_bypass_req.addr = '0;
+    amo_bypass_req.we = 1'b0;
+    amo_bypass_req.wdata = '0;
+    amo_bypass_req.be = '0;
+    amo_bypass_req.size = 2'b11;
+    amo_bypass_req.id = 4'b1011;
+    // core
+    flush_ack_o = 1'b0;
+    miss_o = 1'b0;  // to performance counter
+    serve_amo_d = serve_amo_q;
+    // --------------------------------
+    // Flush and Miss operation
+    // --------------------------------
+    state_d = state_q;
+    cnt_d = cnt_q;
+    evict_way_d = evict_way_q;
+    evict_cl_d = evict_cl_q;
+    mshr_d = mshr_q;
+    // communicate to the requester which unit we are currently serving
+    active_serving_o[mshr_q.id] = mshr_q.valid;
+    // AMOs
+    amo_resp_o.ack = 1'b0;
+    amo_resp_o.result = '0;
+    amo_operand_b = '0;
+
+    case (state_q)
+
+      IDLE: begin
+        // lowest priority are AMOs, wait until everything else is served before going for the AMOs
+        if (amo_req_i.req && !busy_i) begin
+          // 1. Flush the cache
+          state_d = FLUSH_REQ_STATUS;
+          serve_amo_d = 1'b1;
+          cnt_d = '0;
+        end
+        // check if we want to flush and can flush e.g.: we are not busy anymore
+        // TODO: Check that the busy flag is indeed needed
+        if (flush_i && !busy_i) begin
+          state_d = FLUSH_REQ_STATUS;
+          cnt_d   = '0;
+        end
+
+        // check if one of the state machines missed
+        for (int unsigned i = 0; i < NR_PORTS; i++) begin
+          // here comes the refill portion of code
+          if (miss_req_valid[i] && !miss_req_bypass[i]) begin
+            state_d = MISS;  // assigned last, so a miss overrides the AMO/flush decisions above
+            // we are taking another request so don't take the AMO
+            serve_amo_d = 1'b0;
+            // save to MSHR
+            mshr_d.valid = 1'b1;
+            mshr_d.we = miss_req_we[i];
+            mshr_d.id = i;
+            mshr_d.addr = miss_req_addr[i][DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:0];
+            mshr_d.wdata = miss_req_wdata[i];
+            mshr_d.be = miss_req_be[i];
+            break;  // only the lowest-numbered missing port is taken
+          end
+        end
+      end
+
+      // ~> we missed on the cache
+      MISS: begin
+        // 1. Check if there is an empty cache-line
+        // 2. If not -> evict one
+        req_o = '1;
+        addr_o = mshr_q.addr[DCACHE_INDEX_WIDTH-1:0];
+        state_d = MISS_REPL;
+        miss_o = 1'b1;
+      end
+
+      // ~> second miss cycle
+      MISS_REPL: begin
+        // if all are valid we need to evict one, pseudo random from LFSR
+        if (&valid_way) begin
+          lfsr_enable = 1'b1;
+          evict_way_d = lfsr_oh;
+          // do we need to write back the cache line?
+          if (data_i[lfsr_bin].dirty) begin
+            state_d = WB_CACHELINE_MISS;
+            evict_cl_d.tag = data_i[lfsr_bin].tag;
+            evict_cl_d.data = data_i[lfsr_bin].data;
+            cnt_d = mshr_q.addr[DCACHE_INDEX_WIDTH-1:0];
+            // no - we can request a cache line now
+          end else state_d = REQ_CACHELINE;
+          // we have at least one free way
+        end else begin
+          // get victim cache-line by looking for the first non-valid bit
+          evict_way_d = get_victim_cl(~valid_way);
+          state_d = REQ_CACHELINE;
+        end
+      end
+
+      // ~> we can just load the cache-line, the way is stored in evict_way_q
+      REQ_CACHELINE: begin
+        req_fsm_miss_valid = 1'b1;
+        req_fsm_miss_addr  = mshr_q.addr;
+
+        if (gnt_miss_fsm) begin
+          state_d = SAVE_CACHELINE;
+          miss_gnt_o[mshr_q.id] = 1'b1;
+        end
+      end
+
+      // ~> replace the cacheline
+      SAVE_CACHELINE: begin
+        // calculate cacheline offset
+        automatic logic [$clog2(DCACHE_LINE_WIDTH)-1:0] cl_offset;
+        cl_offset = mshr_q.addr[DCACHE_BYTE_OFFSET-1:3] << 6;  // 64-bit word index within the line times 64: bit offset of that word
+        // we've got a valid response from refill unit
+        if (valid_miss_fsm) begin
+
+          addr_o = mshr_q.addr[DCACHE_INDEX_WIDTH-1:0];
+          req_o = evict_way_q;
+          we_o = 1'b1;
+          be_o = '1;
+          be_o.vldrty = evict_way_q;
+          data_o.tag = mshr_q.addr[DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:DCACHE_INDEX_WIDTH];
+          data_o.data = data_miss_fsm;
+          data_o.valid = 1'b1;
+          data_o.dirty = 1'b0;
+
+          // is this a write?
+          if (mshr_q.we) begin
+            // Yes, so save the updated data now
+            for (int i = 0; i < 8; i++) begin
+              // check if we really want to write the corresponding byte
+              if (mshr_q.be[i]) data_o.data[(cl_offset+i*8)+:8] = mshr_q.wdata[i];
+            end
+            // its immediately dirty if we write
+            data_o.dirty = 1'b1;
+          end
+          // reset MSHR
+          mshr_d.valid = 1'b0;
+          // go back to idle
+          state_d = IDLE;
+        end
+      end
+
+      // ------------------------------
+      // Write Back Operation
+      // ------------------------------
+      // ~> evict a cache line from way saved in evict_way_q
+      WB_CACHELINE_FLUSH, WB_CACHELINE_MISS: begin
+
+        req_fsm_miss_valid = 1'b1;
+        req_fsm_miss_addr = {
+          evict_cl_q.tag,
+          cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET],
+          {{DCACHE_BYTE_OFFSET} {1'b0}}
+        };
+        req_fsm_miss_be = '1;
+        req_fsm_miss_we = 1'b1;
+        req_fsm_miss_wdata = evict_cl_q.data;
+
+        // we've got a grant --> this is timing critical, think about it
+        if (gnt_miss_fsm) begin
+          // write status array
+          addr_o = cnt_q;
+          req_o = 1'b1;
+          we_o = 1'b1;
+          data_o.valid = INVALIDATE_ON_FLUSH ? 1'b0 : 1'b1;
+          // invalidate
+          be_o.vldrty = evict_way_q;
+          // go back to handling the miss or flushing, depending on where we came from
+          state_d = (state_q == WB_CACHELINE_MISS) ? MISS : FLUSH_REQ_STATUS;
+        end
+      end
+
+      // ------------------------------
+      // Flushing & Initialization
+      // ------------------------------
+      // ~> make another request to check the same cache-line if there are still some valid entries
+      FLUSH_REQ_STATUS: begin
+        req_o   = '1;
+        addr_o  = cnt_q;
+        state_d = FLUSHING;
+      end
+
+      FLUSHING: begin
+        // this has priority
+        // at least one of the cache lines is dirty
+        if (|evict_way) begin
+          // evict cache line, look for the first cache-line which is dirty
+          evict_way_d = get_victim_cl(evict_way);
+          evict_cl_d  = data_i[one_hot_to_bin(evict_way)];
+          state_d     = WB_CACHELINE_FLUSH;
+          // not dirty ~> increment and continue
+        end else begin
+          // increment and re-request
+          cnt_d       = cnt_q + (1'b1 << DCACHE_BYTE_OFFSET);
+          state_d     = FLUSH_REQ_STATUS;
+          addr_o      = cnt_q;
+          req_o       = 1'b1;
+          be_o.vldrty = INVALIDATE_ON_FLUSH ? '1 : '0;
+          we_o        = 1'b1;
+          // finished with flushing operation, go back to idle
+          if (cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] == DCACHE_NUM_WORDS - 1) begin
+            // only acknowledge if the flush wasn't triggered by an atomic
+            flush_ack_o = ~serve_amo_q;
+            // if we are flushing because of an AMO go to serve it
+            if (serve_amo_q) begin
+              state_d = AMO_REQ;
+              serve_amo_d = 1'b0;
+            end else begin
+              state_d = IDLE;
+            end
+          end
+        end
+      end
+
+      // ~> only called after reset
+      INIT: begin
+        // initialize status array
+        addr_o = cnt_q;
+        req_o = 1'b1;
+        we_o = 1'b1;
+        // only write the dirty array
+        be_o.vldrty = '1;
+        cnt_d = cnt_q + (1'b1 << DCACHE_BYTE_OFFSET);
+        // finished initialization
+        if (cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] == DCACHE_NUM_WORDS - 1) state_d = IDLE;
+      end
+      // ----------------------
+      // AMOs
+      // ----------------------
+      // ~> we are here because we need to do the AMO, the cache is clean at this point
+      AMO_REQ: begin
+        amo_bypass_req.req = 1'b1;
+        amo_bypass_req.reqtype = ariane_pkg::SINGLE_REQ;
+        amo_bypass_req.amo = amo_req_i.amo_op;
+        // address is in operand a
+        amo_bypass_req.addr = amo_req_i.operand_a;
+        if (amo_req_i.amo_op != AMO_LR) begin
+          amo_bypass_req.we = 1'b1;
+        end
+        amo_bypass_req.size = amo_req_i.size;
+        // AXI implements CLR op instead of AND, negate operand
+        if (amo_req_i.amo_op == AMO_AND) begin
+          amo_operand_b = ~amo_req_i.operand_b;
+        end else begin
+          amo_operand_b = amo_req_i.operand_b;
+        end
+        // align data and byte-enable to correct byte lanes
+        amo_bypass_req.wdata = amo_operand_b;
+        if (amo_req_i.size == 2'b11) begin
+          // 64b transfer
+          amo_bypass_req.be = 8'b11111111;
+        end else begin
+          // 32b transfer
+          if (amo_req_i.operand_a[2:0] == '0) begin
+            // 64b aligned -> activate lower 4 byte lanes
+            amo_bypass_req.be = 8'b00001111;
+          end else begin
+            // 64b unaligned -> activate upper 4 byte lanes
+            amo_bypass_req.be = 8'b11110000;
+            amo_bypass_req.wdata = amo_operand_b[31:0] << 32;
+          end
+        end
+
+        // when request is accepted, wait for response
+        if (amo_bypass_rsp.gnt) begin
+          if (amo_bypass_rsp.valid) begin
+            state_d = IDLE;
+            amo_resp_o.ack = 1'b1;
+            amo_resp_o.result = amo_bypass_rsp.rdata;  // NOTE(review): unlike AMO_WAIT_RESP, no 32b lane select/sign extension here - confirm intended
+          end else begin
+            state_d = AMO_WAIT_RESP;
+          end
+        end
+      end
+      AMO_WAIT_RESP: begin
+        if (amo_bypass_rsp.valid) begin
+          state_d = IDLE;
+          amo_resp_o.ack = 1'b1;
+          // Request is assumed to be still valid (ack not granted yet)
+          if (amo_req_i.size == 2'b10) begin
+            // 32b request
+            logic [31:0] halfword;
+            if (amo_req_i.operand_a[2:0] == '0) begin
+              // 64b aligned -> activate lower 4 byte lanes
+              halfword = amo_bypass_rsp.rdata[31:0];
+            end else begin
+              // 64b unaligned -> activate upper 4 byte lanes
+              halfword = amo_bypass_rsp.rdata[63:32];
+            end
+            // Sign-extend 32b requests as per RISC-V spec
+            amo_resp_o.result = {{32{halfword[31]}}, halfword};
+          end else begin
+            // 64b request
+            amo_resp_o.result = amo_bypass_rsp.rdata;
+          end
+        end
+      end
+    endcase
+  end
+
+  // check MSHR for aliasing
+  always_comb begin
+
+    mshr_addr_matches_o  = 'b0;
+    mshr_index_matches_o = 'b0;
+
+    for (int i = 0; i < NR_PORTS; i++) begin
+      // check mshr for potential matching of other units, exclude the unit currently being served
+      if (mshr_q.valid && mshr_addr_i[i][55:DCACHE_BYTE_OFFSET] == mshr_q.addr[55:DCACHE_BYTE_OFFSET]) begin
+        mshr_addr_matches_o[i] = 1'b1;
+      end
+
+      // same as previous, but checking only the index
+      if (mshr_q.valid && mshr_addr_i[i][DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] == mshr_q.addr[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET]) begin
+        mshr_index_matches_o[i] = 1'b1;
+      end
+    end
+  end
+  // --------------------
+  // Sequential Process
+  // --------------------
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      mshr_q      <= '0;
+      state_q     <= INIT;  // the FSM leaves reset in INIT
+      cnt_q       <= '0;
+      evict_way_q <= '0;
+      evict_cl_q  <= '0;
+      serve_amo_q <= 1'b0;
+    end else begin
+      mshr_q      <= mshr_d;
+      state_q     <= state_d;
+      cnt_q       <= cnt_d;
+      evict_way_q <= evict_way_d;
+      evict_cl_q  <= evict_cl_d;
+      serve_amo_q <= serve_amo_d;
+    end
+  end
+
+  //pragma translate_off
+`ifndef VERILATOR
+  // assert that cache only hits on one way
+  assert property (@(posedge clk_i) $onehot0(evict_way_q))
+  else $warning("Evict-way should be one-hot encoded");
+`endif
+  //pragma translate_on
+
+  // ----------------------
+  // Pack bypass ports
+  // ----------------------
+  always_comb begin
+    logic [$clog2(NR_BYPASS_PORTS)-1:0] id;
+
+    // Pack MSHR ports first
+    for (id = 0; id < NR_PORTS; id++) begin
+      bypass_ports_req[id].req     = miss_req_valid[id] & miss_req_bypass[id];
+      bypass_ports_req[id].reqtype = ariane_pkg::SINGLE_REQ;
+      bypass_ports_req[id].amo     = AMO_NONE;
+      bypass_ports_req[id].id      = 4'b1000 | 4'(id);
+      bypass_ports_req[id].addr    = miss_req_addr[id];
+      bypass_ports_req[id].wdata   = miss_req_wdata[id];
+      bypass_ports_req[id].we      = miss_req_we[id];
+      bypass_ports_req[id].be      = miss_req_be[id];
+      bypass_ports_req[id].size    = miss_req_size[id];
+
+      bypass_gnt_o[id]             = bypass_ports_rsp[id].gnt;
+      bypass_valid_o[id]           = bypass_ports_rsp[id].valid;
+      bypass_data_o[id]            = bypass_ports_rsp[id].rdata;
+    end
+
+    // AMO port has lowest priority
+    bypass_ports_req[id] = amo_bypass_req;  // id equals NR_PORTS after the loop, i.e. the last bypass port
+    amo_bypass_rsp       = bypass_ports_rsp[id];
+  end
+
+  // ----------------------
+  // Arbitrate bypass ports
+  // ----------------------
+  axi_adapter_arbiter #(
+      .NR_PORTS           (NR_BYPASS_PORTS),
+      .MAX_OUTSTANDING_REQ(CVA6Cfg.MaxOutstandingStores),
+      .req_t              (bypass_req_t),
+      .rsp_t              (bypass_rsp_t)
+  ) i_bypass_arbiter (
+      .clk_i (clk_i),
+      .rst_ni(rst_ni),
+      // Master Side
+      .req_i (bypass_ports_req),
+      .rsp_o (bypass_ports_rsp),
+      // Slave Side
+      .req_o (bypass_adapter_req),
+      .rsp_i (bypass_adapter_rsp)
+  );
+
+  // ----------------------
+  // Bypass AXI Interface
+  // ----------------------
+  // Cast bypass_adapter_req.addr to axi_adapter port size
+  logic [riscv::XLEN-1:0] bypass_addr;
+  assign bypass_addr = bypass_adapter_req.addr;
+
+  axi_adapter #(
+      .CVA6Cfg              (CVA6Cfg),
+      .DATA_WIDTH           (64),
+      .CACHELINE_BYTE_OFFSET(DCACHE_BYTE_OFFSET),
+      .axi_req_t            (axi_req_t),
+      .axi_rsp_t            (axi_rsp_t)
+  ) i_bypass_axi_adapter (
+      .clk_i(clk_i),
+      .rst_ni(rst_ni),
+      .req_i(bypass_adapter_req.req),
+      .type_i(bypass_adapter_req.reqtype),
+      .amo_i(bypass_adapter_req.amo),
+      .id_i(({{CVA6Cfg.AxiIdWidth - 4{1'b0}}, bypass_adapter_req.id})),
+      .addr_i(bypass_addr),
+      .wdata_i(bypass_adapter_req.wdata),
+      .we_i(bypass_adapter_req.we),
+      .be_i(bypass_adapter_req.be),
+      .size_i(bypass_adapter_req.size),
+      .gnt_o(bypass_adapter_rsp.gnt),
+      .valid_o(bypass_adapter_rsp.valid),
+      .rdata_o(bypass_adapter_rsp.rdata),
+      .id_o(),  // not used, single outstanding request in arbiter
+      .critical_word_o(),  // not used for single requests
+      .critical_word_valid_o(),  // not used for single requests
+      .axi_req_o(axi_bypass_o),
+      .axi_resp_i(axi_bypass_i)
+  );
+
+  // ----------------------
+  // Cache Line AXI Refill
+  // ----------------------
+  // Cast req_fsm_miss_addr to axi_adapter port size
+  logic [riscv::XLEN-1:0] miss_addr;
+  assign miss_addr = req_fsm_miss_addr;
+
+  axi_adapter #(
+      .CVA6Cfg              (CVA6Cfg),
+      .DATA_WIDTH           (DCACHE_LINE_WIDTH),
+      .CACHELINE_BYTE_OFFSET(DCACHE_BYTE_OFFSET),
+      .axi_req_t            (axi_req_t),
+      .axi_rsp_t            (axi_rsp_t)
+  ) i_miss_axi_adapter (
+      .clk_i,
+      .rst_ni,
+      .req_i                (req_fsm_miss_valid),
+      .type_i               (req_fsm_miss_req),
+      .amo_i                (AMO_NONE),
+      .gnt_o                (gnt_miss_fsm),
+      .addr_i               (miss_addr),
+      .we_i                 (req_fsm_miss_we),
+      .wdata_i              (req_fsm_miss_wdata),
+      .be_i                 (req_fsm_miss_be),
+      .size_i               (req_fsm_miss_size),
+      .id_i                 ({{CVA6Cfg.AxiIdWidth - 4{1'b0}}, 4'b0111}),
+      .valid_o              (valid_miss_fsm),
+      .rdata_o              (data_miss_fsm),
+      .id_o                 (),
+      .critical_word_o      (critical_word_o),
+      .critical_word_valid_o(critical_word_valid_o),
+      .axi_req_o            (axi_data_o),
+      .axi_resp_i           (axi_data_i)
+  );
+
+  // -----------------
+  // Replacement LFSR
+  // -----------------
+  lfsr_8bit #(
+      .WIDTH(DCACHE_SET_ASSOC)
+  ) i_lfsr (
+      .en_i          (lfsr_enable),
+      .refill_way_oh (lfsr_oh),
+      .refill_way_bin(lfsr_bin),
+      .*
+  );
+
+  // -----------------
+  // Struct Split
+  // -----------------
+  // Hack as system verilog support in modelsim seems to be buggy here
+  always_comb begin
+    automatic miss_req_t miss_req;
+
+    for (int unsigned i = 0; i < NR_PORTS; i++) begin
+      miss_req           = miss_req_t'(miss_req_i[i]);
+      miss_req_valid[i]  = miss_req.valid;
+      miss_req_bypass[i] = miss_req.bypass;
+      miss_req_addr[i]   = miss_req.addr;
+      miss_req_wdata[i]  = miss_req.wdata;
+      miss_req_we[i]     = miss_req.we;
+      miss_req_be[i]     = miss_req.be;
+      miss_req_size[i]   = miss_req.size;
+    end
+  end
+endmodule
+
+// --------------
+// AXI Arbiter
+// --------------
+//
+// Description: Arbitrates access to AXI refill/bypass
+//
+module axi_adapter_arbiter #(
+    parameter NR_PORTS = 4,
+    parameter MAX_OUTSTANDING_REQ = 0,
+    parameter type req_t = std_cache_pkg::bypass_req_t,
+    parameter type rsp_t = std_cache_pkg::bypass_rsp_t
+) (
+    input logic clk_i,  // Clock
+    input logic rst_ni,  // Asynchronous reset active low
+    // Master ports
+    input req_t [NR_PORTS-1:0] req_i,
+    output rsp_t [NR_PORTS-1:0] rsp_o,
+    // Slave port
+    output req_t req_o,
input rsp_t rsp_i +); + + localparam MAX_OUTSTANDING_CNT_WIDTH = $clog2( + MAX_OUTSTANDING_REQ + 1 + ) > 0 ? $clog2( + MAX_OUTSTANDING_REQ + 1 + ) : 1; + + typedef logic [MAX_OUTSTANDING_CNT_WIDTH-1:0] outstanding_cnt_t; + + enum logic { + IDLE, + SERVING + } + state_d, state_q; + + req_t req_d, req_q; + logic [NR_PORTS-1:0] sel_d, sel_q; + outstanding_cnt_t outstanding_cnt_d, outstanding_cnt_q; + + logic [NR_PORTS-1:0] req_flat; + logic any_unselected_port_valid; + + for (genvar i = 0; i < NR_PORTS; i++) begin : gen_req_flat + assign req_flat[i] = req_i[i].req; + end + assign any_unselected_port_valid = |(req_flat & ~(1 << sel_q)); + + + always_comb begin + sel_d = sel_q; + outstanding_cnt_d = outstanding_cnt_q; + + state_d = state_q; + req_d = req_q; + + req_o = req_q; + + rsp_o = '0; + rsp_o[sel_q].rdata = rsp_i.rdata; + + case (state_q) + + IDLE: begin + // wait for incoming requests + for (int unsigned i = 0; i < NR_PORTS; i++) begin + if (req_i[i].req == 1'b1) begin + sel_d = i[$bits(sel_d)-1:0]; + state_d = SERVING; + break; + end + end + + req_d = req_i[sel_d]; + req_o = req_i[sel_d]; + rsp_o[sel_d].gnt = req_i[sel_d].req; + + // Count outstanding transactions, i.e. requests which have been + // granted but response hasn't arrived yet + if (req_o.req && rsp_i.gnt) begin + req_d.req = 1'b0; + outstanding_cnt_d += 1; + end + end + + SERVING: begin + // We can accept multiple outstanding transactions from same port. + // To ensure fairness, we allow this only if all other ports are idle + if ((!req_o.req) && !any_unselected_port_valid && + (outstanding_cnt_q != (MAX_OUTSTANDING_REQ - 1))) begin + if (req_i[sel_q].req) begin + req_d = req_i[sel_q]; + req_o = req_i[sel_q]; + rsp_o[sel_q].gnt = 1'b1; + state_d = SERVING; + end + end + + // Count outstanding transactions, i.e. 
requests which have been + // granted but response hasn't arrived yet + if (req_o.req && rsp_i.gnt) begin + req_d.req = 1'b0; + outstanding_cnt_d += 1; + end + if (rsp_i.valid) begin + outstanding_cnt_d -= 1; + rsp_o[sel_q].valid = 1'b1; + + if ((outstanding_cnt_d == 0) && (!req_o.req || rsp_i.gnt)) state_d = IDLE; + end + end + + default: /* default */; + endcase + end + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + state_q <= IDLE; + sel_q <= '0; + req_q <= '0; + outstanding_cnt_q <= '0; + end else begin + state_q <= state_d; + sel_q <= sel_d; + req_q <= req_d; + outstanding_cnt_q <= outstanding_cnt_d; + end + end + // ------------ + // Assertions + // ------------ + + //pragma translate_off +`ifndef VERILATOR + // make sure that we eventually get an rvalid after we received a grant + assert property (@(posedge clk_i) rsp_i.gnt |-> ##[1:$] rsp_i.valid) + else begin + $error("There was a grant without a rvalid"); + $stop(); + end + // assert that there is no grant without a request or outstanding transactions + assert property (@(negedge clk_i) rsp_i.gnt |-> req_o.req) + else begin + $error("There was a grant without a request."); + $stop(); + end + // assert that the address does not contain X when request is sent + assert property (@(posedge clk_i) (req_o.req) |-> (!$isunknown(req_o.addr))) + else begin + $error("address contains X when request is set"); + $stop(); + end + +`endif + //pragma translate_on +endmodule diff --git a/test/type_param/core/cache_subsystem/std_cache_subsystem.sv b/test/type_param/core/cache_subsystem/std_cache_subsystem.sv new file mode 100644 index 00000000..45ba8bd3 --- /dev/null +++ b/test/type_param/core/cache_subsystem/std_cache_subsystem.sv @@ -0,0 +1,315 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. 
You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba , ETH Zurich +// Michael Schaffner , ETH Zurich +// Date: 15.08.2018 +// Description: Standard Ariane cache subsystem with instruction cache and +// write-back data cache. + + +module std_cache_subsystem + import ariane_pkg::*; + import std_cache_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int unsigned NumPorts = 4, + parameter type axi_ar_chan_t = logic, + parameter type axi_aw_chan_t = logic, + parameter type axi_w_chan_t = logic, + parameter type axi_req_t = logic, + parameter type axi_rsp_t = logic +) ( + input logic clk_i, + input logic rst_ni, + input riscv::priv_lvl_t priv_lvl_i, + // I$ + input logic icache_en_i, // enable icache (or bypass e.g: in debug mode) + input logic icache_flush_i, // flush the icache, flush and kill have to be asserted together + output logic icache_miss_o, // to performance counter + // address translation requests + input icache_areq_t icache_areq_i, // to/from frontend + output icache_arsp_t icache_areq_o, + // data requests + input icache_dreq_t icache_dreq_i, // to/from frontend + output icache_drsp_t icache_dreq_o, + // AMOs + input amo_req_t amo_req_i, + output amo_resp_t amo_resp_o, + // D$ + // Cache management + input logic dcache_enable_i, // from CSR + input logic dcache_flush_i, // high until acknowledged + output logic dcache_flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed + output logic dcache_miss_o, // we missed on a ld/st + output logic wbuffer_empty_o, // statically set to 1, 
as there is no wbuffer in this cache system + // Request ports + input dcache_req_i_t [NumPorts-1:0] dcache_req_ports_i, // to/from LSU + output dcache_req_o_t [NumPorts-1:0] dcache_req_ports_o, // to/from LSU + // memory side + output axi_req_t axi_req_o, + input axi_rsp_t axi_resp_i +); + + assign wbuffer_empty_o = 1'b1; + + axi_req_t axi_req_icache; + axi_rsp_t axi_resp_icache; + axi_req_t axi_req_bypass; + axi_rsp_t axi_resp_bypass; + axi_req_t axi_req_data; + axi_rsp_t axi_resp_data; + + cva6_icache_axi_wrapper #( + .CVA6Cfg (CVA6Cfg), + .axi_req_t(axi_req_t), + .axi_rsp_t(axi_rsp_t) + ) i_cva6_icache_axi_wrapper ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .priv_lvl_i(priv_lvl_i), + .flush_i (icache_flush_i), + .en_i (icache_en_i), + .miss_o (icache_miss_o), + .areq_i (icache_areq_i), + .areq_o (icache_areq_o), + .dreq_i (icache_dreq_i), + .dreq_o (icache_dreq_o), + .axi_req_o (axi_req_icache), + .axi_resp_i(axi_resp_icache) + ); + + // decreasing priority + // Port 0: PTW + // Port 1: Load Unit + // Port 2: Accelerator + // Port 3: Store Unit + std_nbdcache #( + .CVA6Cfg (CVA6Cfg), + .NumPorts (NumPorts), + .axi_req_t(axi_req_t), + .axi_rsp_t(axi_rsp_t) + ) i_nbdcache ( + .clk_i, + .rst_ni, + .enable_i (dcache_enable_i), + .flush_i (dcache_flush_i), + .flush_ack_o (dcache_flush_ack_o), + .miss_o (dcache_miss_o), + .axi_bypass_o(axi_req_bypass), + .axi_bypass_i(axi_resp_bypass), + .axi_data_o (axi_req_data), + .axi_data_i (axi_resp_data), + .req_ports_i (dcache_req_ports_i), + .req_ports_o (dcache_req_ports_o), + .amo_req_i, + .amo_resp_o + ); + + // ----------------------- + // Arbitrate AXI Ports + // ----------------------- + logic [1:0] w_select, w_select_fifo, w_select_arbiter; + logic [1:0] w_fifo_usage; + logic w_fifo_empty, w_fifo_full; + + + // AR Channel + stream_arbiter #( + .DATA_T(axi_ar_chan_t), + .N_INP (3) + ) i_stream_arbiter_ar ( + .clk_i, + .rst_ni, + .inp_data_i ({axi_req_icache.ar, axi_req_bypass.ar, axi_req_data.ar}), + 
.inp_valid_i({axi_req_icache.ar_valid, axi_req_bypass.ar_valid, axi_req_data.ar_valid}), + .inp_ready_o({axi_resp_icache.ar_ready, axi_resp_bypass.ar_ready, axi_resp_data.ar_ready}), + .oup_data_o (axi_req_o.ar), + .oup_valid_o(axi_req_o.ar_valid), + .oup_ready_i(axi_resp_i.ar_ready) + ); + + // AW Channel + stream_arbiter #( + .DATA_T(axi_aw_chan_t), + .N_INP (3) + ) i_stream_arbiter_aw ( + .clk_i, + .rst_ni, + .inp_data_i ({axi_req_icache.aw, axi_req_bypass.aw, axi_req_data.aw}), + .inp_valid_i({axi_req_icache.aw_valid, axi_req_bypass.aw_valid, axi_req_data.aw_valid}), + .inp_ready_o({axi_resp_icache.aw_ready, axi_resp_bypass.aw_ready, axi_resp_data.aw_ready}), + .oup_data_o (axi_req_o.aw), + .oup_valid_o(axi_req_o.aw_valid), + .oup_ready_i(axi_resp_i.aw_ready) + ); + + // WID has been removed in AXI 4 so we need to keep track of which AW request has been accepted + // to forward the correct write data. + always_comb begin + w_select = 0; + unique casez (axi_req_o.aw.id) + 4'b0111: w_select = 2; // dcache + 4'b1???: w_select = 1; // bypass + default: w_select = 0; // icache + endcase + end + + // W Channel + fifo_v3 #( + .DATA_WIDTH (2), + // we can have a maximum of 4 outstanding transactions as each port is blocking + .DEPTH (4), + .FALL_THROUGH(1'b1) + ) i_fifo_w_channel ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (1'b0), + .testmode_i(1'b0), + .full_o (w_fifo_full), + .empty_o (), // leave open + .usage_o (w_fifo_usage), + .data_i (w_select), + // a new transaction was requested and granted + .push_i (axi_req_o.aw_valid & axi_resp_i.aw_ready), + // write ID to select the output MUX + .data_o (w_select_fifo), + // transaction has finished + .pop_i (axi_req_o.w_valid & axi_resp_i.w_ready & axi_req_o.w.last) + ); + + // In fall-through mode, the empty_o will be low when push_i is high (on zero usage). + // We do not want this here. Also, usage_o is missing the MSB, so on full fifo, usage_o is zero.
+ assign w_fifo_empty = w_fifo_usage == 0 && !w_fifo_full; + + // icache will never write so select it as default (e.g.: when no arbitration is active) + // this is equal to setting it to zero + assign w_select_arbiter = w_fifo_empty ? (axi_req_o.aw_valid ? w_select : 0) : w_select_fifo; + + stream_mux #( + .DATA_T(axi_w_chan_t), + .N_INP (3) + ) i_stream_mux_w ( + .inp_data_i ({axi_req_data.w, axi_req_bypass.w, axi_req_icache.w}), + .inp_valid_i({axi_req_data.w_valid, axi_req_bypass.w_valid, axi_req_icache.w_valid}), + .inp_ready_o({axi_resp_data.w_ready, axi_resp_bypass.w_ready, axi_resp_icache.w_ready}), + .inp_sel_i (w_select_arbiter), + .oup_data_o (axi_req_o.w), + .oup_valid_o(axi_req_o.w_valid), + .oup_ready_i(axi_resp_i.w_ready) + ); + + // Route responses based on ID + // 0000 -> I$ + // 0111 -> D$ + // 1??? -> Bypass + // R Channel + assign axi_resp_icache.r = axi_resp_i.r; + assign axi_resp_bypass.r = axi_resp_i.r; + assign axi_resp_data.r = axi_resp_i.r; + + logic [1:0] r_select; + + always_comb begin + r_select = 0; + unique casez (axi_resp_i.r.id) + 4'b0111: r_select = 0; // dcache + 4'b1???: r_select = 1; // bypass + 4'b0000: r_select = 2; // icache + default: r_select = 0; + endcase + end + + stream_demux #( + .N_OUP(3) + ) i_stream_demux_r ( + .inp_valid_i(axi_resp_i.r_valid), + .inp_ready_o(axi_req_o.r_ready), + .oup_sel_i (r_select), + .oup_valid_o({axi_resp_icache.r_valid, axi_resp_bypass.r_valid, axi_resp_data.r_valid}), + .oup_ready_i({axi_req_icache.r_ready, axi_req_bypass.r_ready, axi_req_data.r_ready}) + ); + + // B Channel + logic [1:0] b_select; + + assign axi_resp_icache.b = axi_resp_i.b; + assign axi_resp_bypass.b = axi_resp_i.b; + assign axi_resp_data.b = axi_resp_i.b; + + always_comb begin + b_select = 0; + unique casez (axi_resp_i.b.id) + 4'b0111: b_select = 0; // dcache + 4'b1???: b_select = 1; // bypass + 4'b0000: b_select = 2; // icache + default: b_select = 0; + endcase + end + + stream_demux #( + .N_OUP(3) + ) i_stream_demux_b ( 
+ .inp_valid_i(axi_resp_i.b_valid), + .inp_ready_o(axi_req_o.b_ready), + .oup_sel_i (b_select), + .oup_valid_o({axi_resp_icache.b_valid, axi_resp_bypass.b_valid, axi_resp_data.b_valid}), + .oup_ready_i({axi_req_icache.b_ready, axi_req_bypass.b_ready, axi_req_data.b_ready}) + ); + + /////////////////////////////////////////////////////// + // assertions + /////////////////////////////////////////////////////// + + //pragma translate_off +`ifndef VERILATOR + + a_invalid_instruction_fetch : + assert property ( + @(posedge clk_i) disable iff (~rst_ni) icache_dreq_o.valid |-> (|icache_dreq_o.data) !== 1'hX) + else + $warning( + 1, + "[l1 dcache] reading invalid instructions: vaddr=%08X, data=%08X", + icache_dreq_o.vaddr, + icache_dreq_o.data + ); + + a_invalid_write_data : + assert property ( + @(posedge clk_i) disable iff (~rst_ni) dcache_req_ports_i[NumPorts-1].data_req |-> |dcache_req_ports_i[NumPorts-1].data_be |-> (|dcache_req_ports_i[NumPorts-1].data_wdata) !== 1'hX) + else + $warning( + 1, + "[l1 dcache] writing invalid data: paddr=%016X, be=%02X, data=%016X", + { + dcache_req_ports_i[NumPorts-1].address_tag, dcache_req_ports_i[NumPorts-1].address_index + }, + dcache_req_ports_i[NumPorts-1].data_be, + dcache_req_ports_i[NumPorts-1].data_wdata + ); + generate + for (genvar j = 0; j < NumPorts - 1; j++) begin + a_invalid_read_data : + assert property ( + @(posedge clk_i) disable iff (~rst_ni) dcache_req_ports_o[j].data_rvalid |-> (|dcache_req_ports_o[j].data_rdata) !== 1'hX) + else + $warning( + 1, + "[l1 dcache] reading invalid data on port %01d: data=%016X", + j, + dcache_req_ports_o[j].data_rdata + ); + end + endgenerate + +`endif + //pragma translate_on +endmodule // std_cache_subsystem diff --git a/test/type_param/core/cache_subsystem/std_nbdcache.sv b/test/type_param/core/cache_subsystem/std_nbdcache.sv new file mode 100644 index 00000000..367c67cb --- /dev/null +++ b/test/type_param/core/cache_subsystem/std_nbdcache.sv @@ -0,0 +1,279 @@ +// Copyright 2018 
ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 13.10.2017 +// Description: Nonblocking private L1 dcache + + +module std_nbdcache + import std_cache_pkg::*; + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int unsigned NumPorts = 4, + parameter type axi_req_t = logic, + parameter type axi_rsp_t = logic +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + // Cache management + input logic enable_i, // from CSR + input logic flush_i, // high until acknowledged + output logic flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed + output logic miss_o, // we missed on a LD/ST + // AMOs + input amo_req_t amo_req_i, + output amo_resp_t amo_resp_o, + // Request ports + input dcache_req_i_t [NumPorts-1:0] req_ports_i, // request ports + output dcache_req_o_t [NumPorts-1:0] req_ports_o, // request ports + // Cache AXI refill port + output axi_req_t axi_data_o, + input axi_rsp_t axi_data_i, + output axi_req_t axi_bypass_o, + input axi_rsp_t axi_bypass_i +); + + import std_cache_pkg::*; + + // ------------------------------- + // Controller <-> Arbiter + // ------------------------------- + // 1. Miss handler + // 2. PTW + // 3. Load Unit + // 4. Accelerator + // 5. 
Store unit + logic [ NumPorts:0][ DCACHE_SET_ASSOC-1:0] req; + logic [ NumPorts:0][DCACHE_INDEX_WIDTH-1:0] addr; + logic [ NumPorts:0] gnt; + cache_line_t [ DCACHE_SET_ASSOC-1:0] rdata; + logic [ NumPorts:0][ DCACHE_TAG_WIDTH-1:0] tag; + + cache_line_t [ NumPorts:0] wdata; + logic [ NumPorts:0] we; + cl_be_t [ NumPorts:0] be; + logic [ DCACHE_SET_ASSOC-1:0] hit_way; + // ------------------------------- + // Controller <-> Miss unit + // ------------------------------- + logic [ NumPorts-1:0] busy; + logic [ NumPorts-1:0][ 55:0] mshr_addr; + logic [ NumPorts-1:0] mshr_addr_matches; + logic [ NumPorts-1:0] mshr_index_matches; + logic [ 63:0] critical_word; + logic critical_word_valid; + + logic [ NumPorts-1:0][ $bits(miss_req_t)-1:0] miss_req; + logic [ NumPorts-1:0] miss_gnt; + logic [ NumPorts-1:0] active_serving; + + logic [ NumPorts-1:0] bypass_gnt; + logic [ NumPorts-1:0] bypass_valid; + logic [ NumPorts-1:0][ 63:0] bypass_data; + // ------------------------------- + // Arbiter <-> Data RAM + // ------------------------------- + logic [ DCACHE_SET_ASSOC-1:0] req_ram; + logic [DCACHE_INDEX_WIDTH-1:0] addr_ram; + logic we_ram; + cache_line_t wdata_ram; + cache_line_t [ DCACHE_SET_ASSOC-1:0] rdata_ram; + cl_be_t be_ram; + + // ------------------ + // Cache Controller + // ------------------ + generate + for (genvar i = 0; i < NumPorts; i++) begin : master_ports + cache_ctrl #( + .CVA6Cfg(CVA6Cfg) + ) i_cache_ctrl ( + .bypass_i (~enable_i), + .busy_o (busy[i]), + // from core + .req_port_i(req_ports_i[i]), + .req_port_o(req_ports_o[i]), + // to SRAM array + .req_o (req[i+1]), + .addr_o (addr[i+1]), + .gnt_i (gnt[i+1]), + .data_i (rdata), + .tag_o (tag[i+1]), + .data_o (wdata[i+1]), + .we_o (we[i+1]), + .be_o (be[i+1]), + .hit_way_i (hit_way), + + .miss_req_o (miss_req[i]), + .miss_gnt_i (miss_gnt[i]), + .active_serving_i (active_serving[i]), + .critical_word_i (critical_word), + .critical_word_valid_i(critical_word_valid), + .bypass_gnt_i (bypass_gnt[i]), +
.bypass_valid_i (bypass_valid[i]), + .bypass_data_i (bypass_data[i]), + + .mshr_addr_o (mshr_addr[i]), + .mshr_addr_matches_i (mshr_addr_matches[i]), + .mshr_index_matches_i(mshr_index_matches[i]), + .* + ); + end + endgenerate + + // ------------------ + // Miss Handling Unit + // ------------------ + miss_handler #( + .CVA6Cfg (CVA6Cfg), + .NR_PORTS (NumPorts), + .axi_req_t(axi_req_t), + .axi_rsp_t(axi_rsp_t) + ) i_miss_handler ( + .flush_i (flush_i), + .busy_i (|busy), + // AMOs + .amo_req_i (amo_req_i), + .amo_resp_o (amo_resp_o), + .miss_req_i (miss_req), + .miss_gnt_o (miss_gnt), + .bypass_gnt_o (bypass_gnt), + .bypass_valid_o (bypass_valid), + .bypass_data_o (bypass_data), + .critical_word_o (critical_word), + .critical_word_valid_o(critical_word_valid), + .mshr_addr_i (mshr_addr), + .mshr_addr_matches_o (mshr_addr_matches), + .mshr_index_matches_o (mshr_index_matches), + .active_serving_o (active_serving), + .req_o (req[0]), + .addr_o (addr[0]), + .data_i (rdata), + .be_o (be[0]), + .data_o (wdata[0]), + .we_o (we[0]), + .axi_bypass_o, + .axi_bypass_i, + .axi_data_o, + .axi_data_i, + .* + ); + + assign tag[0] = '0; + + // -------------- + // Memory Arrays + // -------------- + for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : sram_block + sram #( + .DATA_WIDTH(DCACHE_LINE_WIDTH), + .NUM_WORDS (DCACHE_NUM_WORDS) + ) data_sram ( + .req_i (req_ram[i]), + .rst_ni (rst_ni), + .we_i (we_ram), + .addr_i (addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET]), + .wuser_i('0), + .wdata_i(wdata_ram.data), + .be_i (be_ram.data), + .ruser_o(), + .rdata_o(rdata_ram[i].data), + .* + ); + + sram #( + .DATA_WIDTH(DCACHE_TAG_WIDTH), + .NUM_WORDS (DCACHE_NUM_WORDS) + ) tag_sram ( + .req_i (req_ram[i]), + .rst_ni (rst_ni), + .we_i (we_ram), + .addr_i (addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET]), + .wuser_i('0), + .wdata_i(wdata_ram.tag), + .be_i (be_ram.tag), + .ruser_o(), + .rdata_o(rdata_ram[i].tag), + .* + ); + + end + + // ---------------- + // Valid/Dirty Regs + 
// ---------------- + + // align each valid/dirty bit pair to a byte boundary in order to leverage byte enable signals. + // note: if you have an SRAM that supports flat bit enables for your target technology, + // you can use it here to save the extra 4x overhead introduced by this workaround. + logic [4*DCACHE_DIRTY_WIDTH-1:0] dirty_wdata, dirty_rdata; + + for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin + assign dirty_wdata[8*i] = wdata_ram.dirty; + assign dirty_wdata[8*i+1] = wdata_ram.valid; + assign rdata_ram[i].dirty = dirty_rdata[8*i]; + assign rdata_ram[i].valid = dirty_rdata[8*i+1]; + end + + sram #( + .USER_WIDTH(1), + .DATA_WIDTH(4 * DCACHE_DIRTY_WIDTH), + .NUM_WORDS (DCACHE_NUM_WORDS) + ) valid_dirty_sram ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .req_i (|req_ram), + .we_i (we_ram), + .addr_i (addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET]), + .wuser_i('0), + .wdata_i(dirty_wdata), + .be_i (be_ram.vldrty), + .ruser_o(), + .rdata_o(dirty_rdata) + ); + + // ------------------------------------------------ + // Tag Comparison and memory arbitration + // ------------------------------------------------ + tag_cmp #( + .CVA6Cfg (CVA6Cfg), + .NR_PORTS (NumPorts + 1), + .ADDR_WIDTH (DCACHE_INDEX_WIDTH), + .DCACHE_SET_ASSOC(DCACHE_SET_ASSOC) + ) i_tag_cmp ( + .req_i (req), + .gnt_o (gnt), + .addr_i (addr), + .wdata_i (wdata), + .we_i (we), + .be_i (be), + .rdata_o (rdata), + .tag_i (tag), + .hit_way_o(hit_way), + + .req_o (req_ram), + .addr_o (addr_ram), + .wdata_o(wdata_ram), + .we_o (we_ram), + .be_o (be_ram), + .rdata_i(rdata_ram), + .* + ); + + + //pragma translate_off + initial begin + assert (DCACHE_LINE_WIDTH / CVA6Cfg.AxiDataWidth inside {2, 4, 8, 16}) + else $fatal(1, "Cache line size needs to be a power of two multiple of AxiDataWidth"); + end + //pragma translate_on +endmodule diff --git a/test/type_param/core/cache_subsystem/tag_cmp.sv b/test/type_param/core/cache_subsystem/tag_cmp.sv new file mode 100644 index 00000000..a378c13b --- /dev/null 
+++ b/test/type_param/core/cache_subsystem/tag_cmp.sv @@ -0,0 +1,106 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// Author: Florian Zaruba +// -------------- +// Tag Compare +// -------------- +// +// Description: Arbitrates access to cache memories, simplified request grant protocol +// checks for hit or miss on cache +// +module tag_cmp #( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int unsigned NR_PORTS = 3, + parameter int unsigned ADDR_WIDTH = 64, + parameter type l_data_t = std_cache_pkg::cache_line_t, + parameter type l_be_t = std_cache_pkg::cl_be_t, + parameter int unsigned DCACHE_SET_ASSOC = 8 +) ( + input logic clk_i, + input logic rst_ni, + + input logic [NR_PORTS-1:0][DCACHE_SET_ASSOC-1:0] req_i, + output logic [NR_PORTS-1:0] gnt_o, + input logic [NR_PORTS-1:0][ADDR_WIDTH-1:0] addr_i, + input l_data_t [NR_PORTS-1:0] wdata_i, + input logic [NR_PORTS-1:0] we_i, + input l_be_t [NR_PORTS-1:0] be_i, + output l_data_t [DCACHE_SET_ASSOC-1:0] rdata_o, + input logic [NR_PORTS-1:0][ariane_pkg::DCACHE_TAG_WIDTH-1:0] tag_i, // tag in - comes one cycle later + output logic [DCACHE_SET_ASSOC-1:0] hit_way_o, // we've got a hit on the corresponding way + + + output logic [DCACHE_SET_ASSOC-1:0] req_o, + output logic [ ADDR_WIDTH-1:0] addr_o, + output l_data_t wdata_o, + output logic we_o, + output l_be_t 
be_o, + input l_data_t [DCACHE_SET_ASSOC-1:0] rdata_i +); + + assign rdata_o = rdata_i; + // one hot encoded + logic [NR_PORTS-1:0] id_d, id_q; + logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] sel_tag; + + always_comb begin : tag_sel + sel_tag = '0; + for (int unsigned i = 0; i < NR_PORTS; i++) if (id_q[i]) sel_tag = tag_i[i]; + end + + for (genvar j = 0; j < DCACHE_SET_ASSOC; j++) begin : tag_cmp + assign hit_way_o[j] = (sel_tag == rdata_i[j].tag) ? rdata_i[j].valid : 1'b0; + end + + always_comb begin + + gnt_o = '0; + id_d = '0; + wdata_o = '0; + req_o = '0; + addr_o = '0; + be_o = '0; + we_o = '0; + // Request Side + // priority select + for (int unsigned i = 0; i < NR_PORTS; i++) begin + req_o = req_i[i]; + id_d = (1'b1 << i); + gnt_o[i] = 1'b1; + addr_o = addr_i[i]; + be_o = be_i[i]; + we_o = we_i[i]; + wdata_o = wdata_i[i]; + + if (req_i[i]) break; + end + +`ifndef SYNTHESIS +`ifndef VERILATOR + // assert that cache only hits on one way + // this only needs to be checked one cycle after all ways have been requested + onehot : + assert property (@(posedge clk_i) disable iff (!rst_ni) &req_i |=> $onehot0(hit_way_o)) + else begin + $fatal(1, "Hit should be one-hot encoded"); + end +`endif +`endif + end + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + id_q <= 0; + end else begin + id_q <= id_d; + end + end + +endmodule diff --git a/test/type_param/core/cache_subsystem/wt_axi_adapter.sv b/test/type_param/core/cache_subsystem/wt_axi_adapter.sv new file mode 100644 index 00000000..1647f1d0 --- /dev/null +++ b/test/type_param/core/cache_subsystem/wt_axi_adapter.sv @@ -0,0 +1,712 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Michael Schaffner , ETH Zurich +// Date: 08.08.2018 +// Description: adapter module to connect the L1D$ and L1I$ to a 64bit AXI bus. +// + + +module wt_axi_adapter + import ariane_pkg::*; + import wt_cache_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int unsigned ReqFifoDepth = 2, + parameter int unsigned MetaFifoDepth = wt_cache_pkg::DCACHE_MAX_TX, + parameter type axi_req_t = logic, + parameter type axi_rsp_t = logic +) ( + input logic clk_i, + input logic rst_ni, + + // icache + input logic icache_data_req_i, + output logic icache_data_ack_o, + input icache_req_t icache_data_i, + // returning packets must be consumed immediately + output logic icache_rtrn_vld_o, + output icache_rtrn_t icache_rtrn_o, + + // dcache + input logic dcache_data_req_i, + output logic dcache_data_ack_o, + input dcache_req_t dcache_data_i, + // returning packets must be consumed immediately + output logic dcache_rtrn_vld_o, + output dcache_rtrn_t dcache_rtrn_o, + + // AXI port + output axi_req_t axi_req_o, + input axi_rsp_t axi_resp_i, + + // Invalidations + input logic [63:0] inval_addr_i, + input logic inval_valid_i, + output logic inval_ready_o +); + + // support up to 512bit cache lines + localparam AxiNumWords = (ariane_pkg::ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth) * (ariane_pkg::ICACHE_LINE_WIDTH > ariane_pkg::DCACHE_LINE_WIDTH) + + (ariane_pkg::DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth) * (ariane_pkg::ICACHE_LINE_WIDTH <= ariane_pkg::DCACHE_LINE_WIDTH) ; + localparam MaxNumWords = $clog2(CVA6Cfg.AxiDataWidth / 8); + localparam AxiRdBlenIcache = ariane_pkg::ICACHE_LINE_WIDTH / 
CVA6Cfg.AxiDataWidth - 1; + localparam AxiRdBlenDcache = ariane_pkg::DCACHE_LINE_WIDTH / CVA6Cfg.AxiDataWidth - 1; + + /////////////////////////////////////////////////////// + // request path + /////////////////////////////////////////////////////// + + icache_req_t icache_data; + logic icache_data_full, icache_data_empty; + dcache_req_t dcache_data; + logic dcache_data_full, dcache_data_empty; + + logic [1:0] arb_req, arb_ack; + logic arb_idx, arb_gnt; + + logic axi_rd_req, axi_rd_gnt; + logic axi_wr_req, axi_wr_gnt; + logic axi_wr_valid, axi_rd_valid, axi_rd_rdy, axi_wr_rdy; + logic axi_rd_lock, axi_wr_lock, axi_rd_exokay, axi_wr_exokay, wr_exokay; + logic [CVA6Cfg.AxiAddrWidth-1:0] axi_rd_addr, axi_wr_addr; + logic [$clog2(AxiNumWords)-1:0] axi_rd_blen, axi_wr_blen; + logic [2:0] axi_rd_size, axi_wr_size; + logic [CVA6Cfg.AxiIdWidth-1:0] + axi_rd_id_in, axi_wr_id_in, axi_rd_id_out, axi_wr_id_out, wr_id_out; + logic [AxiNumWords-1:0][CVA6Cfg.AxiDataWidth-1:0] axi_wr_data; + logic [AxiNumWords-1:0][CVA6Cfg.AxiUserWidth-1:0] axi_wr_user; + logic [CVA6Cfg.AxiDataWidth-1:0] axi_rd_data; + logic [CVA6Cfg.AxiUserWidth-1:0] axi_rd_user; + logic [AxiNumWords-1:0][(CVA6Cfg.AxiDataWidth/8)-1:0] axi_wr_be; + logic [5:0] axi_wr_atop; + logic invalidate; + logic [$clog2(CVA6Cfg.AxiDataWidth/8)-1:0] amo_off_d, amo_off_q; + // AMO generates r beat + logic amo_gen_r_d, amo_gen_r_q; + + logic [wt_cache_pkg::CACHE_ID_WIDTH-1:0] icache_rtrn_tid_d, icache_rtrn_tid_q; + logic [wt_cache_pkg::CACHE_ID_WIDTH-1:0] dcache_rtrn_tid_d, dcache_rtrn_tid_q; + logic [wt_cache_pkg::CACHE_ID_WIDTH-1:0] dcache_rtrn_rd_tid, dcache_rtrn_wr_tid; + logic dcache_rd_pop, dcache_wr_pop; + logic icache_rd_full, icache_rd_empty; + logic dcache_rd_full, dcache_rd_empty; + logic dcache_wr_full, dcache_wr_empty; + + assign icache_data_ack_o = icache_data_req_i & ~icache_data_full; + assign dcache_data_ack_o = dcache_data_req_i & ~dcache_data_full; + + // arbiter + assign arb_req = { + ~(dcache_data_empty | 
dcache_wr_full | dcache_rd_full), ~(icache_data_empty | icache_rd_full) + }; + + assign arb_gnt = axi_rd_gnt | axi_wr_gnt; + + rr_arb_tree #( + .NumIn (2), + .DataWidth(1), + .AxiVldRdy(1'b1), + .LockIn (1'b1) + ) i_rr_arb_tree ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i('0), + .rr_i ('0), + .req_i (arb_req), + .gnt_o (arb_ack), + .data_i ('0), + .gnt_i (arb_gnt), + .req_o (), + .data_o (), + .idx_o (arb_idx) + ); + + // request side + always_comb begin : p_axi_req + // write channel + axi_wr_id_in = {{CVA6Cfg.AxiIdWidth-1{1'b0}}, arb_idx}; + axi_wr_data[0] = {(CVA6Cfg.AxiDataWidth/riscv::XLEN){dcache_data.data}}; + axi_wr_user[0] = dcache_data.user; + // Cast to AXI address width + axi_wr_addr = {{CVA6Cfg.AxiAddrWidth-riscv::PLEN{1'b0}}, dcache_data.paddr}; + axi_wr_size = dcache_data.size; + axi_wr_req = 1'b0; + axi_wr_blen = '0;// single word writes + axi_wr_be = '0; + axi_wr_lock = '0; + axi_wr_atop = '0; + amo_off_d = amo_off_q; + amo_gen_r_d = amo_gen_r_q; + + // read channel + axi_rd_id_in = {{CVA6Cfg.AxiIdWidth-1{1'b0}}, arb_idx}; + axi_rd_req = 1'b0; + axi_rd_lock = '0; + axi_rd_blen = '0; + + if (dcache_data.paddr[2] == 1'b0) begin + axi_wr_user = {{64 - CVA6Cfg.AxiUserWidth{1'b0}}, dcache_data.user}; + end else begin + axi_wr_user = {dcache_data.user, {64 - CVA6Cfg.AxiUserWidth{1'b0}}}; + end + + // arbiter mux + if (arb_idx) begin + // Cast to AXI address width + axi_rd_addr = {{CVA6Cfg.AxiAddrWidth - riscv::PLEN{1'b0}}, dcache_data.paddr}; + // If dcache_data.size MSB is set, we want to read as much as possible + axi_rd_size = dcache_data.size[2] ? 
MaxNumWords[2:0] : dcache_data.size; + if (dcache_data.size[2]) begin + axi_rd_blen = AxiRdBlenDcache[$clog2(AxiNumWords)-1:0]; + end + end else begin + // Cast to AXI address width + axi_rd_addr = {{CVA6Cfg.AxiAddrWidth - riscv::PLEN{1'b0}}, icache_data.paddr}; + axi_rd_size = MaxNumWords[2:0]; // always request max number of words in case of ifill + if (!icache_data.nc) begin + axi_rd_blen = AxiRdBlenIcache[$clog2(AxiNumWords)-1:0]; + end + end + + // signal that an invalidation message + // needs to be generated + invalidate = 1'b0; + + // decode message type + if (|arb_req) begin + if (arb_idx == 0) begin + ////////////////////////////////////// + // IMISS + axi_rd_req = 1'b1; + ////////////////////////////////////// + end else begin + unique case (dcache_data.rtype) + ////////////////////////////////////// + wt_cache_pkg::DCACHE_LOAD_REQ: begin + axi_rd_req = 1'b1; + end + ////////////////////////////////////// + wt_cache_pkg::DCACHE_STORE_REQ: begin + axi_wr_req = 1'b1; + axi_wr_be = '0; + unique case (dcache_data.size[1:0]) + 2'b00: + axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]] = '1; // byte + 2'b01: + axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]+:2] = '1; // hword + 2'b10: + axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]+:4] = '1; // word + default: + if (riscv::IS_XLEN64) + axi_wr_be[0][dcache_data.paddr[$clog2( + CVA6Cfg.AxiDataWidth/8 + )-1:0]+:8] = '1; // dword + endcase + end + ////////////////////////////////////// + wt_cache_pkg::DCACHE_ATOMIC_REQ: begin + if (CVA6Cfg.RVA) begin + // default + // push back an invalidation here. + // since we only keep one read tx in flight, and since + // the dcache drains all writes/reads before executing + // an atomic, this is safe. 
+ invalidate = arb_gnt; + axi_wr_req = 1'b1; + axi_wr_be = '0; + unique case (dcache_data.size[1:0]) + 2'b00: + axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]] = '1; // byte + 2'b01: + axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]+:2] = + '1; // hword + 2'b10: + axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]+:4] = + '1; // word + default: + axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]+:8] = + '1; // dword + endcase + amo_gen_r_d = 1'b1; + // need to use a separate ID here, so concat an additional bit + axi_wr_id_in[1] = 1'b1; + + unique case (dcache_data.amo_op) + AMO_LR: begin + axi_rd_lock = 1'b1; + axi_rd_req = 1'b1; + axi_rd_id_in[1] = 1'b1; + // tie to zero in this special case + axi_wr_req = 1'b0; + axi_wr_be = '0; + end + AMO_SC: begin + axi_wr_lock = 1'b1; + amo_gen_r_d = 1'b0; + // needed to properly encode success. store the result at offset within the returned + // AXI data word aligned with the requested word size. 
+ amo_off_d = dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)- + 1:0] & ~((1 << dcache_data.size[1:0]) - 1); + end + // RISC-V atops have a load semantic + AMO_SWAP: axi_wr_atop = axi_pkg::ATOP_ATOMICSWAP; + AMO_ADD: + axi_wr_atop = { + axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_ADD + }; + AMO_AND: begin + // in this case we need to invert the data to get a "CLR" + axi_wr_data[0] = ~{(CVA6Cfg.AxiDataWidth / riscv::XLEN) {dcache_data.data}}; + axi_wr_user = ~{(CVA6Cfg.AxiDataWidth / riscv::XLEN) {dcache_data.user}}; + axi_wr_atop = { + axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_CLR + }; + end + AMO_OR: + axi_wr_atop = { + axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SET + }; + AMO_XOR: + axi_wr_atop = { + axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_EOR + }; + AMO_MAX: + axi_wr_atop = { + axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMAX + }; + AMO_MAXU: + axi_wr_atop = { + axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMAX + }; + AMO_MIN: + axi_wr_atop = { + axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMIN + }; + AMO_MINU: + axi_wr_atop = { + axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMIN + }; + default: ; // Do nothing + endcase + end + end + default: ; // Do nothing + ////////////////////////////////////// + endcase + end + end + end + + fifo_v3 #( + .dtype(icache_req_t), + .DEPTH(ReqFifoDepth) + ) i_icache_data_fifo ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (1'b0), + .testmode_i(1'b0), + .full_o (icache_data_full), + .empty_o (icache_data_empty), + .usage_o (), + .data_i (icache_data_i), + .push_i (icache_data_ack_o), + .data_o (icache_data), + .pop_i (arb_ack[0]) + ); + + fifo_v3 #( + .dtype(dcache_req_t), + .DEPTH(ReqFifoDepth) + ) i_dcache_data_fifo ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (1'b0), + .testmode_i(1'b0), + .full_o (dcache_data_full), + .empty_o 
(dcache_data_empty), + .usage_o (), + .data_i (dcache_data_i), + .push_i (dcache_data_ack_o), + .data_o (dcache_data), + .pop_i (arb_ack[1]) + ); + + /////////////////////////////////////////////////////// + // meta info feedback fifos + /////////////////////////////////////////////////////// + + logic icache_rtrn_rd_en, dcache_rtrn_rd_en; + logic icache_rtrn_vld_d, icache_rtrn_vld_q, dcache_rtrn_vld_d, dcache_rtrn_vld_q; + + fifo_v3 #( + .DATA_WIDTH(wt_cache_pkg::CACHE_ID_WIDTH), + .DEPTH (MetaFifoDepth) + ) i_rd_icache_id ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (1'b0), + .testmode_i(1'b0), + .full_o (icache_rd_full), + .empty_o (icache_rd_empty), + .usage_o (), + .data_i (icache_data.tid), + .push_i (arb_ack[0] & axi_rd_gnt), + .data_o (icache_rtrn_tid_d), + .pop_i (icache_rtrn_vld_d) + ); + + fifo_v3 #( + .DATA_WIDTH(wt_cache_pkg::CACHE_ID_WIDTH), + .DEPTH (MetaFifoDepth) + ) i_rd_dcache_id ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (1'b0), + .testmode_i(1'b0), + .full_o (dcache_rd_full), + .empty_o (dcache_rd_empty), + .usage_o (), + .data_i (dcache_data.tid), + .push_i (arb_ack[1] & axi_rd_gnt), + .data_o (dcache_rtrn_rd_tid), + .pop_i (dcache_rd_pop) + ); + + fifo_v3 #( + .DATA_WIDTH(wt_cache_pkg::CACHE_ID_WIDTH), + .DEPTH (MetaFifoDepth) + ) i_wr_dcache_id ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (1'b0), + .testmode_i(1'b0), + .full_o (dcache_wr_full), + .empty_o (dcache_wr_empty), + .usage_o (), + .data_i (dcache_data.tid), + .push_i (arb_ack[1] & axi_wr_gnt), + .data_o (dcache_rtrn_wr_tid), + .pop_i (dcache_wr_pop) + ); + + // select correct tid to return + assign dcache_rtrn_tid_d = (dcache_wr_pop) ? 
dcache_rtrn_wr_tid : dcache_rtrn_rd_tid; + + /////////////////////////////////////////////////////// + // return path + /////////////////////////////////////////////////////// + + // buffer write responses + logic b_full, b_empty, b_push, b_pop; + assign axi_wr_rdy = ~b_full; + assign b_push = axi_wr_valid & axi_wr_rdy; + + fifo_v3 #( + .DATA_WIDTH (CVA6Cfg.AxiIdWidth + 1), + .DEPTH (MetaFifoDepth), + .FALL_THROUGH(1'b1) + ) i_b_fifo ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (1'b0), + .testmode_i(1'b0), + .full_o (b_full), + .empty_o (b_empty), + .usage_o (), + .data_i ({axi_wr_exokay, axi_wr_id_out}), + .push_i (b_push), + .data_o ({wr_exokay, wr_id_out}), + .pop_i (b_pop) + ); + + // buffer read responses in shift regs + logic icache_first_d, icache_first_q, dcache_first_d, dcache_first_q; + logic [ICACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:0][CVA6Cfg.AxiUserWidth-1:0] + icache_rd_shift_user_d, icache_rd_shift_user_q; + logic [DCACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:0][CVA6Cfg.AxiUserWidth-1:0] + dcache_rd_shift_user_d, dcache_rd_shift_user_q; + logic [ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:0][CVA6Cfg.AxiDataWidth-1:0] + icache_rd_shift_d, icache_rd_shift_q; + logic [DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:0][CVA6Cfg.AxiDataWidth-1:0] + dcache_rd_shift_d, dcache_rd_shift_q; + wt_cache_pkg::dcache_in_t dcache_rtrn_type_d, dcache_rtrn_type_q; + wt_cache_pkg::dcache_inval_t dcache_rtrn_inv_d, dcache_rtrn_inv_q; + logic dcache_sc_rtrn, axi_rd_last; + + always_comb begin : p_axi_rtrn_shift + // output directly from regs + icache_rtrn_o = '0; + icache_rtrn_o.rtype = wt_cache_pkg::ICACHE_IFILL_ACK; + icache_rtrn_o.tid = icache_rtrn_tid_q; + icache_rtrn_o.data = icache_rd_shift_q; + icache_rtrn_o.user = icache_rd_shift_user_q; + icache_rtrn_vld_o = icache_rtrn_vld_q; + + dcache_rtrn_o = '0; + dcache_rtrn_o.rtype = dcache_rtrn_type_q; + dcache_rtrn_o.inv = dcache_rtrn_inv_q; + dcache_rtrn_o.tid = dcache_rtrn_tid_q; + dcache_rtrn_o.data = 
dcache_rd_shift_q; + dcache_rtrn_o.user = dcache_rd_shift_user_q; + dcache_rtrn_vld_o = dcache_rtrn_vld_q; + + // read shift registers + icache_rd_shift_d = icache_rd_shift_q; + icache_rd_shift_user_d = icache_rd_shift_user_q; + dcache_rd_shift_d = dcache_rd_shift_q; + dcache_rd_shift_user_d = dcache_rd_shift_user_q; + icache_first_d = icache_first_q; + dcache_first_d = dcache_first_q; + + if (icache_rtrn_rd_en) begin + icache_first_d = axi_rd_last; + if (ICACHE_LINE_WIDTH == CVA6Cfg.AxiDataWidth) begin + icache_rd_shift_d[0] = axi_rd_data; + end else begin + icache_rd_shift_d = { + axi_rd_data, icache_rd_shift_q[ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:1] + }; + end + icache_rd_shift_user_d = { + axi_rd_user, icache_rd_shift_user_q[ICACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:1] + }; + // if this is a single word transaction, we need to make sure that word is placed at offset 0 + if (icache_first_q) begin + icache_rd_shift_d[0] = axi_rd_data; + icache_rd_shift_user_d[0] = axi_rd_user; + end + end + + if (dcache_rtrn_rd_en) begin + dcache_first_d = axi_rd_last; + if (DCACHE_LINE_WIDTH == CVA6Cfg.AxiDataWidth) begin + dcache_rd_shift_d[0] = axi_rd_data; + end else begin + dcache_rd_shift_d = { + axi_rd_data, dcache_rd_shift_q[DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:1] + }; + end + dcache_rd_shift_user_d = { + axi_rd_user, dcache_rd_shift_user_q[DCACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:1] + }; + // if this is a single word transaction, we need to make sure that word is placed at offset 0 + if (dcache_first_q) begin + dcache_rd_shift_d[0] = axi_rd_data; + dcache_rd_shift_user_d[0] = axi_rd_user; + end + end else if (CVA6Cfg.RVA && dcache_sc_rtrn) begin + // encode lr/sc success + dcache_rd_shift_d[0] = '0; + dcache_rd_shift_user_d[0] = '0; + dcache_rd_shift_d[0][amo_off_q*8] = (wr_exokay) ? '0 : 1'b1; + dcache_rd_shift_user_d[0][amo_off_q*8] = (wr_exokay) ? 
'0 : 1'b1; + end + end + + // decode virtual read channels of icache and dcache + always_comb begin : p_axi_rtrn_decode + // we are not ready when invalidating + // note: b's are buffered separately + axi_rd_rdy = ~invalidate; + + icache_rtrn_rd_en = 1'b0; + icache_rtrn_vld_d = 1'b0; + + // decode virtual icache channel, + // this is independent of the dcache decoding below + if (axi_rd_valid && axi_rd_id_out == 0 && axi_rd_rdy) begin + icache_rtrn_rd_en = 1'b1; + icache_rtrn_vld_d = axi_rd_last; + end + + dcache_rtrn_rd_en = 1'b0; + dcache_rtrn_vld_d = 1'b0; + dcache_rd_pop = 1'b0; + dcache_wr_pop = 1'b0; + dcache_rtrn_inv_d = '0; + dcache_rtrn_type_d = wt_cache_pkg::DCACHE_LOAD_ACK; + b_pop = 1'b0; + dcache_sc_rtrn = 1'b0; + + // External invalidation requests (from coprocessor). This is safe as + // there are no other transactions when a coprocessor has pending stores. + inval_ready_o = 1'b0; + if (inval_valid_i) begin + inval_ready_o = 1'b1; + dcache_rtrn_type_d = wt_cache_pkg::DCACHE_INV_REQ; + dcache_rtrn_vld_d = 1'b1; + dcache_rtrn_inv_d.all = 1'b1; + dcache_rtrn_inv_d.idx = inval_addr_i[ariane_pkg::DCACHE_INDEX_WIDTH-1:0]; + ////////////////////////////////////// + // dcache needs some special treatment + // for arbitration and decoding of atomics + ////////////////////////////////////// + // this is safe, there is no other read tx in flight than this atomic. + // note that this self invalidation is handled in this way due to the + // write-through cache architecture, which is aligned with the openpiton + // cache subsystem. 
+ end else if (CVA6Cfg.RVA && invalidate) begin + dcache_rtrn_type_d = wt_cache_pkg::DCACHE_INV_REQ; + dcache_rtrn_vld_d = 1'b1; + + dcache_rtrn_inv_d.all = 1'b1; + dcache_rtrn_inv_d.idx = dcache_data.paddr[ariane_pkg::DCACHE_INDEX_WIDTH-1:0]; + ////////////////////////////////////// + // read responses + // note that in case of atomics, the dcache sequentializes requests and + // guarantees that there are no other pending transactions in flight + end else if (axi_rd_valid && axi_rd_id_out[0] && axi_rd_rdy) begin + dcache_rtrn_rd_en = 1'b1; + dcache_rtrn_vld_d = axi_rd_last; + + // if this was an atomic op + if (CVA6Cfg.RVA && axi_rd_id_out[1]) begin + dcache_rtrn_type_d = wt_cache_pkg::DCACHE_ATOMIC_ACK; + + // check if transaction was issued over write channel and pop that ID + if (!dcache_wr_empty) begin + dcache_wr_pop = axi_rd_last; + // if this is not the case, there MUST be an id in the read channel (LR) + end else begin + dcache_rd_pop = axi_rd_last; + end + end else begin + dcache_rd_pop = axi_rd_last; + end + ////////////////////////////////////// + // write responses, check b fifo + end else if (!b_empty) begin + b_pop = 1'b1; + + // this was an atomic + if (CVA6Cfg.RVA && wr_id_out[1]) begin + dcache_rtrn_type_d = wt_cache_pkg::DCACHE_ATOMIC_ACK; + + // silently discard b response if we already popped the fifo + // with a R beat (iff the amo transaction generated an R beat) + if (!amo_gen_r_q) begin + dcache_rtrn_vld_d = 1'b1; + dcache_wr_pop = 1'b1; + dcache_sc_rtrn = 1'b1; + end + end else begin + // regular response + dcache_rtrn_type_d = wt_cache_pkg::DCACHE_STORE_ACK; + dcache_rtrn_vld_d = 1'b1; + dcache_wr_pop = 1'b1; + end + end + ////////////////////////////////////// + end + + // remote invalidations are not supported yet (this needs a cache coherence protocol) + // note that the atomic transactions would also need a "master exclusive monitor" in that case + // assign icache_rtrn_o.inv.idx = '0; + // assign icache_rtrn_o.inv.way = '0; + // 
assign icache_rtrn_o.inv.vld = '0; + // assign icache_rtrn_o.inv.all = '0; + + // assign dcache_rtrn_o.inv.idx = '0; + // assign dcache_rtrn_o.inv.way = '0; + // assign dcache_rtrn_o.inv.vld = '0; + // assign dcache_rtrn_o.inv.all = '0; + + always_ff @(posedge clk_i or negedge rst_ni) begin : p_rd_buf + if (!rst_ni) begin + icache_first_q <= 1'b1; + dcache_first_q <= 1'b1; + icache_rd_shift_q <= '0; + icache_rd_shift_user_q <= '0; + dcache_rd_shift_q <= '0; + dcache_rd_shift_user_q <= '0; + icache_rtrn_vld_q <= '0; + dcache_rtrn_vld_q <= '0; + icache_rtrn_tid_q <= '0; + dcache_rtrn_tid_q <= '0; + dcache_rtrn_type_q <= wt_cache_pkg::DCACHE_LOAD_ACK; + dcache_rtrn_inv_q <= '0; + amo_off_q <= '0; + amo_gen_r_q <= 1'b0; + end else begin + icache_first_q <= icache_first_d; + dcache_first_q <= dcache_first_d; + icache_rd_shift_q <= icache_rd_shift_d; + icache_rd_shift_user_q <= icache_rd_shift_user_d; + dcache_rd_shift_q <= dcache_rd_shift_d; + dcache_rd_shift_user_q <= dcache_rd_shift_user_d; + icache_rtrn_vld_q <= icache_rtrn_vld_d; + dcache_rtrn_vld_q <= dcache_rtrn_vld_d; + icache_rtrn_tid_q <= icache_rtrn_tid_d; + dcache_rtrn_tid_q <= dcache_rtrn_tid_d; + dcache_rtrn_type_q <= dcache_rtrn_type_d; + dcache_rtrn_inv_q <= dcache_rtrn_inv_d; + amo_off_q <= amo_off_d; + amo_gen_r_q <= amo_gen_r_d; + end + end + + + /////////////////////////////////////////////////////// + // axi protocol shim + /////////////////////////////////////////////////////// + + axi_shim #( + .CVA6Cfg (CVA6Cfg), + .AxiNumWords(AxiNumWords), + .axi_req_t (axi_req_t), + .axi_rsp_t (axi_rsp_t) + ) i_axi_shim ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .rd_req_i (axi_rd_req), + .rd_gnt_o (axi_rd_gnt), + .rd_addr_i (axi_rd_addr), + .rd_blen_i (axi_rd_blen), + .rd_size_i (axi_rd_size), + .rd_id_i (axi_rd_id_in), + .rd_rdy_i (axi_rd_rdy), + .rd_lock_i (axi_rd_lock), + .rd_last_o (axi_rd_last), + .rd_valid_o (axi_rd_valid), + .rd_data_o (axi_rd_data), + .rd_user_o (axi_rd_user), + .rd_id_o (axi_rd_id_out), 
+ .rd_exokay_o(axi_rd_exokay), + .wr_req_i (axi_wr_req), + .wr_gnt_o (axi_wr_gnt), + .wr_addr_i (axi_wr_addr), + .wr_data_i (axi_wr_data), + .wr_user_i (axi_wr_user), + .wr_be_i (axi_wr_be), + .wr_blen_i (axi_wr_blen), + .wr_size_i (axi_wr_size), + .wr_id_i (axi_wr_id_in), + .wr_lock_i (axi_wr_lock), + .wr_atop_i (axi_wr_atop), + .wr_rdy_i (axi_wr_rdy), + .wr_valid_o (axi_wr_valid), + .wr_id_o (axi_wr_id_out), + .wr_exokay_o(axi_wr_exokay), + .axi_req_o (axi_req_o), + .axi_resp_i (axi_resp_i) + ); + + /////////////////////////////////////////////////////// + // assertions + /////////////////////////////////////////////////////// + + //pragma translate_off +`ifndef VERILATOR + +`endif + //pragma translate_on + +endmodule // wt_axi_adapter diff --git a/test/type_param/core/cache_subsystem/wt_cache_subsystem.sv b/test/type_param/core/cache_subsystem/wt_cache_subsystem.sv new file mode 100644 index 00000000..ec094671 --- /dev/null +++ b/test/type_param/core/cache_subsystem/wt_cache_subsystem.sv @@ -0,0 +1,233 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Michael Schaffner , ETH Zurich +// Date: 15.08.2018 +// Description: Ariane cache subsystem that is compatible with the OpenPiton +// coherent memory system. +// +// Define PITON_ARIANE if you want to use this cache. 
+// Define DCACHE_TYPE if you want to use this cache +// with a standard 64 bit AXI interface instead of the OpenPiton +// L1.5 interface. + + +module wt_cache_subsystem + import ariane_pkg::*; + import wt_cache_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int unsigned NumPorts = 4, + parameter type noc_req_t = logic, + parameter type noc_resp_t = logic +) ( + input logic clk_i, + input logic rst_ni, + // I$ + input logic icache_en_i, // enable icache (or bypass e.g: in debug mode) + input logic icache_flush_i, // flush the icache, flush and kill have to be asserted together + output logic icache_miss_o, // to performance counter + // address translation requests + input icache_areq_t icache_areq_i, // to/from frontend + output icache_arsp_t icache_areq_o, + // data requests + input icache_dreq_t icache_dreq_i, // to/from frontend + output icache_drsp_t icache_dreq_o, + // D$ + // Cache management + input logic dcache_enable_i, // from CSR + input logic dcache_flush_i, // high until acknowledged + output logic dcache_flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed + output logic dcache_miss_o, // we missed on a ld/st + // For Performance Counter + output logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits_o, + // AMO interface + input amo_req_t dcache_amo_req_i, + output amo_resp_t dcache_amo_resp_o, + // Request ports + input dcache_req_i_t [NumPorts-1:0] dcache_req_ports_i, // to/from LSU + output dcache_req_o_t [NumPorts-1:0] dcache_req_ports_o, // to/from LSU + // writebuffer status + output logic wbuffer_empty_o, + output logic wbuffer_not_ni_o, + // memory side + output noc_req_t noc_req_o, + input noc_resp_t noc_resp_i, + // Invalidations + input logic [63:0] inval_addr_i, + input logic inval_valid_i, + output logic inval_ready_o + // TODO: interrupt interface +); + + logic icache_adapter_data_req, adapter_icache_data_ack, adapter_icache_rtrn_vld; + 
wt_cache_pkg::icache_req_t icache_adapter; + wt_cache_pkg::icache_rtrn_t adapter_icache; + + + logic dcache_adapter_data_req, adapter_dcache_data_ack, adapter_dcache_rtrn_vld; + wt_cache_pkg::dcache_req_t dcache_adapter; + wt_cache_pkg::dcache_rtrn_t adapter_dcache; + + cva6_icache #( + // use ID 0 for icache reads + .CVA6Cfg(CVA6Cfg), + .RdTxId (0) + ) i_cva6_icache ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (icache_flush_i), + .en_i (icache_en_i), + .miss_o (icache_miss_o), + .areq_i (icache_areq_i), + .areq_o (icache_areq_o), + .dreq_i (icache_dreq_i), + .dreq_o (icache_dreq_o), + .mem_rtrn_vld_i(adapter_icache_rtrn_vld), + .mem_rtrn_i (adapter_icache), + .mem_data_req_o(icache_adapter_data_req), + .mem_data_ack_i(adapter_icache_data_ack), + .mem_data_o (icache_adapter) + ); + + + // Note: + // Ports 0 to NumPorts-2 (PTW, LD unit, optional accelerator) are read only. + // they have equal prio and are RR arbitrated + // Port NumPorts-1 is write only and goes into the merging write buffer + wt_dcache #( + .CVA6Cfg (CVA6Cfg), + // use ID 1 for dcache reads and amos. note that the writebuffer + // uses all IDs up to DCACHE_MAX_TX-1 for write transactions. + .RdAmoTxId(1) + ) i_wt_dcache ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .enable_i (dcache_enable_i), + .flush_i (dcache_flush_i), + .flush_ack_o (dcache_flush_ack_o), + .miss_o (dcache_miss_o), + .wbuffer_empty_o (wbuffer_empty_o), + .wbuffer_not_ni_o(wbuffer_not_ni_o), + .amo_req_i (dcache_amo_req_i), + .amo_resp_o (dcache_amo_resp_o), + .req_ports_i (dcache_req_ports_i), + .req_ports_o (dcache_req_ports_o), + .miss_vld_bits_o (miss_vld_bits_o), + .mem_rtrn_vld_i (adapter_dcache_rtrn_vld), + .mem_rtrn_i (adapter_dcache), + .mem_data_req_o (dcache_adapter_data_req), + .mem_data_ack_i (adapter_dcache_data_ack), + .mem_data_o (dcache_adapter) + ); + + + /////////////////////////////////////////////////////// + // memory plumbing, either use 64bit AXI port or native + // L15 cache interface (derived from OpenSPARC CCX). 
+ /////////////////////////////////////////////////////// + +`ifdef PITON_ARIANE + wt_l15_adapter #( + .CVA6Cfg(CVA6Cfg) + ) i_adapter ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .icache_data_req_i(icache_adapter_data_req), + .icache_data_ack_o(adapter_icache_data_ack), + .icache_data_i (icache_adapter), + .icache_rtrn_vld_o(adapter_icache_rtrn_vld), + .icache_rtrn_o (adapter_icache), + .dcache_data_req_i(dcache_adapter_data_req), + .dcache_data_ack_o(adapter_dcache_data_ack), + .dcache_data_i (dcache_adapter), + .dcache_rtrn_vld_o(adapter_dcache_rtrn_vld), + .dcache_rtrn_o (adapter_dcache), + .l15_req_o (noc_req_o), + .l15_rtrn_i (noc_resp_i) + ); +`else + wt_axi_adapter #( + .CVA6Cfg (CVA6Cfg), + .axi_req_t(noc_req_t), + .axi_rsp_t(noc_resp_t) + ) i_adapter ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .icache_data_req_i(icache_adapter_data_req), + .icache_data_ack_o(adapter_icache_data_ack), + .icache_data_i (icache_adapter), + .icache_rtrn_vld_o(adapter_icache_rtrn_vld), + .icache_rtrn_o (adapter_icache), + .dcache_data_req_i(dcache_adapter_data_req), + .dcache_data_ack_o(adapter_dcache_data_ack), + .dcache_data_i (dcache_adapter), + .dcache_rtrn_vld_o(adapter_dcache_rtrn_vld), + .dcache_rtrn_o (adapter_dcache), + .axi_req_o (noc_req_o), + .axi_resp_i (noc_resp_i), + .inval_addr_i (inval_addr_i), + .inval_valid_i (inval_valid_i), + .inval_ready_o (inval_ready_o) + ); +`endif + + /////////////////////////////////////////////////////// + // assertions + /////////////////////////////////////////////////////// + + //pragma translate_off +`ifndef VERILATOR + a_invalid_instruction_fetch : + assert property ( + @(posedge clk_i) disable iff (!rst_ni) icache_dreq_o.valid |-> (|icache_dreq_o.data) !== 1'hX) + else + $warning( + 1, + "[l1 icache] reading invalid instructions: vaddr=%08X, data=%08X", + icache_dreq_o.vaddr, + icache_dreq_o.data + ); + + for (genvar j = 0; j < riscv::XLEN / 8; j++) begin : gen_invalid_write_assertion + a_invalid_write_data : + assert property ( + 
@(posedge clk_i) disable iff (!rst_ni) dcache_req_ports_i[NumPorts-1].data_req |-> dcache_req_ports_i[NumPorts-1].data_be[j] |-> (|dcache_req_ports_i[NumPorts-1].data_wdata[j*8+:8] !== 1'hX)) + else + $warning( + 1, + "[l1 dcache] writing invalid data: paddr=%016X, be=%02X, data=%016X, databe=%016X", + { + dcache_req_ports_i[NumPorts-1].address_tag, dcache_req_ports_i[NumPorts-1].address_index + }, + dcache_req_ports_i[NumPorts-1].data_be, + dcache_req_ports_i[NumPorts-1].data_wdata, + dcache_req_ports_i[NumPorts-1].data_be & dcache_req_ports_i[NumPorts-1].data_wdata + ); + end + + + for (genvar j = 0; j < NumPorts - 1; j++) begin : gen_assertion + a_invalid_read_data : + assert property ( + @(posedge clk_i) disable iff (!rst_ni) dcache_req_ports_o[j].data_rvalid && ~dcache_req_ports_i[j].kill_req |-> (|dcache_req_ports_o[j].data_rdata) !== 1'hX) + else + $warning( + 1, + "[l1 dcache] reading invalid data on port %01d: data=%016X", + j, + dcache_req_ports_o[j].data_rdata + ); + end +`endif + //pragma translate_on + + +endmodule // wt_cache_subsystem diff --git a/test/type_param/core/cache_subsystem/wt_dcache.sv b/test/type_param/core/cache_subsystem/wt_dcache.sv new file mode 100644 index 00000000..af672d88 --- /dev/null +++ b/test/type_param/core/cache_subsystem/wt_dcache.sv @@ -0,0 +1,360 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// Author: Michael Schaffner , ETH Zurich +// Date: 13.09.2018 +// Description: Write-Through Data cache that is compatible with openpiton. + + +module wt_dcache + import ariane_pkg::*; + import wt_cache_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int unsigned NumPorts = 4, // number of miss ports + // ID to be used for read and AMO transactions. + // note that the write buffer uses all IDs up to DCACHE_MAX_TX-1 for write transactions + parameter logic [CACHE_ID_WIDTH-1:0] RdAmoTxId = 1 +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + + // Cache management + input logic enable_i, // from CSR + input logic flush_i, // high until acknowledged + output logic flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed + output logic miss_o, // we missed on a ld/st + output logic wbuffer_empty_o, + output logic wbuffer_not_ni_o, + + // AMO interface + input amo_req_t amo_req_i, + output amo_resp_t amo_resp_o, + + // Request ports + input dcache_req_i_t [NumPorts-1:0] req_ports_i, + output dcache_req_o_t [NumPorts-1:0] req_ports_o, + + output logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits_o, + + input logic mem_rtrn_vld_i, + input dcache_rtrn_t mem_rtrn_i, + output logic mem_data_req_o, + input logic mem_data_ack_i, + output dcache_req_t mem_data_o +); + + // miss unit <-> read controllers + logic cache_en; + + // miss unit <-> memory + logic wr_cl_vld; + logic wr_cl_nc; + logic [ DCACHE_SET_ASSOC-1:0] wr_cl_we; + logic [ DCACHE_TAG_WIDTH-1:0] wr_cl_tag; + logic [ DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx; + logic [ DCACHE_OFFSET_WIDTH-1:0] wr_cl_off; + logic [ DCACHE_LINE_WIDTH-1:0] wr_cl_data; + logic [DCACHE_USER_LINE_WIDTH-1:0] wr_cl_user; + logic [ DCACHE_LINE_WIDTH/8-1:0] wr_cl_data_be; + logic [ DCACHE_SET_ASSOC-1:0] wr_vld_bits; + logic [ DCACHE_SET_ASSOC-1:0] wr_req; + logic wr_ack; + logic [ DCACHE_CL_IDX_WIDTH-1:0] wr_idx; + logic [ 
DCACHE_OFFSET_WIDTH-1:0] wr_off; + riscv::xlen_t wr_data; + logic [ (riscv::XLEN/8)-1:0] wr_data_be; + logic [ DCACHE_USER_WIDTH-1:0] wr_user; + + // miss unit <-> controllers/wbuffer + logic [ NumPorts-1:0] miss_req; + logic [ NumPorts-1:0] miss_ack; + logic [ NumPorts-1:0] miss_nc; + logic [ NumPorts-1:0] miss_we; + logic [ NumPorts-1:0][ riscv::XLEN-1:0] miss_wdata; + logic [ NumPorts-1:0][ DCACHE_USER_WIDTH-1:0] miss_wuser; + logic [ NumPorts-1:0][ riscv::PLEN-1:0] miss_paddr; + logic [ NumPorts-1:0][ 2:0] miss_size; + logic [ NumPorts-1:0][ CACHE_ID_WIDTH-1:0] miss_id; + logic [ NumPorts-1:0] miss_replay; + logic [ NumPorts-1:0] miss_rtrn_vld; + logic [ CACHE_ID_WIDTH-1:0] miss_rtrn_id; + + // memory <-> read controllers/miss unit + logic [ NumPorts-1:0] rd_prio; + logic [ NumPorts-1:0] rd_tag_only; + logic [ NumPorts-1:0] rd_req; + logic [ NumPorts-1:0] rd_ack; + logic [ NumPorts-1:0][ DCACHE_TAG_WIDTH-1:0] rd_tag; + logic [ NumPorts-1:0][DCACHE_CL_IDX_WIDTH-1:0] rd_idx; + logic [ NumPorts-1:0][DCACHE_OFFSET_WIDTH-1:0] rd_off; + riscv::xlen_t rd_data; + logic [ DCACHE_USER_WIDTH-1:0] rd_user; + logic [ DCACHE_SET_ASSOC-1:0] rd_vld_bits; + logic [ DCACHE_SET_ASSOC-1:0] rd_hit_oh; + + // miss unit <-> wbuffer + logic [ DCACHE_MAX_TX-1:0][ riscv::PLEN-1:0] tx_paddr; + logic [ DCACHE_MAX_TX-1:0] tx_vld; + + // wbuffer <-> memory + wbuffer_t [ DCACHE_WBUF_DEPTH-1:0] wbuffer_data; + + + /////////////////////////////////////////////////////// + // miss handling unit + /////////////////////////////////////////////////////// + + wt_dcache_missunit #( + .CVA6Cfg (CVA6Cfg), + .AmoTxId (RdAmoTxId), + .NumPorts(NumPorts) + ) i_wt_dcache_missunit ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .enable_i (enable_i), + .flush_i (flush_i), + .flush_ack_o (flush_ack_o), + .miss_o (miss_o), + .wbuffer_empty_i(wbuffer_empty_o), + .cache_en_o (cache_en), + // amo interface + .amo_req_i (amo_req_i), + .amo_resp_o (amo_resp_o), + // miss handling interface + .miss_req_i (miss_req), + 
.miss_ack_o (miss_ack), + .miss_nc_i (miss_nc), + .miss_we_i (miss_we), + .miss_wdata_i (miss_wdata), + .miss_wuser_i (miss_wuser), + .miss_paddr_i (miss_paddr), + .miss_vld_bits_i(miss_vld_bits_o), + .miss_size_i (miss_size), + .miss_id_i (miss_id), + .miss_replay_o (miss_replay), + .miss_rtrn_vld_o(miss_rtrn_vld), + .miss_rtrn_id_o (miss_rtrn_id), + // from writebuffer + .tx_paddr_i (tx_paddr), + .tx_vld_i (tx_vld), + // cache memory interface + .wr_cl_vld_o (wr_cl_vld), + .wr_cl_nc_o (wr_cl_nc), + .wr_cl_we_o (wr_cl_we), + .wr_cl_tag_o (wr_cl_tag), + .wr_cl_idx_o (wr_cl_idx), + .wr_cl_off_o (wr_cl_off), + .wr_cl_data_o (wr_cl_data), + .wr_cl_user_o (wr_cl_user), + .wr_cl_data_be_o(wr_cl_data_be), + .wr_vld_bits_o (wr_vld_bits), + // memory interface + .mem_rtrn_vld_i (mem_rtrn_vld_i), + .mem_rtrn_i (mem_rtrn_i), + .mem_data_req_o (mem_data_req_o), + .mem_data_ack_i (mem_data_ack_i), + .mem_data_o (mem_data_o) + ); + + /////////////////////////////////////////////////////// + // read controllers (LD unit and PTW/MMU) + /////////////////////////////////////////////////////// + + // 0 is used by MMU, 1 by READ access requests + for (genvar k = 0; k < NumPorts - 1; k++) begin : gen_rd_ports + // set these to high prio ports + if ((k == 0 && MMU_PRESENT) || (k == 1) || (k == 2 && CVA6Cfg.EnableAccelerator)) begin + assign rd_prio[k] = 1'b1; + wt_dcache_ctrl #( + .CVA6Cfg(CVA6Cfg), + .RdTxId (RdAmoTxId) + ) i_wt_dcache_ctrl ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .cache_en_i (cache_en), + // reqs from core + .req_port_i (req_ports_i[k]), + .req_port_o (req_ports_o[k]), + // miss interface + .miss_req_o (miss_req[k]), + .miss_ack_i (miss_ack[k]), + .miss_we_o (miss_we[k]), + .miss_wdata_o (miss_wdata[k]), + .miss_wuser_o (miss_wuser[k]), + .miss_vld_bits_o(miss_vld_bits_o[k]), + .miss_paddr_o (miss_paddr[k]), + .miss_nc_o (miss_nc[k]), + .miss_size_o (miss_size[k]), + .miss_id_o (miss_id[k]), + .miss_replay_i (miss_replay[k]), + .miss_rtrn_vld_i(miss_rtrn_vld[k]), + 
// used to detect readout mux collisions + .wr_cl_vld_i (wr_cl_vld), + // cache mem interface + .rd_tag_o (rd_tag[k]), + .rd_idx_o (rd_idx[k]), + .rd_off_o (rd_off[k]), + .rd_req_o (rd_req[k]), + .rd_tag_only_o (rd_tag_only[k]), + .rd_ack_i (rd_ack[k]), + .rd_data_i (rd_data), + .rd_user_i (rd_user), + .rd_vld_bits_i (rd_vld_bits), + .rd_hit_oh_i (rd_hit_oh) + ); + end else begin + assign rd_prio[k] = 1'b0; + assign req_ports_o[k] = '0; + assign miss_req[k] = 1'b0; + assign miss_we[k] = 1'b0; + assign miss_wdata[k] = {{riscv::XLEN} {1'b0}}; + assign miss_wuser[k] = {{DCACHE_USER_WIDTH} {1'b0}}; + assign miss_vld_bits_o[k] = {{DCACHE_SET_ASSOC} {1'b0}}; + assign miss_paddr[k] = {{riscv::PLEN} {1'b0}}; + assign miss_nc[k] = 1'b0; + assign miss_size[k] = 3'b0; + assign miss_id[k] = {{CACHE_ID_WIDTH} {1'b0}}; + assign rd_tag[k] = {{DCACHE_TAG_WIDTH} {1'b0}}; + assign rd_idx[k] = {{DCACHE_CL_IDX_WIDTH} {1'b0}}; + assign rd_off[k] = {{DCACHE_OFFSET_WIDTH} {1'b0}}; + assign rd_req[k] = 1'b0; + assign rd_tag_only[k] = 1'b0; + end + end + + /////////////////////////////////////////////////////// + // store unit controller + /////////////////////////////////////////////////////// + + // set read port to low priority + assign rd_prio[NumPorts-1] = 1'b0; + + wt_dcache_wbuffer #( + .CVA6Cfg(CVA6Cfg) + ) i_wt_dcache_wbuffer ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .empty_o (wbuffer_empty_o), + .not_ni_o (wbuffer_not_ni_o), + // TODO: fix this + .cache_en_i (cache_en), + // .cache_en_i ( '0 ), + // request ports from core (store unit) + .req_port_i (req_ports_i[NumPorts-1]), + .req_port_o (req_ports_o[NumPorts-1]), + // miss unit interface + .miss_req_o (miss_req[NumPorts-1]), + .miss_ack_i (miss_ack[NumPorts-1]), + .miss_we_o (miss_we[NumPorts-1]), + .miss_wdata_o (miss_wdata[NumPorts-1]), + .miss_wuser_o (miss_wuser[NumPorts-1]), + .miss_vld_bits_o(miss_vld_bits_o[NumPorts-1]), + .miss_paddr_o (miss_paddr[NumPorts-1]), + .miss_nc_o (miss_nc[NumPorts-1]), + .miss_size_o 
(miss_size[NumPorts-1]), + .miss_id_o (miss_id[NumPorts-1]), + .miss_rtrn_vld_i(miss_rtrn_vld[NumPorts-1]), + .miss_rtrn_id_i (miss_rtrn_id), + // cache read interface + .rd_tag_o (rd_tag[NumPorts-1]), + .rd_idx_o (rd_idx[NumPorts-1]), + .rd_off_o (rd_off[NumPorts-1]), + .rd_req_o (rd_req[NumPorts-1]), + .rd_tag_only_o (rd_tag_only[NumPorts-1]), + .rd_ack_i (rd_ack[NumPorts-1]), + .rd_data_i (rd_data), + .rd_vld_bits_i (rd_vld_bits), + .rd_hit_oh_i (rd_hit_oh), + // incoming invalidations/cache refills + .wr_cl_vld_i (wr_cl_vld), + .wr_cl_idx_i (wr_cl_idx), + // single word write interface + .wr_req_o (wr_req), + .wr_ack_i (wr_ack), + .wr_idx_o (wr_idx), + .wr_off_o (wr_off), + .wr_data_o (wr_data), + .wr_user_o (wr_user), + .wr_data_be_o (wr_data_be), + // write buffer forwarding + .wbuffer_data_o (wbuffer_data), + .tx_paddr_o (tx_paddr), + .tx_vld_o (tx_vld) + ); + + /////////////////////////////////////////////////////// + // memory arrays, arbitration and tag comparison + /////////////////////////////////////////////////////// + + wt_dcache_mem #( + .CVA6Cfg (CVA6Cfg), + .NumPorts(NumPorts) + ) i_wt_dcache_mem ( + .clk_i (clk_i), + .rst_ni (rst_ni), + // read ports + .rd_prio_i (rd_prio), + .rd_tag_i (rd_tag), + .rd_idx_i (rd_idx), + .rd_off_i (rd_off), + .rd_req_i (rd_req), + .rd_tag_only_i (rd_tag_only), + .rd_ack_o (rd_ack), + .rd_vld_bits_o (rd_vld_bits), + .rd_hit_oh_o (rd_hit_oh), + .rd_data_o (rd_data), + .rd_user_o (rd_user), + // cacheline write port + .wr_cl_vld_i (wr_cl_vld), + .wr_cl_nc_i (wr_cl_nc), + .wr_cl_we_i (wr_cl_we), + .wr_cl_tag_i (wr_cl_tag), + .wr_cl_idx_i (wr_cl_idx), + .wr_cl_off_i (wr_cl_off), + .wr_cl_data_i (wr_cl_data), + .wr_cl_user_i (wr_cl_user), + .wr_cl_data_be_i(wr_cl_data_be), + .wr_vld_bits_i (wr_vld_bits), + // single word write port + .wr_req_i (wr_req), + .wr_ack_o (wr_ack), + .wr_idx_i (wr_idx), + .wr_off_i (wr_off), + .wr_data_i (wr_data), + .wr_user_i (wr_user), + .wr_data_be_i (wr_data_be), + // write buffer 
forwarding + .wbuffer_data_i (wbuffer_data) + ); + + /////////////////////////////////////////////////////// + // assertions + /////////////////////////////////////////////////////// + + // check for concurrency issues + + + //pragma translate_off +`ifndef VERILATOR + flush : + assert property ( + @(posedge clk_i) disable iff (!rst_ni) flush_i |-> flush_ack_o |-> wbuffer_empty_o) + else $fatal(1, "[l1 dcache] flushed cache implies flushed wbuffer"); + + initial begin + // assert wrong parameterizations + assert (DCACHE_INDEX_WIDTH <= 12) + else $fatal(1, "[l1 dcache] cache index width can be maximum 12bit since VM uses 4kB pages"); + end +`endif + //pragma translate_on + +endmodule // wt_dcache diff --git a/test/type_param/core/cache_subsystem/wt_dcache_ctrl.sv b/test/type_param/core/cache_subsystem/wt_dcache_ctrl.sv new file mode 100644 index 00000000..b5973df1 --- /dev/null +++ b/test/type_param/core/cache_subsystem/wt_dcache_ctrl.sv @@ -0,0 +1,299 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// Author: Michael Schaffner, ETH Zurich +// Date: 13.09.2018 +// Description: DCache controller for one read port: an FSM that reads the cache memory, answers hits and forwards misses to the miss unit + + +module wt_dcache_ctrl + import ariane_pkg::*; + import wt_cache_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter logic [CACHE_ID_WIDTH-1:0] RdTxId = 1 +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic cache_en_i, // cache enable; when low, hits are ignored and misses are issued as noncacheable + // core request ports + input dcache_req_i_t req_port_i, + output dcache_req_o_t req_port_o, + // interface to miss handler + output logic miss_req_o, + input logic miss_ack_i, + output logic miss_we_o, // unused (set to 0) + output riscv::xlen_t miss_wdata_o, // unused (set to 0) + output logic [DCACHE_USER_WIDTH-1:0] miss_wuser_o, // unused (set to 0) + output logic [DCACHE_SET_ASSOC-1:0] miss_vld_bits_o, // valid bits at the missed index + output logic [riscv::PLEN-1:0] miss_paddr_o, + output logic miss_nc_o, // noncacheable request (cache disabled or address outside the cacheable regions) + output logic [2:0] miss_size_o, // 000: 1byte, 001: 2byte, 010: 4byte, 011: 8byte, 111: cacheline + output logic [CACHE_ID_WIDTH-1:0] miss_id_o, // set to constant ID (RdTxId) + input logic miss_replay_i, // request collided with pending miss - have to replay the request + input logic miss_rtrn_vld_i, // signals that the miss has been served, asserted in the same cycle as when the data returns from memory + // used to detect readout mux collisions + input logic wr_cl_vld_i, + // cache memory interface + output logic [DCACHE_TAG_WIDTH-1:0] rd_tag_o, // tag in - comes one cycle later + output logic [DCACHE_CL_IDX_WIDTH-1:0] rd_idx_o, + output logic [DCACHE_OFFSET_WIDTH-1:0] rd_off_o, + output logic rd_req_o, // read the word at offset off_i[:3] in all ways + output logic rd_tag_only_o, // set to zero here + input logic rd_ack_i, + input riscv::xlen_t rd_data_i, + input logic [DCACHE_USER_WIDTH-1:0] rd_user_i, + input logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits_i, + input logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_i +); + + 
// controller FSM + typedef enum logic [2:0] { + IDLE, // wait for a core request + READ, // request granted, evaluate the tag lookup (hit/miss/replay) + MISS_REQ, // request the miss unit + MISS_WAIT, // miss acknowledged, wait for the memory response + KILL_MISS, // request killed while the miss is in flight, wait for its return + KILL_MISS_ACK, // request killed before the miss unit acknowledged it + REPLAY_REQ, // re-issue the read to the cache memory + REPLAY_READ // evaluate the replayed read (tag is already saved) + } state_e; + state_e state_d, state_q; + + logic [DCACHE_TAG_WIDTH-1:0] address_tag_d, address_tag_q; + logic [DCACHE_CL_IDX_WIDTH-1:0] address_idx_d, address_idx_q; + logic [DCACHE_OFFSET_WIDTH-1:0] address_off_d, address_off_q; + logic [DCACHE_TID_WIDTH-1:0] id_d, id_q; + logic [DCACHE_SET_ASSOC-1:0] vld_data_d, vld_data_q; + logic save_tag, rd_req_d, rd_req_q, rd_ack_d, rd_ack_q; + logic [1:0] data_size_d, data_size_q; + + /////////////////////////////////////////////////////// + // misc + /////////////////////////////////////////////////////// + + // map address to tag/idx/offset and save + assign vld_data_d = (rd_req_q) ? rd_vld_bits_i : vld_data_q; + assign address_tag_d = (save_tag) ? req_port_i.address_tag : address_tag_q; + assign address_idx_d = (req_port_o.data_gnt) ? req_port_i.address_index[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH] : address_idx_q; + assign address_off_d = (req_port_o.data_gnt) ? req_port_i.address_index[DCACHE_OFFSET_WIDTH-1:0] : address_off_q; + assign id_d = (req_port_o.data_gnt) ? req_port_i.data_id : id_q; + assign data_size_d = (req_port_o.data_gnt) ? req_port_i.data_size : data_size_q; + assign rd_tag_o = address_tag_d; + assign rd_idx_o = address_idx_d; + assign rd_off_o = address_off_d; + + assign req_port_o.data_rdata = rd_data_i; + assign req_port_o.data_ruser = rd_user_i; + assign req_port_o.data_rid = id_q; + + // to miss unit + assign miss_vld_bits_o = vld_data_q; + assign miss_paddr_o = {address_tag_q, address_idx_q, address_off_q}; + assign miss_size_o = (miss_nc_o) ? 
{1'b0, data_size_q} : 3'b111; // noncacheable: size of the request, cacheable: full cacheline + + // noncacheable if request goes to I/O space, or if cache is disabled + assign miss_nc_o = (~cache_en_i) | (~config_pkg::is_inside_cacheable_regions( + CVA6Cfg, + {{{64-DCACHE_TAG_WIDTH-DCACHE_INDEX_WIDTH}{1'b0}}, address_tag_q, {DCACHE_INDEX_WIDTH{1'b0}}} + )); + + + assign miss_we_o = '0; + assign miss_wdata_o = '0; + assign miss_wuser_o = '0; + assign miss_id_o = RdTxId; + assign rd_req_d = rd_req_o; + assign rd_ack_d = rd_ack_i; + assign rd_tag_only_o = '0; + + /////////////////////////////////////////////////////// + // main control logic + /////////////////////////////////////////////////////// + + always_comb begin : p_fsm + // default assignment + state_d = state_q; + save_tag = 1'b0; + rd_req_o = 1'b0; + miss_req_o = 1'b0; + req_port_o.data_rvalid = 1'b0; + req_port_o.data_gnt = 1'b0; + + // interfaces + unique case (state_q) + ////////////////////////////////// + // wait for an incoming request + IDLE: begin + if (req_port_i.data_req) begin + rd_req_o = 1'b1; + // if read ack then ack the `req_port_o`, and goto `READ` state + if (rd_ack_i) begin + state_d = READ; + req_port_o.data_gnt = 1'b1; + end + end + end + ////////////////////////////////// + // check whether we have a hit + // in case the cache is disabled, + // or in case the address is NC, we + // reuse the miss mechanism to handle + // the request + READ, REPLAY_READ: begin + // speculatively request cache line + rd_req_o = 1'b1; + + // kill -> go back to IDLE + if (req_port_i.kill_req) begin + state_d = IDLE; + req_port_o.data_rvalid = 1'b1; + end else if (req_port_i.tag_valid | state_q == REPLAY_READ) begin + save_tag = (state_q != REPLAY_READ); + if (wr_cl_vld_i || !rd_ack_q) begin // readout collided with a cacheline write, or the previous read was not acked: replay + state_d = REPLAY_REQ; + // we've got a hit + end else if ((|rd_hit_oh_i) && cache_en_i) begin + state_d = IDLE; + req_port_o.data_rvalid = 1'b1; + // we can handle another request + if (rd_ack_i && req_port_i.data_req) begin + state_d = READ; + req_port_o.data_gnt = 1'b1; + end + 
// we've got a miss + end else begin + state_d = MISS_REQ; + end + end + end + ////////////////////////////////// + // issue request + MISS_REQ: begin + miss_req_o = 1'b1; + + if (req_port_i.kill_req) begin + req_port_o.data_rvalid = 1'b1; + if (miss_ack_i) begin + state_d = KILL_MISS; + end else begin + state_d = KILL_MISS_ACK; + end + end else if (miss_replay_i) begin + state_d = REPLAY_REQ; + end else if (miss_ack_i) begin + state_d = MISS_WAIT; + end + end + ////////////////////////////////// + // wait until the memory transaction + // returns. + MISS_WAIT: begin + if (req_port_i.kill_req) begin + req_port_o.data_rvalid = 1'b1; + if (miss_rtrn_vld_i) begin + state_d = IDLE; + end else begin + state_d = KILL_MISS; + end + end else if (miss_rtrn_vld_i) begin + state_d = IDLE; + req_port_o.data_rvalid = 1'b1; + end + end + ////////////////////////////////// + // replay read request + REPLAY_REQ: begin + rd_req_o = 1'b1; + if (req_port_i.kill_req) begin + req_port_o.data_rvalid = 1'b1; + state_d = IDLE; + end else if (rd_ack_i) begin + state_d = REPLAY_READ; + end + end + ////////////////////////////////// + KILL_MISS_ACK: begin + miss_req_o = 1'b1; + // in this case the miss handler did not issue + // a transaction and we can safely go to idle + if (miss_replay_i) begin + state_d = IDLE; + end else if (miss_ack_i) begin + state_d = KILL_MISS; + end + end + ////////////////////////////////// + // killed miss, + // wait until miss unit responds and + // go back to idle + KILL_MISS: begin + if (miss_rtrn_vld_i) begin + state_d = IDLE; + end + end + default: begin + // we should never get here + state_d = IDLE; + end + endcase // state_q + end + + /////////////////////////////////////////////////////// + // ff's + /////////////////////////////////////////////////////// + + always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs + if (!rst_ni) begin + state_q <= IDLE; + address_tag_q <= '0; + address_idx_q <= '0; + address_off_q <= '0; + id_q <= '0; + vld_data_q <= 
'0; + data_size_q <= '0; + rd_req_q <= '0; + rd_ack_q <= '0; + end else begin + state_q <= state_d; + address_tag_q <= address_tag_d; + address_idx_q <= address_idx_d; + address_off_q <= address_off_d; + id_q <= id_d; + vld_data_q <= vld_data_d; + data_size_q <= data_size_d; + rd_req_q <= rd_req_d; + rd_ack_q <= rd_ack_d; + end + end + + /////////////////////////////////////////////////////// + // assertions + /////////////////////////////////////////////////////// + + //pragma translate_off +`ifndef VERILATOR + + hot1 : + assert property (@(posedge clk_i) disable iff (!rst_ni) (!rd_ack_i) |=> cache_en_i |-> $onehot0( + rd_hit_oh_i + )) + else $fatal(1, "[l1 dcache ctrl] rd_hit_oh_i signal must be hot1"); + + initial begin + // assert wrong parameterizations + assert (DCACHE_INDEX_WIDTH <= 12) + else + $fatal(1, "[l1 dcache ctrl] cache index width can be maximum 12bit since VM uses 4kB pages"); + end +`endif + //pragma translate_on + +endmodule // wt_dcache_ctrl diff --git a/test/type_param/core/cache_subsystem/wt_dcache_mem.sv b/test/type_param/core/cache_subsystem/wt_dcache_mem.sv new file mode 100644 index 00000000..b2b41c3c --- /dev/null +++ b/test/type_param/core/cache_subsystem/wt_dcache_mem.sv @@ -0,0 +1,428 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// Author: Michael Schaffner, ETH Zurich +// Date: 13.09.2018 +// Description: Memory arrays, arbiter and tag comparison for WT dcache. +// +// +// Notes: 1) all ports can trigger a readout of all ways, and the way where the tag hits is selected +// +// 2) only port0 can write full cache lines. higher ports are read only. also, port0 can only read the tag array, +// and does not trigger a cache line readout. NOTE(review): cacheline writes use the dedicated wr_cl_* port here - confirm this note is still accurate +// +// 3) the single word write port is a separate port without access to the tag memory. +// these single word writes can interleave with read operations if they go to different +// cacheline offsets, since each word offset is placed into a different SRAM bank. +// +// 4) Read ports with same priority are RR arbitrated, but high prio ports (rd_prio_i[port_nr] = 1'b1) will stall +// low prio ports (rd_prio_i[port_nr] = 1'b0) + + +module wt_dcache_mem + import ariane_pkg::*; + import wt_cache_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int unsigned NumPorts = 3 +) ( + input logic clk_i, + input logic rst_ni, + + // ports + input logic [NumPorts-1:0][DCACHE_TAG_WIDTH-1:0] rd_tag_i, // tag in - comes one cycle later + input logic [NumPorts-1:0][DCACHE_CL_IDX_WIDTH-1:0] rd_idx_i, + input logic [NumPorts-1:0][DCACHE_OFFSET_WIDTH-1:0] rd_off_i, + input logic [NumPorts-1:0] rd_req_i, // read the word at offset off_i[:3] in all ways + input logic [NumPorts-1:0] rd_tag_only_i, // only do a tag/valid lookup, no access to data arrays + input logic [NumPorts-1:0] rd_prio_i, // 0: low prio, 1: high prio + output logic [NumPorts-1:0] rd_ack_o, + output logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits_o, + output logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_o, + output riscv::xlen_t rd_data_o, + output logic [DCACHE_USER_WIDTH-1:0] rd_user_o, + + // cacheline write port, addressed by wr_cl_tag_i/wr_cl_idx_i/wr_cl_off_i + input logic wr_cl_vld_i, + input logic wr_cl_nc_i, // noncacheable access + input logic [ DCACHE_SET_ASSOC-1:0] wr_cl_we_i, // 
writes a full cacheline + input logic [ DCACHE_TAG_WIDTH-1:0] wr_cl_tag_i, + input logic [ DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_i, + input logic [ DCACHE_OFFSET_WIDTH-1:0] wr_cl_off_i, + input logic [ DCACHE_LINE_WIDTH-1:0] wr_cl_data_i, + input logic [DCACHE_USER_LINE_WIDTH-1:0] wr_cl_user_i, + input logic [ DCACHE_LINE_WIDTH/8-1:0] wr_cl_data_be_i, + input logic [ DCACHE_SET_ASSOC-1:0] wr_vld_bits_i, + + // separate port for single word write, no tag access + input logic [DCACHE_SET_ASSOC-1:0] wr_req_i, // write a single word to offset off_i[:3] + output logic wr_ack_o, + input logic [DCACHE_CL_IDX_WIDTH-1:0] wr_idx_i, + input logic [DCACHE_OFFSET_WIDTH-1:0] wr_off_i, + input riscv::xlen_t wr_data_i, + input logic [DCACHE_USER_WIDTH-1:0] wr_user_i, + input logic [(riscv::XLEN/8)-1:0] wr_data_be_i, + + // forwarded wbuffer + input wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_data_i +); + + // functions: binary bank index to one-hot bank select + function automatic logic [DCACHE_NUM_BANKS-1:0] dcache_cl_bin2oh( + input logic [DCACHE_NUM_BANKS_WIDTH-1:0] in); + logic [DCACHE_NUM_BANKS-1:0] out; + out = '0; + out[in] = 1'b1; + return out; + endfunction + + // number of bits needed to address AXI data. If AxiDataWidth equals XLEN this parameter + // is not needed. Therefore, increment it by one to avoid reverse range select during elaboration. + localparam AXI_OFFSET_WIDTH = CVA6Cfg.AxiDataWidth == riscv::XLEN ? 
$clog2( + CVA6Cfg.AxiDataWidth / 8 + ) + 1 : $clog2( + CVA6Cfg.AxiDataWidth / 8 + ); + + logic [DCACHE_NUM_BANKS-1:0] bank_req; + logic [DCACHE_NUM_BANKS-1:0] bank_we; + logic [DCACHE_NUM_BANKS-1:0][ DCACHE_SET_ASSOC-1:0][(riscv::XLEN/8)-1:0] bank_be; + logic [DCACHE_NUM_BANKS-1:0][DCACHE_CL_IDX_WIDTH-1:0] bank_idx; + logic [DCACHE_CL_IDX_WIDTH-1:0] bank_idx_d, bank_idx_q; + logic [DCACHE_OFFSET_WIDTH-1:0] bank_off_d, bank_off_q; + + logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][riscv::XLEN-1:0] bank_wdata; // + logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][riscv::XLEN-1:0] bank_rdata; // + logic [DCACHE_SET_ASSOC-1:0][riscv::XLEN-1:0] rdata_cl; // selected word from each cacheline + logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][DCACHE_USER_WIDTH-1:0] bank_wuser; // + logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][DCACHE_USER_WIDTH-1:0] bank_ruser; // + logic [DCACHE_SET_ASSOC-1:0][DCACHE_USER_WIDTH-1:0] ruser_cl; // selected word from each cacheline + + logic [DCACHE_TAG_WIDTH-1:0] rd_tag; + logic [DCACHE_SET_ASSOC-1:0] vld_req; // bit enable for valid regs + logic vld_we; // valid bits write enable + logic [DCACHE_SET_ASSOC-1:0] vld_wdata; // valid bits to write + logic [DCACHE_SET_ASSOC-1:0][DCACHE_TAG_WIDTH-1:0] tag_rdata; // these are the tags coming from the tagmem + logic [DCACHE_CL_IDX_WIDTH-1:0] vld_addr; // valid bit + + logic [$clog2(NumPorts)-1:0] vld_sel_d, vld_sel_q; + + logic [DCACHE_WBUF_DEPTH-1:0] wbuffer_hit_oh; + logic [ (riscv::XLEN/8)-1:0] wbuffer_be; + riscv::xlen_t wbuffer_rdata, rdata; + logic [DCACHE_USER_WIDTH-1:0] wbuffer_ruser, ruser; + logic [riscv::PLEN-1:0] wbuffer_cmp_addr; + + logic cmp_en_d, cmp_en_q; + logic rd_acked; + logic [NumPorts-1:0] bank_collision, rd_req_masked, rd_req_prio; + + /////////////////////////////////////////////////////// + // arbiter + /////////////////////////////////////////////////////// + + // Read requests are arbitrated by i_rr_arb_tree below; high prio requests (rd_prio_i) mask low prio ones + // + // SRAM bank mapping: + // + // Bank 0 
Bank 2 + // [way0, w0] [way1, w0] .. [way0, w1] [way1, w1] .. + + // byte enable mapping + for (genvar k = 0; k < DCACHE_NUM_BANKS; k++) begin : gen_bank + for (genvar j = 0; j < DCACHE_SET_ASSOC; j++) begin : gen_bank_way + assign bank_be[k][j] = (wr_cl_we_i[j] & wr_cl_vld_i) ? wr_cl_data_be_i[k*(riscv::XLEN/8) +: (riscv::XLEN/8)] : + (wr_req_i[j] & wr_ack_o) ? wr_data_be_i : + '0; + assign bank_wdata[k][j] = (wr_cl_we_i[j] & wr_cl_vld_i) ? wr_cl_data_i[k*riscv::XLEN +: riscv::XLEN] : + wr_data_i; + assign bank_wuser[k][j] = (wr_cl_we_i[j] & wr_cl_vld_i) ? wr_cl_user_i[k*DCACHE_USER_WIDTH +: DCACHE_USER_WIDTH] : + wr_user_i; + end + end + + assign vld_wdata = wr_vld_bits_i; + assign vld_addr = (wr_cl_vld_i) ? wr_cl_idx_i : rd_idx_i[vld_sel_d]; + assign rd_tag = rd_tag_i[vld_sel_q]; // delayed by one cycle + assign bank_off_d = (wr_cl_vld_i) ? wr_cl_off_i : rd_off_i[vld_sel_d]; + assign bank_idx_d = (wr_cl_vld_i) ? wr_cl_idx_i : rd_idx_i[vld_sel_d]; + assign vld_req = (wr_cl_vld_i) ? wr_cl_we_i : (rd_acked) ? '1 : '0; + + + // priority masking + // disable low prio requests when any of the high prio reqs is present + assign rd_req_prio = rd_req_i & rd_prio_i; + assign rd_req_masked = (|rd_req_prio) ? 
rd_req_prio : rd_req_i; + + logic rd_req; + rr_arb_tree #( + .NumIn (NumPorts), + .DataWidth(1) + ) i_rr_arb_tree ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i('0), + .rr_i ('0), + .req_i (rd_req_masked), + .gnt_o (rd_ack_o), + .data_i ('0), + .gnt_i (~wr_cl_vld_i), + .req_o (rd_req), + .data_o (), + .idx_o (vld_sel_d) + ); + + assign rd_acked = rd_req & ~wr_cl_vld_i; + + always_comb begin : p_bank_req + vld_we = wr_cl_vld_i; + bank_req = '0; + wr_ack_o = '0; + bank_we = '0; + bank_idx = '{default: wr_idx_i}; + + for (int k = 0; k < NumPorts; k++) begin + bank_collision[k] = rd_off_i[k][DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES] == wr_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]; + end + + if (wr_cl_vld_i & |wr_cl_we_i) begin + bank_req = '1; + bank_we = '1; + bank_idx = '{default: wr_cl_idx_i}; + end else begin + if (rd_acked) begin + if (!rd_tag_only_i[vld_sel_d]) begin + bank_req = + dcache_cl_bin2oh(rd_off_i[vld_sel_d][DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]); + bank_idx[rd_off_i[vld_sel_d][DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]] = rd_idx_i[vld_sel_d]; + end + end + + if (|wr_req_i) begin + if (rd_tag_only_i[vld_sel_d] || !(rd_ack_o[vld_sel_d] && bank_collision[vld_sel_d])) begin + wr_ack_o = 1'b1; + bank_req |= dcache_cl_bin2oh(wr_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]); + bank_we = dcache_cl_bin2oh(wr_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]); + end + end + end + end + + /////////////////////////////////////////////////////// + // tag comparison, hit generation, readout muxes + /////////////////////////////////////////////////////// + + logic [DCACHE_OFFSET_WIDTH-riscv::XLEN_ALIGN_BYTES-1:0] wr_cl_off; + logic [DCACHE_OFFSET_WIDTH-riscv::XLEN_ALIGN_BYTES-1:0] wr_cl_nc_off; + logic [ $clog2(DCACHE_WBUF_DEPTH)-1:0] wbuffer_hit_idx; + logic [ $clog2(DCACHE_SET_ASSOC)-1:0] rd_hit_idx; + + assign cmp_en_d = (|vld_req) & ~vld_we; + + // word tag comparison in write buffer + assign wbuffer_cmp_addr = 
(wr_cl_vld_i) ? {wr_cl_tag_i, wr_cl_idx_i, wr_cl_off_i} : + {rd_tag, bank_idx_q, bank_off_q}; + // hit generation + for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : gen_tag_cmpsel + // tag comparison for each way + assign rd_hit_oh_o[i] = (rd_tag == tag_rdata[i]) & rd_vld_bits_o[i] & cmp_en_q; + // word (bank) select mux for each way + assign rdata_cl[i] = bank_rdata[bank_off_q[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]][i]; + assign ruser_cl[i] = bank_ruser[bank_off_q[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]][i]; + end + + for (genvar k = 0; k < DCACHE_WBUF_DEPTH; k++) begin : gen_wbuffer_hit + assign wbuffer_hit_oh[k] = (|wbuffer_data_i[k].valid) & ({{riscv::XLEN_ALIGN_BYTES{1'b0}}, wbuffer_data_i[k].wtag} == (wbuffer_cmp_addr >> riscv::XLEN_ALIGN_BYTES)); + end + + lzc #( + .WIDTH(DCACHE_WBUF_DEPTH) + ) i_lzc_wbuffer_hit ( + .in_i (wbuffer_hit_oh), + .cnt_o (wbuffer_hit_idx), + .empty_o() + ); + + lzc #( + .WIDTH(DCACHE_SET_ASSOC) + ) i_lzc_rd_hit ( + .in_i (rd_hit_oh_o), + .cnt_o (rd_hit_idx), + .empty_o() + ); + + assign wbuffer_rdata = wbuffer_data_i[wbuffer_hit_idx].data; + assign wbuffer_ruser = wbuffer_data_i[wbuffer_hit_idx].user; + assign wbuffer_be = (|wbuffer_hit_oh) ? wbuffer_data_i[wbuffer_hit_idx].valid : '0; + + if (CVA6Cfg.NOCType == config_pkg::NOC_TYPE_AXI4_ATOP) begin : gen_axi_offset + // In case of an uncached read, return the desired XLEN-bit segment of the most recent AXI read + assign wr_cl_off = (wr_cl_nc_i) ? (CVA6Cfg.AxiDataWidth == riscv::XLEN) ? 
'0 : + {{DCACHE_OFFSET_WIDTH-AXI_OFFSET_WIDTH{1'b0}}, wr_cl_off_i[AXI_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]} : + wr_cl_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]; + end else begin : gen_piton_offset + assign wr_cl_off = wr_cl_off_i[DCACHE_OFFSET_WIDTH-1:3]; // NOTE(review): hard-coded 3 where the AXI branch uses riscv::XLEN_ALIGN_BYTES - confirm + end + + always_comb begin + if (wr_cl_vld_i) begin + rdata = wr_cl_data_i[wr_cl_off*riscv::XLEN+:riscv::XLEN]; + ruser = wr_cl_user_i[wr_cl_off*DCACHE_USER_WIDTH+:DCACHE_USER_WIDTH]; + end else begin + rdata = rdata_cl[rd_hit_idx]; + ruser = ruser_cl[rd_hit_idx]; + end + end + + // overlay bytes that hit in the write buffer + for (genvar k = 0; k < (riscv::XLEN / 8); k++) begin : gen_rd_data + assign rd_data_o[8*k+:8] = (wbuffer_be[k]) ? wbuffer_rdata[8*k+:8] : rdata[8*k+:8]; + end + for (genvar k = 0; k < DCACHE_USER_WIDTH / 8; k++) begin : gen_rd_user + assign rd_user_o[8*k+:8] = (wbuffer_be[k]) ? wbuffer_ruser[8*k+:8] : ruser[8*k+:8]; + end + + /////////////////////////////////////////////////////// + // memory arrays and regs + /////////////////////////////////////////////////////// + + logic [DCACHE_TAG_WIDTH:0] vld_tag_rdata[DCACHE_SET_ASSOC-1:0]; + + for (genvar k = 0; k < DCACHE_NUM_BANKS; k++) begin : gen_data_banks + // Data RAM + sram #( + .USER_WIDTH(ariane_pkg::DCACHE_SET_ASSOC * DATA_USER_WIDTH), + .DATA_WIDTH(ariane_pkg::DCACHE_SET_ASSOC * riscv::XLEN), + .USER_EN (ariane_pkg::DATA_USER_EN), + .NUM_WORDS (wt_cache_pkg::DCACHE_NUM_WORDS) + ) i_data_sram ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .req_i (bank_req[k]), + .we_i (bank_we[k]), + .addr_i (bank_idx[k]), + .wuser_i(bank_wuser[k]), + .wdata_i(bank_wdata[k]), + .be_i (bank_be[k]), + .ruser_o(bank_ruser[k]), + .rdata_o(bank_rdata[k]) + ); + end + + for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : gen_tag_srams + + assign tag_rdata[i] = vld_tag_rdata[i][DCACHE_TAG_WIDTH-1:0]; + assign rd_vld_bits_o[i] = vld_tag_rdata[i][DCACHE_TAG_WIDTH]; + + // Tag RAM + sram #( + // tag + valid bit + .DATA_WIDTH(ariane_pkg::DCACHE_TAG_WIDTH 
+ 1), + .NUM_WORDS (wt_cache_pkg::DCACHE_NUM_WORDS) + ) i_tag_sram ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .req_i (vld_req[i]), + .we_i (vld_we), + .addr_i (vld_addr), + .wuser_i('0), + .wdata_i({vld_wdata[i], wr_cl_tag_i}), + .be_i ('1), + .ruser_o(), + .rdata_o(vld_tag_rdata[i]) + ); + end + + always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs + if (!rst_ni) begin + bank_idx_q <= '0; + bank_off_q <= '0; + vld_sel_q <= '0; + cmp_en_q <= '0; + end else begin + bank_idx_q <= bank_idx_d; + bank_off_q <= bank_off_d; + vld_sel_q <= vld_sel_d; + cmp_en_q <= cmp_en_d; + end + end + + /////////////////////////////////////////////////////// + // assertions + /////////////////////////////////////////////////////// + + //pragma translate_off +`ifndef VERILATOR + initial begin + cach_line_width_axi : + assert (DCACHE_LINE_WIDTH >= CVA6Cfg.AxiDataWidth) + else $fatal(1, "[l1 dcache] cache line size needs to be greater or equal AXI data width"); + end + + initial begin + axi_xlen : + assert (CVA6Cfg.AxiDataWidth >= riscv::XLEN) + else $fatal(1, "[l1 dcache] AXI data width needs to be greater or equal XLEN"); + end + + initial begin + cach_line_width_xlen : + assert (DCACHE_LINE_WIDTH > riscv::XLEN) + else $fatal(1, "[l1 dcache] cache_line_size needs to be greater than XLEN"); + end + + hit_hot1 : + assert property (@(posedge clk_i) disable iff (!rst_ni) &vld_req |-> !vld_we |=> $onehot0( + rd_hit_oh_o + )) + else $fatal(1, "[l1 dcache] rd_hit_oh_o signal must be hot1"); + + word_write_hot1 : + assert property (@(posedge clk_i) disable iff (!rst_ni) wr_ack_o |-> $onehot0(wr_req_i)) + else $fatal(1, "[l1 dcache] wr_req_i signal must be hot1"); + + wbuffer_hit_hot1 : + assert property (@(posedge clk_i) disable iff (!rst_ni) &vld_req |-> !vld_we |=> $onehot0( + wbuffer_hit_oh + )) + else $fatal(1, "[l1 dcache] wbuffer_hit_oh signal must be hot1"); + + // this is only used for verification! 
+ logic vld_mirror[wt_cache_pkg::DCACHE_NUM_WORDS-1:0][ariane_pkg::DCACHE_SET_ASSOC-1:0]; + logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] tag_mirror[wt_cache_pkg::DCACHE_NUM_WORDS-1:0][ariane_pkg::DCACHE_SET_ASSOC-1:0]; + logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] tag_write_duplicate_test; + + always_ff @(posedge clk_i or negedge rst_ni) begin : p_mirror + if (!rst_ni) begin + vld_mirror <= '{default: '0}; + tag_mirror <= '{default: '0}; + end else begin + for (int i = 0; i < DCACHE_SET_ASSOC; i++) begin + if (vld_req[i] & vld_we) begin + vld_mirror[vld_addr][i] <= vld_wdata[i]; + tag_mirror[vld_addr][i] <= wr_cl_tag_i; + end + end + end + end + + for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : gen_tag_dubl_test + assign tag_write_duplicate_test[i] = (tag_mirror[vld_addr][i] == wr_cl_tag_i) & vld_mirror[vld_addr][i] & (|vld_wdata); + end + + tag_write_duplicate : + assert property ( + @(posedge clk_i) disable iff (!rst_ni) |vld_req |-> vld_we |-> !(|tag_write_duplicate_test)) + else $fatal(1, "[l1 dcache] cannot allocate a CL that is already present in the cache"); + +`endif + //pragma translate_on + +endmodule // wt_dcache_mem diff --git a/test/type_param/core/cache_subsystem/wt_dcache_missunit.sv b/test/type_param/core/cache_subsystem/wt_dcache_missunit.sv new file mode 100644 index 00000000..3e06a92e --- /dev/null +++ b/test/type_param/core/cache_subsystem/wt_dcache_missunit.sv @@ -0,0 +1,645 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Michael Schaffner , ETH Zurich +// Date: 13.09.2018 +// Description: miss controller for WT dcache. Note that the current assumption +// is that the port with the highest index issues writes instead of reads. + + +module wt_dcache_missunit + import ariane_pkg::*; + import wt_cache_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter logic [CACHE_ID_WIDTH-1:0] AmoTxId = 1, // TX id to be used for AMOs + parameter int unsigned NumPorts = 4 // number of miss ports +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + // cache management, signals from/to core + input logic enable_i, // from CSR + input logic flush_i, // flush request, this waits for pending tx (write, read) to finish and will clear the cache + output logic flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed + output logic miss_o, // we missed on a ld/st + // local cache management signals + input logic wbuffer_empty_i, + output logic cache_en_o, // local cache enable signal + // AMO interface + input amo_req_t amo_req_i, + output amo_resp_t amo_resp_o, + // miss handling interface (ld, ptw, wbuffer) + input logic [NumPorts-1:0] miss_req_i, + output logic [NumPorts-1:0] miss_ack_o, + input logic [NumPorts-1:0] miss_nc_i, + input logic [NumPorts-1:0] miss_we_i, + input logic [NumPorts-1:0][riscv::XLEN-1:0] miss_wdata_i, + input logic [NumPorts-1:0][DCACHE_USER_WIDTH-1:0] miss_wuser_i, + input logic [NumPorts-1:0][riscv::PLEN-1:0] miss_paddr_i, + input logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits_i, + input logic [NumPorts-1:0][2:0] miss_size_i, + input logic [NumPorts-1:0][CACHE_ID_WIDTH-1:0] miss_id_i, // used as transaction ID + // signals that the request collided with a pending read + output logic [NumPorts-1:0] miss_replay_o, + // signals response from memory +
output logic [NumPorts-1:0] miss_rtrn_vld_o, + output logic [CACHE_ID_WIDTH-1:0] miss_rtrn_id_o, // only used for writes, set to zero for reads + // from writebuffer + input logic [DCACHE_MAX_TX-1:0][riscv::PLEN-1:0] tx_paddr_i, // used to check for address collisions with read operations + input logic [DCACHE_MAX_TX-1:0] tx_vld_i, // used to check for address collisions with read operations + // write interface to cache memory + output logic wr_cl_vld_o, // writes a full cacheline + output logic wr_cl_nc_o, // writes a full cacheline + output logic [DCACHE_SET_ASSOC-1:0] wr_cl_we_o, // writes a full cacheline + output logic [DCACHE_TAG_WIDTH-1:0] wr_cl_tag_o, + output logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_o, + output logic [DCACHE_OFFSET_WIDTH-1:0] wr_cl_off_o, + output logic [DCACHE_LINE_WIDTH-1:0] wr_cl_data_o, + output logic [DCACHE_USER_LINE_WIDTH-1:0] wr_cl_user_o, + output logic [DCACHE_LINE_WIDTH/8-1:0] wr_cl_data_be_o, + output logic [DCACHE_SET_ASSOC-1:0] wr_vld_bits_o, + // memory interface + input logic mem_rtrn_vld_i, + input dcache_rtrn_t mem_rtrn_i, + output logic mem_data_req_o, + input logic mem_data_ack_i, + output dcache_req_t mem_data_o +); + + // functions + // dcache_way_bin2oh: converts a binary way index into a one-hot way mask + function automatic logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] dcache_way_bin2oh( + input logic [L1D_WAY_WIDTH-1:0] in); + logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] out; + out = '0; + out[in] = 1'b1; + return out; + endfunction + + // align the physical address to the specified size: + // 000: bytes + // 001: hword + // 010: word + // 011: dword + // 111: DCACHE line + function automatic logic [riscv::PLEN-1:0] paddrSizeAlign(input logic [riscv::PLEN-1:0] paddr, + input logic [2:0] size); + logic [riscv::PLEN-1:0] out; + out = paddr; + unique case (size) + 3'b001: out[0:0] = '0; + 3'b010: out[1:0] = '0; + 3'b011: out[2:0] = '0; + 3'b111: out[DCACHE_OFFSET_WIDTH-1:0] = '0; + default: ; + endcase + return out; + endfunction : paddrSizeAlign + + // controller FSM + typedef enum logic [2:0] { +
IDLE, + DRAIN, + AMO, + FLUSH, + STORE_WAIT, + LOAD_WAIT, + AMO_WAIT + } state_e; + state_e state_d, state_q; + + // MSHR for reads + typedef struct packed { + logic [riscv::PLEN-1:0] paddr; + logic [2:0] size; + logic [DCACHE_SET_ASSOC-1:0] vld_bits; + logic [CACHE_ID_WIDTH-1:0] id; + logic nc; + logic [$clog2(DCACHE_SET_ASSOC)-1:0] repl_way; + logic [$clog2(NumPorts)-1:0] miss_port_idx; + } mshr_t; + + mshr_t mshr_d, mshr_q; + logic [$clog2(DCACHE_SET_ASSOC)-1:0] repl_way, inv_way, rnd_way; + logic mshr_vld_d, mshr_vld_q, mshr_vld_q1; + logic mshr_allocate; + logic update_lfsr, all_ways_valid; + + logic enable_d, enable_q; + logic flush_ack_d, flush_ack_q; + logic flush_en, flush_done; + logic mask_reads, lock_reqs; + logic amo_sel, miss_is_write; + logic amo_req_d, amo_req_q; + logic [63:0] amo_rtrn_mux; + riscv::xlen_t amo_data, amo_data_a, amo_data_b; + riscv::xlen_t amo_user; //DCACHE USER ? DATA_USER_WIDTH + logic [riscv::PLEN-1:0] tmp_paddr; + logic [$clog2(NumPorts)-1:0] miss_port_idx; + logic [DCACHE_CL_IDX_WIDTH-1:0] cnt_d, cnt_q; + logic [NumPorts-1:0] miss_req_masked_d, miss_req_masked_q; + + logic inv_vld, inv_vld_all, cl_write_en; + logic load_ack, store_ack, amo_ack; + + logic [NumPorts-1:0] mshr_rdrd_collision_d, mshr_rdrd_collision_q; + logic [NumPorts-1:0] mshr_rdrd_collision; + logic tx_rdwr_collision, mshr_rdwr_collision; + + /////////////////////////////////////////////////////// + // input arbitration and general control sigs + /////////////////////////////////////////////////////// + + assign cache_en_o = enable_q; + assign cnt_d = (flush_en) ? cnt_q + 1 : '0; + assign flush_done = (cnt_q == wt_cache_pkg::DCACHE_NUM_WORDS - 1); + + assign miss_req_masked_d = (lock_reqs) ? miss_req_masked_q : + (mask_reads) ?
miss_we_i & miss_req_i : miss_req_i; + assign miss_is_write = miss_we_i[miss_port_idx]; + + // request arbiter: picks miss_port_idx from the masked miss requests (read ports and the write port) + lzc #( + .WIDTH(NumPorts) + ) i_lzc_reqs ( + .in_i (miss_req_masked_d), + .cnt_o (miss_port_idx), + .empty_o() + ); + + always_comb begin : p_ack + miss_ack_o = '0; + if (!amo_sel) begin + miss_ack_o[miss_port_idx] = mem_data_ack_i & mem_data_req_o; + end + end + + /////////////////////////////////////////////////////// + // MSHR and way replacement logic (only for read ops) + /////////////////////////////////////////////////////// + + // find invalid cache line + lzc #( + .WIDTH(ariane_pkg::DCACHE_SET_ASSOC) + ) i_lzc_inv ( + .in_i (~miss_vld_bits_i[miss_port_idx]), + .cnt_o (inv_way), + .empty_o(all_ways_valid) + ); + + // generate random cacheline index + lfsr #( + .LfsrWidth(8), + .OutWidth ($clog2(ariane_pkg::DCACHE_SET_ASSOC)) + ) i_lfsr_inv ( + .clk_i (clk_i), + .rst_ni(rst_ni), + .en_i (update_lfsr), + .out_o (rnd_way) + ); + + assign repl_way = (all_ways_valid) ? rnd_way : inv_way; + + assign mshr_d.size = (mshr_allocate) ? miss_size_i[miss_port_idx] : mshr_q.size; + assign mshr_d.paddr = (mshr_allocate) ? miss_paddr_i[miss_port_idx] : mshr_q.paddr; + assign mshr_d.vld_bits = (mshr_allocate) ? miss_vld_bits_i[miss_port_idx] : mshr_q.vld_bits; + assign mshr_d.id = (mshr_allocate) ? miss_id_i[miss_port_idx] : mshr_q.id; + assign mshr_d.nc = (mshr_allocate) ? miss_nc_i[miss_port_idx] : mshr_q.nc; + assign mshr_d.repl_way = (mshr_allocate) ? repl_way : mshr_q.repl_way; + assign mshr_d.miss_port_idx = (mshr_allocate) ? miss_port_idx : mshr_q.miss_port_idx; + + // currently we only have one outstanding read TX, hence an incoming load clears the MSHR + assign mshr_vld_d = (mshr_allocate) ? 1'b1 : (load_ack) ? 1'b0 : mshr_vld_q; + + assign miss_o = (mshr_allocate) ?
~miss_nc_i[miss_port_idx] : 1'b0; + + + for (genvar k = 0; k < NumPorts; k++) begin : gen_rdrd_collision + assign mshr_rdrd_collision[k] = (mshr_q.paddr[riscv::PLEN-1:DCACHE_OFFSET_WIDTH] == miss_paddr_i[k][riscv::PLEN-1:DCACHE_OFFSET_WIDTH]) && (mshr_vld_q | mshr_vld_q1); + assign mshr_rdrd_collision_d[k] = (!miss_req_i[k]) ? 1'b0 : mshr_rdrd_collision_q[k] | mshr_rdrd_collision[k]; + end + + // read/write collision, stalls the corresponding request + // write port[NumPorts-1] collides with MSHR_Q + assign mshr_rdwr_collision = (mshr_q.paddr[riscv::PLEN-1:DCACHE_OFFSET_WIDTH] == miss_paddr_i[NumPorts-1][riscv::PLEN-1:DCACHE_OFFSET_WIDTH]) && mshr_vld_q; + + // read collides with inflight TX + always_comb begin : p_tx_coll + tx_rdwr_collision = 1'b0; + for (int k = 0; k < DCACHE_MAX_TX; k++) begin + tx_rdwr_collision |= (miss_paddr_i[miss_port_idx][riscv::PLEN-1:DCACHE_OFFSET_WIDTH] == tx_paddr_i[k][riscv::PLEN-1:DCACHE_OFFSET_WIDTH]) && tx_vld_i[k]; + end + end + + /////////////////////////////////////////////////////// + // to memory + /////////////////////////////////////////////////////// + + // if size = 32bit word, select appropriate offset, replicate for openpiton... + + if (CVA6Cfg.RVA) begin + if (riscv::IS_XLEN64) begin : gen_amo_64b_data + assign amo_data_a = {amo_req_i.operand_b[0+:32], amo_req_i.operand_b[0+:32]}; + assign amo_data_b = amo_req_i.operand_b; + end else begin : gen_amo_32b_data + assign amo_data_a = amo_req_i.operand_b[0+:32]; + end + end + + always_comb begin + if (CVA6Cfg.RVA) begin + if (riscv::IS_XLEN64) begin + if (amo_req_i.size == 2'b10) begin + amo_data = amo_data_a; + end else begin + amo_data = amo_data_b; + end + end else begin + amo_data = amo_data_a; + end + if (ariane_pkg::DATA_USER_EN) begin + amo_user = amo_data; + end else begin + amo_user = '0; + end + end + end + + if (CVA6Cfg.RVA) begin + // note: openpiton returns a full cacheline!
+ if (CVA6Cfg.NOCType == config_pkg::NOC_TYPE_AXI4_ATOP) begin : gen_axi_rtrn_mux + if (CVA6Cfg.AxiDataWidth > 64) begin + assign amo_rtrn_mux = mem_rtrn_i.data[amo_req_i.operand_a[$clog2( + CVA6Cfg.AxiDataWidth/8 + )-1:3]*64+:64]; + end else begin + assign amo_rtrn_mux = mem_rtrn_i.data[0+:64]; + end + end else begin : gen_piton_rtrn_mux + assign amo_rtrn_mux = mem_rtrn_i.data[amo_req_i.operand_a[DCACHE_OFFSET_WIDTH-1:3]*64+:64]; + end + + // always sign extend 32bit values + assign amo_resp_o.result = (amo_req_i.size==2'b10) ? {{32{amo_rtrn_mux[amo_req_i.operand_a[2]*32 + 31]}},amo_rtrn_mux[amo_req_i.operand_a[2]*32 +: 32]} : + amo_rtrn_mux ; + assign amo_req_d = amo_req_i.req; + end + + // outgoing memory requests (AMOs are always uncached) + assign mem_data_o.tid = (CVA6Cfg.RVA && amo_sel) ? AmoTxId : miss_id_i[miss_port_idx]; + assign mem_data_o.nc = (CVA6Cfg.RVA && amo_sel) ? 1'b1 : miss_nc_i[miss_port_idx]; + assign mem_data_o.way = (CVA6Cfg.RVA && amo_sel) ? '0 : repl_way; + assign mem_data_o.data = (CVA6Cfg.RVA && amo_sel) ? amo_data : miss_wdata_i[miss_port_idx]; + assign mem_data_o.user = (CVA6Cfg.RVA && amo_sel) ? amo_user : miss_wuser_i[miss_port_idx]; + assign mem_data_o.size = (CVA6Cfg.RVA && amo_sel) ? {1'b0, amo_req_i.size} : miss_size_i [miss_port_idx]; + assign mem_data_o.amo_op = (CVA6Cfg.RVA && amo_sel) ? amo_req_i.amo_op : AMO_NONE; + + assign tmp_paddr = (CVA6Cfg.RVA && amo_sel) ?
amo_req_i.operand_a[riscv::PLEN-1:0] : miss_paddr_i[miss_port_idx]; + assign mem_data_o.paddr = paddrSizeAlign(tmp_paddr, mem_data_o.size); + + /////////////////////////////////////////////////////// + // back-off mechanism for LR/SC completion guarantee + /////////////////////////////////////////////////////// + + logic sc_fail, sc_pass, sc_backoff_over; + exp_backoff #( + .Seed (3), + .MaxExp(16) + ) i_exp_backoff ( + .clk_i, + .rst_ni, + .set_i (sc_fail), + .clr_i (sc_pass), + .is_zero_o(sc_backoff_over) + ); + + /////////////////////////////////////////////////////// + // responses from memory + /////////////////////////////////////////////////////// + + // keep track of pending stores + logic store_sent; + logic [$clog2(wt_cache_pkg::DCACHE_MAX_TX + 1)-1:0] stores_inflight_d, stores_inflight_q; + assign store_sent = mem_data_req_o & mem_data_ack_i & (mem_data_o.rtype == DCACHE_STORE_REQ); + + assign stores_inflight_d = (store_ack && store_sent) ? stores_inflight_q : + (store_ack) ? stores_inflight_q - 1 : + (store_sent) ?
stores_inflight_q + 1 : + stores_inflight_q; + + // incoming responses + always_comb begin : p_rtrn_logic + load_ack = 1'b0; + store_ack = 1'b0; + amo_ack = 1'b0; + inv_vld = 1'b0; + inv_vld_all = 1'b0; + sc_fail = 1'b0; + sc_pass = 1'b0; + miss_rtrn_vld_o = '0; + if (mem_rtrn_vld_i) begin + unique case (mem_rtrn_i.rtype) + DCACHE_LOAD_ACK: begin + if (mshr_vld_q) begin + load_ack = 1'b1; + miss_rtrn_vld_o[mshr_q.miss_port_idx] = 1'b1; + end + end + DCACHE_STORE_ACK: begin + if (stores_inflight_q > 0) begin + store_ack = 1'b1; + miss_rtrn_vld_o[NumPorts-1] = 1'b1; + end + end + DCACHE_ATOMIC_ACK: begin + if (CVA6Cfg.RVA) begin + if (amo_req_q) begin + amo_ack = 1'b1; + // need to set SC backoff counter if + // this op failed + if (amo_req_i.amo_op == AMO_SC) begin + if (amo_resp_o.result > 0) begin + sc_fail = 1'b1; + end else begin + sc_pass = 1'b1; + end + end + end + end + end + DCACHE_INV_REQ: begin + inv_vld = mem_rtrn_i.inv.vld | mem_rtrn_i.inv.all; + inv_vld_all = mem_rtrn_i.inv.all; + end + // TODO: + // DCACHE_INT_REQ: begin + // end + default: begin + end + endcase + end + end + + // to write buffer + assign miss_rtrn_id_o = mem_rtrn_i.tid; + + /////////////////////////////////////////////////////// + // writes to cache memory + /////////////////////////////////////////////////////// + + // cacheline write port + assign wr_cl_nc_o = mshr_q.nc; + assign wr_cl_vld_o = load_ack | (|wr_cl_we_o); + + assign wr_cl_we_o = (flush_en) ? '1 : (inv_vld_all) ? '1 : (inv_vld) ? dcache_way_bin2oh( + mem_rtrn_i.inv.way + ) : (cl_write_en) ? dcache_way_bin2oh( + mshr_q.repl_way + ) : '0; + + assign wr_vld_bits_o = (flush_en) ? '0 : (inv_vld) ? '0 : (cl_write_en) ? dcache_way_bin2oh( + mshr_q.repl_way + ) : '0; + + assign wr_cl_idx_o = (flush_en) ? cnt_q : + (inv_vld) ?
mem_rtrn_i.inv.idx[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH] : + mshr_q.paddr[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH]; + + assign wr_cl_tag_o = mshr_q.paddr[DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:DCACHE_INDEX_WIDTH]; + assign wr_cl_off_o = mshr_q.paddr[DCACHE_OFFSET_WIDTH-1:0]; + assign wr_cl_data_o = mem_rtrn_i.data; + assign wr_cl_user_o = mem_rtrn_i.user; + assign wr_cl_data_be_o = (cl_write_en) ? '1 : '0;// we only write complete cachelines into the memory + + // only non-NC responses write to the cache + assign cl_write_en = load_ack & ~mshr_q.nc; + + /////////////////////////////////////////////////////// + // main control logic for generating tx + /////////////////////////////////////////////////////// + + always_comb begin : p_fsm + // default assignment + state_d = state_q; + + flush_ack_o = 1'b0; + mem_data_o.rtype = DCACHE_LOAD_REQ; + mem_data_req_o = 1'b0; + amo_resp_o.ack = 1'b0; + miss_replay_o = '0; + + // disabling cache is possible anytime, enabling goes via flush + enable_d = enable_q & enable_i; + flush_ack_d = flush_ack_q; + flush_en = 1'b0; + amo_sel = 1'b0; + update_lfsr = 1'b0; + mshr_allocate = 1'b0; + lock_reqs = 1'b0; + mask_reads = mshr_vld_q; + + // interfaces + unique case (state_q) + ////////////////////////////////// + // wait for misses / amo ops + IDLE: begin + if (flush_i || (enable_i && !enable_q)) begin + if (wbuffer_empty_i && !mshr_vld_q) begin + flush_ack_d = flush_i; + state_d = FLUSH; + end else begin + state_d = DRAIN; + end + end else if (CVA6Cfg.RVA && amo_req_i.req) begin + if (wbuffer_empty_i && !mshr_vld_q) begin + state_d = AMO; + end else begin + state_d = DRAIN; + end + // we've got a miss to handle + end else if (|miss_req_masked_d) begin + // this is a write miss, just pass through (but check whether write collides with MSHR) + if (miss_is_write) begin + // stall in case this write collides with the MSHR address + if (!mshr_rdwr_collision) begin + mem_data_req_o = 1'b1; + mem_data_o.rtype = DCACHE_STORE_REQ; + if
(!mem_data_ack_i) begin + state_d = STORE_WAIT; + end + end + // this is a read miss, can only allocate 1 MSHR + // in case of a load_ack we can accept a new miss, since the MSHR is being cleared + end else if (!mshr_vld_q || load_ack) begin + // replay the read request in case the address has collided with MSHR during the time the request was pending + // i.e., the cache state may have been updated in the mean time due to a refill at the same CL address + if (mshr_rdrd_collision_d[miss_port_idx]) begin + miss_replay_o[miss_port_idx] = 1'b1; + // stall in case this CL address overlaps with a write TX that is in flight + end else if (!tx_rdwr_collision) begin + mem_data_req_o = 1'b1; + mem_data_o.rtype = DCACHE_LOAD_REQ; + update_lfsr = all_ways_valid & mem_data_ack_i; // need to evict a random way + mshr_allocate = mem_data_ack_i; + if (!mem_data_ack_i) begin + state_d = LOAD_WAIT; + end + end + end + end + end + ////////////////////////////////// + // wait until this request is acked + STORE_WAIT: begin + lock_reqs = 1'b1; + mem_data_req_o = 1'b1; + mem_data_o.rtype = DCACHE_STORE_REQ; + if (mem_data_ack_i) begin + state_d = IDLE; + end + end + ////////////////////////////////// + // wait until this request is acked + LOAD_WAIT: begin + lock_reqs = 1'b1; + mem_data_req_o = 1'b1; + mem_data_o.rtype = DCACHE_LOAD_REQ; + if (mem_data_ack_i) begin + update_lfsr = all_ways_valid; // need to evict a random way + mshr_allocate = 1'b1; + state_d = IDLE; + end + end + ////////////////////////////////// + // only handle stores, do not accept new read requests + // wait until MSHR is cleared and wbuffer is empty + DRAIN: begin + mask_reads = 1'b1; + // these are writes, check whether they collide with MSHR + if (|miss_req_masked_d && !mshr_rdwr_collision) begin + mem_data_req_o = 1'b1; + mem_data_o.rtype = DCACHE_STORE_REQ; + end + + if (wbuffer_empty_i && !mshr_vld_q) begin + state_d = IDLE; + end + end + ////////////////////////////////// + // flush the cache + FLUSH:
begin + // internal flush signal + flush_en = 1'b1; + if (flush_done) begin + state_d = IDLE; + flush_ack_o = flush_ack_q; + flush_ack_d = 1'b0; + enable_d = enable_i; + end + end + ////////////////////////////////// + // send out amo op request + AMO: begin + if (CVA6Cfg.RVA) begin + mem_data_o.rtype = DCACHE_ATOMIC_REQ; + amo_sel = 1'b1; + // if this is an LR, we need to consult the backoff counter + if ((amo_req_i.amo_op != AMO_LR) || sc_backoff_over) begin + mem_data_req_o = 1'b1; + if (mem_data_ack_i) begin + state_d = AMO_WAIT; + end + end + end + end + ////////////////////////////////// + // block and wait until AMO OP returns + AMO_WAIT: begin + if (CVA6Cfg.RVA) begin + amo_sel = 1'b1; + if (amo_ack) begin + amo_resp_o.ack = 1'b1; + state_d = IDLE; + end + end + end + ////////////////////////////////// + default: begin + // we should never get here + state_d = IDLE; + end + endcase // state_q + end + + /////////////////////////////////////////////////////// + // ff's + /////////////////////////////////////////////////////// + + always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs + if (!rst_ni) begin + // come out of reset in FLUSH: flush_en then sweeps cnt_q over the cache indices with wr_vld_bits_o = '0 before IDLE is entered + state_q <= FLUSH; + cnt_q <= '0; + enable_q <= '0; + flush_ack_q <= '0; + mshr_vld_q <= '0; + mshr_vld_q1 <= '0; + mshr_q <= '0; + mshr_rdrd_collision_q <= '0; + miss_req_masked_q <= '0; + amo_req_q <= '0; + stores_inflight_q <= '0; + end else begin + state_q <= state_d; + cnt_q <= cnt_d; + enable_q <= enable_d; + flush_ack_q <= flush_ack_d; + mshr_vld_q <= mshr_vld_d; + mshr_vld_q1 <= mshr_vld_q; + mshr_q <= mshr_d; + mshr_rdrd_collision_q <= mshr_rdrd_collision_d; + miss_req_masked_q <= miss_req_masked_d; + amo_req_q <= amo_req_d; + stores_inflight_q <= stores_inflight_d; + end + end + + /////////////////////////////////////////////////////// + // assertions + /////////////////////////////////////////////////////// + + //pragma translate_off +`ifndef VERILATOR + + read_tid : + assert property ( + @(posedge clk_i) disable iff (!rst_ni) mshr_vld_q |->
mem_rtrn_vld_i |-> load_ack |-> mem_rtrn_i.tid == mshr_q.id) + else $fatal(1, "[l1 dcache missunit] TID of load response doesn't match"); + + read_ports : + assert property ( + @(posedge clk_i) disable iff (!rst_ni) |miss_req_i[NumPorts-2:0] |-> miss_we_i[NumPorts-2:0] == 0) + else $fatal(1, "[l1 dcache missunit] only last port can issue write requests"); + + write_port : + assert property ( + @(posedge clk_i) disable iff (!rst_ni) miss_req_i[NumPorts-1] |-> miss_we_i[NumPorts-1]) + else $fatal(1, "[l1 dcache missunit] last port can only issue write requests"); + + initial begin + // assert wrong parameterizations + assert (NumPorts >= 2) + else + $fatal( + 1, "[l1 dcache missunit] at least two ports are required (one read port, one write port)" + ); + end +`endif + //pragma translate_on + +endmodule // wt_dcache_missunit diff --git a/test/type_param/core/cache_subsystem/wt_dcache_wbuffer.sv b/test/type_param/core/cache_subsystem/wt_dcache_wbuffer.sv new file mode 100644 index 00000000..8e9c39d4 --- /dev/null +++ b/test/type_param/core/cache_subsystem/wt_dcache_wbuffer.sv @@ -0,0 +1,635 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License.
+// +// Author: Michael Schaffner , ETH Zurich +// Date: 13.09.2018 +// Description: coalescing write buffer for WT dcache +// +// A couple of notes: +// +// 1) the write buffer behaves as a fully-associative cache, and is therefore coalescing. +// this cache is used by the cache readout logic to forward data to the load unit. +// +// each byte can be in the following states (valid/dirty/txblock): +// +// 0/0/0: invalid -> free entry in the buffer +// 1/1/0: valid and dirty, Byte is hence not part of TX in-flight +// 1/0/1: valid and not dirty, Byte is part of a TX in-flight +// 1/1/1: valid, part of TX, and dirty. this means that the byte has been +// overwritten while in TX and needs to be retransmitted once the write of that byte returns. +// 1/0/0: this would represent a clean state, but is never reached in the wbuffer in the current implementation. +// this is because when a TX returns, and the byte is in state [1/0/1], it is written to cache if needed and +// its state is immediately cleared to 0/x/x. +// +// this state is used to distinguish between bytes that have been written and not +// yet sent to the memory subsystem, and bytes that are part of a transaction. +// +// 2) further, each word in the write buffer has a cache state (checked, hit_oh) +// +// checked == 0: unknown cache state +// checked == 1: cache state has been looked up, valid way is stored in "hit_oh" +// +// cache invalidations/refills affecting a particular word will clear its word state to 0, +// so another lookup has to be done. note that these lookups are triggered as soon as there is +// a valid word with checked == 0 in the write buffer. +// +// 3) returning write ACKs trigger a cache update if the word is present in the cache, and evict that +// word from the write buffer. if the word is not allocated to the cache, it is just evicted from the write buffer. +// if the word cache state is VOID, the pipeline is stalled until it is clear whether that word is in the cache or not.
+// +// 4) we handle NC writes using the writebuffer circuitry. upon an NC request, the writebuffer will first be drained. +// then, only the NC word is written into the write buffer and no further write requests are acknowledged until that +// word has been evicted from the write buffer. + + +module wt_dcache_wbuffer + import ariane_pkg::*; + import wt_cache_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + + input logic cache_en_i, // writes are treated as NC if disabled + output logic empty_o, // asserted if no data is present in write buffer + output logic not_ni_o, // asserted if no ni data is present in write buffer + // core request ports + input dcache_req_i_t req_port_i, + output dcache_req_o_t req_port_o, + // interface to miss handler + input logic miss_ack_i, + output logic [riscv::PLEN-1:0] miss_paddr_o, + output logic miss_req_o, + output logic miss_we_o, // always 1 here + output riscv::xlen_t miss_wdata_o, + output logic [DCACHE_USER_WIDTH-1:0] miss_wuser_o, + output logic [DCACHE_SET_ASSOC-1:0] miss_vld_bits_o, // unused here (set to 0) + output logic miss_nc_o, // request to I/O space + output logic [2:0] miss_size_o, // + output logic [CACHE_ID_WIDTH-1:0] miss_id_o, // ID of this transaction (wbuffer uses all IDs from 0 to DCACHE_MAX_TX-1) + // write responses from memory + input logic miss_rtrn_vld_i, + input logic [CACHE_ID_WIDTH-1:0] miss_rtrn_id_i, // transaction ID to clear + // cache read interface + output logic [DCACHE_TAG_WIDTH-1:0] rd_tag_o, // tag in - comes one cycle later + output logic [DCACHE_CL_IDX_WIDTH-1:0] rd_idx_o, + output logic [DCACHE_OFFSET_WIDTH-1:0] rd_off_o, + output logic rd_req_o, // read the word at offset off_i[:3] in all ways + output logic rd_tag_only_o, // set to 1 here as we do not have to read the data arrays + input logic rd_ack_i, + input riscv::xlen_t rd_data_i, // unused + input 
logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits_i, // unused + input logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_i, + // cacheline writes + input logic wr_cl_vld_i, + input logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_i, + // cache word write interface + output logic [DCACHE_SET_ASSOC-1:0] wr_req_o, + input logic wr_ack_i, + output logic [DCACHE_CL_IDX_WIDTH-1:0] wr_idx_o, + output logic [DCACHE_OFFSET_WIDTH-1:0] wr_off_o, + output riscv::xlen_t wr_data_o, + output logic [(riscv::XLEN/8)-1:0] wr_data_be_o, + output logic [DCACHE_USER_WIDTH-1:0] wr_user_o, + // to forwarding logic and miss unit + output wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_data_o, + output logic [DCACHE_MAX_TX-1:0][riscv::PLEN-1:0] tx_paddr_o, // used to check for address collisions with read operations + output logic [DCACHE_MAX_TX-1:0] tx_vld_o +); + + tx_stat_t [DCACHE_MAX_TX-1:0] tx_stat_d, tx_stat_q; + wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_d, wbuffer_q; + logic [DCACHE_WBUF_DEPTH-1:0] valid; + logic [DCACHE_WBUF_DEPTH-1:0] dirty; + logic [DCACHE_WBUF_DEPTH-1:0] tocheck; + logic [DCACHE_WBUF_DEPTH-1:0] wbuffer_hit_oh, inval_hit; + //logic [DCACHE_WBUF_DEPTH-1:0][7:0] bdirty; + logic [DCACHE_WBUF_DEPTH-1:0][(riscv::XLEN/8)-1:0] bdirty; + + logic [$clog2(DCACHE_WBUF_DEPTH)-1:0] + next_ptr, dirty_ptr, hit_ptr, wr_ptr, check_ptr_d, check_ptr_q, check_ptr_q1, rtrn_ptr; + logic [CACHE_ID_WIDTH-1:0] tx_id, rtrn_id; + + logic [riscv::XLEN_ALIGN_BYTES-1:0] bdirty_off; + logic [(riscv::XLEN/8)-1:0] tx_be; + logic [riscv::PLEN-1:0] wr_paddr, rd_paddr, extract_tag; + logic [DCACHE_TAG_WIDTH-1:0] rd_tag_d, rd_tag_q; + logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_d, rd_hit_oh_q; + logic check_en_d, check_en_q, check_en_q1; + logic full, dirty_rd_en, rdy; + logic rtrn_empty, evict; + logic [DCACHE_WBUF_DEPTH-1:0] ni_pending_d, ni_pending_q; + logic wbuffer_wren; + logic free_tx_slots; + + logic wr_cl_vld_q, wr_cl_vld_d; + logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_q, wr_cl_idx_d; + + logic [riscv::PLEN-1:0] 
debug_paddr[DCACHE_WBUF_DEPTH-1:0]; + + wbuffer_t wbuffer_check_mux, wbuffer_dirty_mux; + + /////////////////////////////////////////////////////// + // misc + /////////////////////////////////////////////////////// + logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] miss_tag; + logic is_nc_miss; + logic is_ni; + assign miss_tag = miss_paddr_o[ariane_pkg::DCACHE_INDEX_WIDTH+:ariane_pkg::DCACHE_TAG_WIDTH]; + assign is_nc_miss = !config_pkg::is_inside_cacheable_regions( + CVA6Cfg, + { + {64 - DCACHE_TAG_WIDTH - DCACHE_INDEX_WIDTH{1'b0}}, miss_tag, {DCACHE_INDEX_WIDTH{1'b0}} + } + ); + assign miss_nc_o = !cache_en_i || is_nc_miss; + // Non-idempotent if request goes to NI region + assign is_ni = config_pkg::is_inside_nonidempotent_regions( + CVA6Cfg, + { + {64 - DCACHE_TAG_WIDTH - DCACHE_INDEX_WIDTH{1'b0}}, + req_port_i.address_tag, + {DCACHE_INDEX_WIDTH{1'b0}} + } + ); + + assign miss_we_o = 1'b1; + assign miss_vld_bits_o = '0; + assign wbuffer_data_o = wbuffer_q; + + for (genvar k = 0; k < DCACHE_MAX_TX; k++) begin : gen_tx_vld + assign tx_vld_o[k] = tx_stat_q[k].vld; + assign tx_paddr_o[k] = { + {riscv::XLEN_ALIGN_BYTES{1'b0}}, wbuffer_q[tx_stat_q[k].ptr].wtag << riscv::XLEN_ALIGN_BYTES + }; + end + + /////////////////////////////////////////////////////// + // openpiton does not understand byte enable sigs + // need to convert to the four cases: + // 00: byte + // 01: halfword + // 10: word + // 11: dword + // non-contiguous writes need to be serialized! + // e.g. merged dwords with BE like this: 8'b01001100 + /////////////////////////////////////////////////////// + + // get byte offset + lzc #( + .WIDTH(riscv::XLEN / 8) + ) i_vld_bdirty ( + .in_i (bdirty[dirty_ptr]), + .cnt_o (bdirty_off), + .empty_o() + ); + + // add the offset to the physical base address of this buffer entry + assign miss_paddr_o = {wbuffer_dirty_mux.wtag, bdirty_off}; + assign miss_id_o = tx_id; + + // is there any dirty word to be transmitted, and is there a free TX slot? 
+ assign miss_req_o = (|dirty) && free_tx_slots; + + // get size of aligned words, and the corresponding byte enables + // note: openpiton can only handle aligned offsets + size, and hence + // we have to split unaligned data into multiple transfers (see toSize64) + // e.g. if we have the following valid bytes: 0011_1001 -> TX0: 0000_0001, TX1: 0000_1000, TX2: 0011_0000 + if (riscv::IS_XLEN64) begin : gen_size_64b + assign miss_size_o = {1'b0, toSize64(bdirty[dirty_ptr])}; + end else begin : gen_size_32b + assign miss_size_o = {1'b0, toSize32(bdirty[dirty_ptr])}; + end + + // replicate transfers shorter than a dword + assign miss_wdata_o = riscv::IS_XLEN64 ? repData64( + wbuffer_dirty_mux.data, bdirty_off, miss_size_o[1:0] + ) : repData32( + wbuffer_dirty_mux.data, bdirty_off, miss_size_o[1:0] + ); + if (ariane_pkg::DATA_USER_EN) begin + assign miss_wuser_o = riscv::IS_XLEN64 ? repData64( + wbuffer_dirty_mux.user, bdirty_off, miss_size_o[1:0] + ) : repData32( + wbuffer_dirty_mux.user, bdirty_off, miss_size_o[1:0] + ); + end else begin + assign miss_wuser_o = '0; + end + + assign tx_be = riscv::IS_XLEN64 ? 
to_byte_enable8( + bdirty_off, miss_size_o[1:0] + ) : to_byte_enable4( + bdirty_off, miss_size_o[1:0] + ); + + /////////////////////////////////////////////////////// + // TX status registers and ID counters + /////////////////////////////////////////////////////// + + // TODO: todo: make this fall through if timing permits it + fifo_v3 #( + .FALL_THROUGH(1'b0), + .DATA_WIDTH ($clog2(DCACHE_MAX_TX)), + .DEPTH (DCACHE_MAX_TX) + ) i_rtrn_id_fifo ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (1'b0), + .testmode_i(1'b0), + .full_o (), + .empty_o (rtrn_empty), + .usage_o (), + .data_i (miss_rtrn_id_i), + .push_i (miss_rtrn_vld_i), + .data_o (rtrn_id), + .pop_i (evict) + ); + + always_comb begin : p_tx_stat + tx_stat_d = tx_stat_q; + evict = 1'b0; + wr_req_o = '0; + + // clear entry if it is clear whether it can be pushed to the cache or not + if ((!rtrn_empty) && wbuffer_q[rtrn_ptr].checked) begin + // check if data is clean and can be written, otherwise skip + // check if CL is present, otherwise skip + if ((|wr_data_be_o) && (|wbuffer_q[rtrn_ptr].hit_oh)) begin + wr_req_o = wbuffer_q[rtrn_ptr].hit_oh; + if (wr_ack_i) begin + evict = 1'b1; + tx_stat_d[rtrn_id].vld = 1'b0; + end + end else begin + evict = 1'b1; + tx_stat_d[rtrn_id].vld = 1'b0; + end + end + + // allocate a new entry + if (dirty_rd_en) begin + tx_stat_d[tx_id].vld = 1'b1; + tx_stat_d[tx_id].ptr = dirty_ptr; + tx_stat_d[tx_id].be = tx_be; + end + end + + assign free_tx_slots = |(~tx_vld_o); + + // next word to lookup in the cache + rr_arb_tree #( + .NumIn (DCACHE_MAX_TX), + .LockIn (1'b1), + .DataWidth(1) + ) i_tx_id_rr ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i('0), + .rr_i ('0), + .req_i (~tx_vld_o), + .gnt_o (), + .data_i ('0), + .gnt_i (dirty_rd_en), + .req_o (), + .data_o (), + .idx_o (tx_id) + ); + + /////////////////////////////////////////////////////// + // cache readout & update + /////////////////////////////////////////////////////// + + assign extract_tag = rd_paddr >> 
DCACHE_INDEX_WIDTH; + assign rd_tag_d = extract_tag[DCACHE_TAG_WIDTH-1:0]; + + // trigger TAG readout in cache + assign rd_tag_only_o = 1'b1; + assign rd_paddr = { + {riscv::XLEN_ALIGN_BYTES{1'b0}}, wbuffer_check_mux.wtag << riscv::XLEN_ALIGN_BYTES + }; + assign rd_req_o = |tocheck; + assign rd_tag_o = rd_tag_q; //delay by one cycle + assign rd_idx_o = rd_paddr[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH]; + assign rd_off_o = rd_paddr[DCACHE_OFFSET_WIDTH-1:0]; + assign check_en_d = rd_req_o & rd_ack_i; + + // cache update port + assign rtrn_ptr = tx_stat_q[rtrn_id].ptr; + // if we wrote into a word while it was in-flight, we cannot write the dirty bytes to the cache + // when the TX returns + assign wr_data_be_o = tx_stat_q[rtrn_id].be & (~wbuffer_q[rtrn_ptr].dirty); + assign wr_paddr = { + {riscv::XLEN_ALIGN_BYTES{1'b0}}, wbuffer_q[rtrn_ptr].wtag << riscv::XLEN_ALIGN_BYTES + }; + assign wr_idx_o = wr_paddr[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH]; + assign wr_off_o = wr_paddr[DCACHE_OFFSET_WIDTH-1:0]; + assign wr_data_o = wbuffer_q[rtrn_ptr].data; + assign wr_user_o = wbuffer_q[rtrn_ptr].user; + + + /////////////////////////////////////////////////////// + // readout of status bits, index calculation + /////////////////////////////////////////////////////// + + logic [DCACHE_WBUF_DEPTH-1:0][DCACHE_CL_IDX_WIDTH-1:0] wtag_comp; + + assign wr_cl_vld_d = wr_cl_vld_i; + assign wr_cl_idx_d = wr_cl_idx_i; + + for (genvar k = 0; k < DCACHE_WBUF_DEPTH; k++) begin : gen_flags + // only for debug, will be pruned + if (CVA6Cfg.DebugEn) begin + assign debug_paddr[k] = { + {riscv::XLEN_ALIGN_BYTES{1'b0}}, wbuffer_q[k].wtag << riscv::XLEN_ALIGN_BYTES + }; + end + + // dirty bytes that are ready for transmission. + // note that we cannot retransmit a word that is already in-flight + // since the multiple transactions might overtake each other in the memory system! + assign bdirty[k] = (|wbuffer_q[k].txblock) ? 
'0 : wbuffer_q[k].dirty & wbuffer_q[k].valid; + + + assign dirty[k] = |bdirty[k]; + assign valid[k] = |wbuffer_q[k].valid; + assign wbuffer_hit_oh[k] = valid[k] & (wbuffer_q[k].wtag == {req_port_i.address_tag, req_port_i.address_index[DCACHE_INDEX_WIDTH-1:riscv::XLEN_ALIGN_BYTES]}); + + // checks if an invalidation/cache refill hits a particular word + // note: an invalidation can hit multiple words! + // need to respect previous cycle, too, since we add a cycle of latency to the rd_hit_oh_i signal... + assign wtag_comp[k] = wbuffer_q[k].wtag[DCACHE_INDEX_WIDTH-riscv::XLEN_ALIGN_BYTES-1:DCACHE_OFFSET_WIDTH-riscv::XLEN_ALIGN_BYTES]; + assign inval_hit[k] = (wr_cl_vld_d & valid[k] & (wtag_comp[k] == wr_cl_idx_d)) | + (wr_cl_vld_q & valid[k] & (wtag_comp[k] == wr_cl_idx_q)); + + // these word have to be looked up in the cache + assign tocheck[k] = (~wbuffer_q[k].checked) & valid[k]; + end + + assign wr_ptr = (|wbuffer_hit_oh) ? hit_ptr : next_ptr; + assign rdy = (|wbuffer_hit_oh) | (~full); + + // next free entry in the buffer + lzc #( + .WIDTH(DCACHE_WBUF_DEPTH) + ) i_vld_lzc ( + .in_i (~valid), + .cnt_o (next_ptr), + .empty_o(full) + ); + + // get index of hit + lzc #( + .WIDTH(DCACHE_WBUF_DEPTH) + ) i_hit_lzc ( + .in_i (wbuffer_hit_oh), + .cnt_o (hit_ptr), + .empty_o() + ); + + // next dirty word to serve + rr_arb_tree #( + .NumIn (DCACHE_WBUF_DEPTH), + .LockIn (1'b1), + .DataType(wbuffer_t) + ) i_dirty_rr ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i('0), + .rr_i ('0), + .req_i (dirty), + .gnt_o (), + .data_i (wbuffer_q), + .gnt_i (dirty_rd_en), + .req_o (), + .data_o (wbuffer_dirty_mux), + .idx_o (dirty_ptr) + ); + + // next word to lookup in the cache + rr_arb_tree #( + .NumIn (DCACHE_WBUF_DEPTH), + .DataType(wbuffer_t) + ) i_clean_rr ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i('0), + .rr_i ('0), + .req_i (tocheck), + .gnt_o (), + .data_i (wbuffer_q), + .gnt_i (check_en_d), + .req_o (), + .data_o (wbuffer_check_mux), + .idx_o (check_ptr_d) + ); + + 
/////////////////////////////////////////////////////// + // update logic + /////////////////////////////////////////////////////// + + assign req_port_o.data_rvalid = '0; + assign req_port_o.data_rdata = '0; + assign req_port_o.data_ruser = '0; + assign req_port_o.data_rid = '0; + + assign rd_hit_oh_d = rd_hit_oh_i; + + logic ni_inside, ni_conflict; + assign ni_inside = |ni_pending_q; + assign ni_conflict = CVA6Cfg.NonIdemPotenceEn && is_ni && ni_inside; + assign not_ni_o = !ni_inside; + assign empty_o = !(|valid); + + // TODO: rewrite and separate into MUXES and write strobe logic + always_comb begin : p_buffer + wbuffer_d = wbuffer_q; + ni_pending_d = ni_pending_q; + dirty_rd_en = 1'b0; + req_port_o.data_gnt = 1'b0; + wbuffer_wren = 1'b0; + + // TAG lookup returns, mark corresponding word + if (check_en_q1) begin + if (|wbuffer_q[check_ptr_q1].valid) begin + wbuffer_d[check_ptr_q1].checked = 1'b1; + wbuffer_d[check_ptr_q1].hit_oh = rd_hit_oh_q; + end + end + + // if an invalidation or cache line refill comes in and hits on the write buffer, + // we have to discard our knowledge of the corresponding cacheline state + for (int k = 0; k < DCACHE_WBUF_DEPTH; k++) begin + if (inval_hit[k]) begin + wbuffer_d[k].checked = 1'b0; + end + end + + // once TX write response came back, we can clear the TX block. 
if it was not dirty, we + // can completely evict it - otherwise we have to leave it there for retransmission + if (evict) begin + for (int k = 0; k < (riscv::XLEN / 8); k++) begin + if (tx_stat_q[rtrn_id].be[k]) begin + wbuffer_d[rtrn_ptr].txblock[k] = 1'b0; + if (!wbuffer_q[rtrn_ptr].dirty[k]) begin + wbuffer_d[rtrn_ptr].valid[k] = 1'b0; + + // NOTE: this is not strictly needed, but makes it much + // easier to debug, since no invalid data remains in the buffer + // wbuffer_d[rtrn_ptr].data[k*8 +:8] = '0; + end + end + end + // if all bytes are evicted, clear the cache status flag + if (wbuffer_d[rtrn_ptr].valid == 0) begin + wbuffer_d[rtrn_ptr].checked = 1'b0; + ni_pending_d[rtrn_ptr] = 1'b0; + end + end + + // mark bytes sent out to the memory system + if (miss_req_o && miss_ack_i) begin + dirty_rd_en = 1'b1; + for (int k = 0; k < (riscv::XLEN / 8); k++) begin + if (tx_be[k]) begin + wbuffer_d[dirty_ptr].dirty[k] = 1'b0; + wbuffer_d[dirty_ptr].txblock[k] = 1'b1; + end + end + end + + // write new word into the buffer + if (req_port_i.data_req && rdy) begin + // in case we have an NI address, need to drain the buffer first + // in case we are serving an NI address, we block until it is written to memory + if (!ni_conflict) begin //empty of NI operations + wbuffer_wren = 1'b1; + + req_port_o.data_gnt = 1'b1; + ni_pending_d[wr_ptr] = is_ni; + + wbuffer_d[wr_ptr].checked = 1'b0; + wbuffer_d[wr_ptr].wtag = { + req_port_i.address_tag, + req_port_i.address_index[DCACHE_INDEX_WIDTH-1:riscv::XLEN_ALIGN_BYTES] + }; + + // mark bytes as dirty + for (int k = 0; k < (riscv::XLEN / 8); k++) begin + if (req_port_i.data_be[k]) begin + wbuffer_d[wr_ptr].valid[k] = 1'b1; + wbuffer_d[wr_ptr].dirty[k] = 1'b1; + wbuffer_d[wr_ptr].data[k*8+:8] = req_port_i.data_wdata[k*8+:8]; + if (ariane_pkg::DATA_USER_EN) begin + wbuffer_d[wr_ptr].user[k*8+:8] = req_port_i.data_wuser[k*8+:8]; + end else begin + wbuffer_d[wr_ptr].user[k*8+:8] = '0; + end + end + end + end + end + end + + + 
/////////////////////////////////////////////////////// + // ff's + /////////////////////////////////////////////////////// + + always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs + if (!rst_ni) begin + wbuffer_q <= '{default: '0}; + tx_stat_q <= '{default: '0}; + ni_pending_q <= '0; + check_ptr_q <= '0; + check_ptr_q1 <= '0; + check_en_q <= '0; + check_en_q1 <= '0; + rd_tag_q <= '0; + rd_hit_oh_q <= '0; + wr_cl_vld_q <= '0; + wr_cl_idx_q <= '0; + end else begin + wbuffer_q <= wbuffer_d; + tx_stat_q <= tx_stat_d; + ni_pending_q <= ni_pending_d; + check_ptr_q <= check_ptr_d; + check_ptr_q1 <= check_ptr_q; + check_en_q <= check_en_d; + check_en_q1 <= check_en_q; + rd_tag_q <= rd_tag_d; + rd_hit_oh_q <= rd_hit_oh_d; + wr_cl_vld_q <= wr_cl_vld_d; + wr_cl_idx_q <= wr_cl_idx_d; + end + end + + + /////////////////////////////////////////////////////// + // assertions + /////////////////////////////////////////////////////// + + //pragma translate_off +`ifndef VERILATOR + + hot1 : + assert property (@(posedge clk_i) disable iff (!rst_ni) req_port_i.data_req |-> $onehot0( + wbuffer_hit_oh + )) + else $fatal(1, "[l1 dcache wbuffer] wbuffer_hit_oh signal must be hot1"); + + tx_status : + assert property ( + @(posedge clk_i) disable iff (!rst_ni) evict && miss_ack_i && miss_req_o |-> (tx_id != rtrn_id)) + else $fatal(1, "[l1 dcache wbuffer] cannot allocate and clear same tx slot id in the same cycle"); + + tx_valid0 : + assert property (@(posedge clk_i) disable iff (!rst_ni) evict |-> tx_stat_q[rtrn_id].vld) + else $fatal(1, "[l1 dcache wbuffer] evicting invalid transaction slot"); + + tx_valid1 : + assert property (@(posedge clk_i) disable iff (!rst_ni) evict |-> |wbuffer_q[rtrn_ptr].valid) + else $fatal(1, "[l1 dcache wbuffer] wbuffer entry corresponding to this transaction is invalid"); + + write_full : + assert property ( + @(posedge clk_i) disable iff (!rst_ni) req_port_i.data_req |-> req_port_o.data_gnt |-> ((!full) || (|wbuffer_hit_oh))) + else $fatal(1, "[l1 
dcache wbuffer] cannot write if full or no hit"); + + unused0 : + assert property (@(posedge clk_i) disable iff (!rst_ni) !req_port_i.tag_valid) + else $fatal(1, "[l1 dcache wbuffer] req_port_i.tag_valid should not be asserted"); + + unused1 : + assert property (@(posedge clk_i) disable iff (!rst_ni) !req_port_i.kill_req) + else $fatal(1, "[l1 dcache wbuffer] req_port_i.kill_req should not be asserted"); + + for (genvar k = 0; k < DCACHE_WBUF_DEPTH; k++) begin : gen_assert1 + for (genvar j = 0; j < (riscv::XLEN / 8); j++) begin : gen_assert2 + byteStates : + assert property ( + @(posedge clk_i) disable iff (!rst_ni) {wbuffer_q[k].valid[j], wbuffer_q[k].dirty[j], wbuffer_q[k].txblock[j]} inside {3'b000, 3'b110, 3'b101, 3'b111} ) + else + $fatal( + 1, + "[l1 dcache wbuffer] byte %02d of wbuffer entry %02d has invalid state: valid=%01b, dirty=%01b, txblock=%01b", + j, + k, + wbuffer_q[k].valid[j], + wbuffer_q[k].dirty[j], + wbuffer_q[k].txblock[j] + ); + end + end +`endif + //pragma translate_on + +endmodule // wt_dcache_wbuffer diff --git a/test/type_param/core/commit_stage.sv b/test/type_param/core/commit_stage.sv new file mode 100644 index 00000000..8f168be1 --- /dev/null +++ b/test/type_param/core/commit_stage.sv @@ -0,0 +1,298 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// Author: Florian Zaruba, ETH Zurich +// Date: 15.04.2017 +// Description: Commits to the architectural state resulting from the scoreboard. + + +module commit_stage + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +) ( + input logic clk_i, + input logic rst_ni, + input logic halt_i, // request to halt the core + input logic flush_dcache_i, // request to flush dcache -> also flush the pipeline + output exception_t exception_o, // take exception to controller + output logic dirty_fp_state_o, // mark the F state as dirty + input logic single_step_i, // we are in single step debug mode + // from scoreboard + input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i, // the instruction we want to commit + output logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_o, // acknowledge that we are indeed committing + // to register file + output logic [CVA6Cfg.NrCommitPorts-1:0][4:0] waddr_o, // register file write address + output logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata_o, // register file write data + output logic [CVA6Cfg.NrCommitPorts-1:0] we_gpr_o, // register file write enable + output logic [CVA6Cfg.NrCommitPorts-1:0] we_fpr_o, // floating point register enable + // Atomic memory operations + input amo_resp_t amo_resp_i, // result of AMO operation + // to CSR file and PC Gen (because on certain CSR instructions we'll need to flush the whole pipeline) + output logic [riscv::VLEN-1:0] pc_o, + // to/from CSR file + output fu_op csr_op_o, // decoded CSR operation + output riscv::xlen_t csr_wdata_o, // data to write to CSR + input riscv::xlen_t csr_rdata_i, // data to read from CSR + input exception_t csr_exception_i, // exception or interrupt occurred in CSR stage (the same as commit) + output logic csr_write_fflags_o, // write the fflags CSR + // commit signals to ex + output logic commit_lsu_o, // commit the pending store + input logic commit_lsu_ready_i, // commit buffer of LSU is ready + 
output logic [TRANS_ID_BITS-1:0] commit_tran_id_o, // transaction id of first commit port + output logic amo_valid_commit_o, // valid AMO in commit stage + input logic no_st_pending_i, // there is no store pending + output logic commit_csr_o, // commit the pending CSR instruction + output logic fence_i_o, // flush I$ and pipeline + output logic fence_o, // flush D$ and pipeline + output logic flush_commit_o, // request a pipeline flush + output logic sfence_vma_o // flush TLBs and pipeline +); + + // ila_0 i_ila_commit ( + // .clk(clk_i), // input wire clk + // .probe0(commit_instr_i[0].pc), // input wire [63:0] probe0 + // .probe1(commit_instr_i[1].pc), // input wire [63:0] probe1 + // .probe2(commit_instr_i[0].valid), // input wire [0:0] probe2 + // .probe3(commit_instr_i[1].valid), // input wire [0:0] probe3 + // .probe4(commit_ack_o[0]), // input wire [0:0] probe4 + // .probe5(commit_ack_o[0]), // input wire [0:0] probe5 + // .probe6(1'b0), // input wire [0:0] probe6 + // .probe7(1'b0), // input wire [0:0] probe7 + // .probe8(1'b0), // input wire [0:0] probe8 + // .probe9(1'b0) // input wire [0:0] probe9 + // ); + + for (genvar i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin : gen_waddr + assign waddr_o[i] = commit_instr_i[i].rd[4:0]; + end + + assign pc_o = commit_instr_i[0].pc; + // Dirty the FP state if we are committing anything related to the FPU + always_comb begin : dirty_fp_state + dirty_fp_state_o = 1'b0; + for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin + dirty_fp_state_o |= commit_ack_o[i] & (commit_instr_i[i].fu inside {FPU, FPU_VEC} || (CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr( + commit_instr_i[i].op + ))); + // Check if we issued a vector floating-point instruction to the accellerator + dirty_fp_state_o |= commit_instr_i[i].fu == ACCEL && commit_instr_i[i].vfp; + end + end + + assign commit_tran_id_o = commit_instr_i[0].trans_id; + + logic instr_0_is_amo; + assign instr_0_is_amo = is_amo(commit_instr_i[0].op); + // ------------------- + // 
Commit Instruction + // ------------------- + // write register file or commit instruction in LSU or CSR Buffer + always_comb begin : commit + // default assignments + commit_ack_o[0] = 1'b0; + + amo_valid_commit_o = 1'b0; + + we_gpr_o[0] = 1'b0; + we_fpr_o = '{default: 1'b0}; + commit_lsu_o = 1'b0; + commit_csr_o = 1'b0; + // amos will commit on port 0 + wdata_o[0] = (CVA6Cfg.RVA && amo_resp_i.ack) ? amo_resp_i.result[riscv::XLEN-1:0] : commit_instr_i[0].result; + csr_op_o = ADD; // this corresponds to a CSR NOP + csr_wdata_o = {riscv::XLEN{1'b0}}; + fence_i_o = 1'b0; + fence_o = 1'b0; + sfence_vma_o = 1'b0; + csr_write_fflags_o = 1'b0; + flush_commit_o = 1'b0; + + // we will not commit the instruction if we took an exception + // and we do not commit the instruction if we requested a halt + if (commit_instr_i[0].valid && !commit_instr_i[0].ex.valid && !halt_i) begin + // we can definitely write the register file + // if the instruction is not committing anything the destination + commit_ack_o[0] = 1'b1; + if (CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(commit_instr_i[0].op)) begin + we_fpr_o[0] = 1'b1; + end else begin + we_gpr_o[0] = 1'b1; + end + // check whether the instruction we retire was a store + if ((!CVA6Cfg.RVA && commit_instr_i[0].fu == STORE) || (CVA6Cfg.RVA && commit_instr_i[0].fu == STORE && !instr_0_is_amo)) begin + // check if the LSU is ready to accept another commit entry (e.g.: a non-speculative store) + if (commit_lsu_ready_i) begin + commit_ack_o[0] = 1'b1; + commit_lsu_o = 1'b1; + // stall in case the store buffer is not able to accept anymore instructions + end else begin + commit_ack_o[0] = 1'b0; + end + end + // --------- + // FPU Flags + // --------- + if (CVA6Cfg.FpPresent) begin + if (commit_instr_i[0].fu inside {FPU, FPU_VEC}) begin + // write the CSR with potential exception flags from retiring floating point instruction + csr_wdata_o = {{riscv::XLEN - 5{1'b0}}, commit_instr_i[0].ex.cause[4:0]}; + csr_write_fflags_o = 1'b1; + 
commit_ack_o[0] = 1'b1; + end + end + // --------- + // CSR Logic + // --------- + // check whether the instruction we retire was a CSR instruction and it did not + // throw an exception + if (commit_instr_i[0].fu == CSR) begin + // write the CSR file + csr_op_o = commit_instr_i[0].op; + csr_wdata_o = commit_instr_i[0].result; + if (!csr_exception_i.valid) begin + commit_csr_o = 1'b1; + wdata_o[0] = csr_rdata_i; + commit_ack_o[0] = 1'b1; + end else begin + commit_ack_o[0] = 1'b0; + we_gpr_o[0] = 1'b0; + end + end + // ------------------ + // SFENCE.VMA Logic + // ------------------ + // sfence.vma is idempotent so we can safely re-execute it after returning + // from interrupt service routine + // check if this instruction was a SFENCE_VMA + if (CVA6Cfg.RVS && commit_instr_i[0].op == SFENCE_VMA) begin + // no store pending so we can flush the TLBs and pipeline + sfence_vma_o = no_st_pending_i; + // wait for the store buffer to drain until flushing the pipeline + commit_ack_o[0] = no_st_pending_i; + end + // ------------------ + // FENCE.I Logic + // ------------------ + // fence.i is idempotent so we can safely re-execute it after returning + // from interrupt service routine + // Fence synchronizes data and instruction streams. That means that we need to flush the private icache + // and the private dcache. This is the most expensive instruction. 
+ if (commit_instr_i[0].op == FENCE_I || (flush_dcache_i && DCACHE_TYPE == int'(config_pkg::WB) && commit_instr_i[0].fu != STORE)) begin + commit_ack_o[0] = no_st_pending_i; + // tell the controller to flush the I$ + fence_i_o = no_st_pending_i; + end + // ------------------ + // FENCE Logic + // ------------------ + // fence is idempotent so we can safely re-execute it after returning + // from interrupt service routine + if (commit_instr_i[0].op == FENCE) begin + commit_ack_o[0] = no_st_pending_i; + // tell the controller to flush the D$ + fence_o = no_st_pending_i; + end + // ------------------ + // AMO + // ------------------ + if (CVA6Cfg.RVA && instr_0_is_amo) begin + // AMO finished + commit_ack_o[0] = amo_resp_i.ack; + // flush the pipeline + flush_commit_o = amo_resp_i.ack; + amo_valid_commit_o = 1'b1; + we_gpr_o[0] = amo_resp_i.ack; + end + end + + if (CVA6Cfg.NrCommitPorts > 1) begin + + commit_ack_o[1] = 1'b0; + we_gpr_o[1] = 1'b0; + wdata_o[1] = commit_instr_i[1].result; + + // ----------------- + // Commit Port 2 + // ----------------- + // check if the second instruction can be committed as well and the first wasn't a CSR instruction + // also if we are in single step mode don't retire the second instruction + if (commit_ack_o[0] && commit_instr_i[1].valid + && !halt_i + && !(commit_instr_i[0].fu inside {CSR}) + && !flush_dcache_i + && !instr_0_is_amo + && !single_step_i) begin + // only if the first instruction didn't throw an exception and this instruction won't throw an exception + // and the functional unit is of type ALU, LOAD, CTRL_FLOW, MULT, FPU or FPU_VEC + if (!exception_o.valid && !commit_instr_i[1].ex.valid + && (commit_instr_i[1].fu inside {ALU, LOAD, CTRL_FLOW, MULT, FPU, FPU_VEC})) begin + + if (CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(commit_instr_i[1].op)) we_fpr_o[1] = 1'b1; + else we_gpr_o[1] = 1'b1; + + commit_ack_o[1] = 1'b1; + + // additionally check if we are retiring an FPU instruction because we need to make sure that we 
write all + // exception flags + if (CVA6Cfg.FpPresent && commit_instr_i[1].fu inside {FPU, FPU_VEC}) begin + if (csr_write_fflags_o) + csr_wdata_o = { + {riscv::XLEN - 5{1'b0}}, + (commit_instr_i[0].ex.cause[4:0] | commit_instr_i[1].ex.cause[4:0]) + }; + else csr_wdata_o = {{riscv::XLEN - 5{1'b0}}, commit_instr_i[1].ex.cause[4:0]}; + + csr_write_fflags_o = 1'b1; + end + end + end + end + end + + // ----------------------------- + // Exception & Interrupt Logic + // ----------------------------- + // here we know for sure that we are taking the exception + always_comb begin : exception_handling + // Multiple simultaneous interrupts and traps at the same privilege level are handled in the following decreasing + // priority order: external interrupts, software interrupts, timer interrupts, then finally any synchronous traps. (1.10 p.30) + // interrupts are correctly prioritized in the CSR reg file, exceptions are prioritized here + exception_o.valid = 1'b0; + exception_o.cause = '0; + exception_o.tval = '0; + // we need a valid instruction in the commit stage + if (commit_instr_i[0].valid) begin + // ------------------------ + // check for CSR exception + // ------------------------ + if (csr_exception_i.valid) begin + exception_o = csr_exception_i; + // if no earlier exception happened the commit instruction will still contain + // the instruction bits from the ID stage. 
If a earlier exception happened we don't care + // as we will overwrite it anyway in the next IF bl + exception_o.tval = commit_instr_i[0].ex.tval; + end + // ------------------------ + // Earlier Exceptions + // ------------------------ + // but we give precedence to exceptions which happened earlier e.g.: instruction page + // faults for example + if (commit_instr_i[0].ex.valid) begin + exception_o = commit_instr_i[0].ex; + end + end + // Don't take any exceptions iff: + // - If we halted the processor + if (halt_i) begin + exception_o.valid = 1'b0; + end + end +endmodule diff --git a/test/type_param/core/compressed_decoder.sv b/test/type_param/core/compressed_decoder.sv new file mode 100644 index 00000000..c218a83d --- /dev/null +++ b/test/type_param/core/compressed_decoder.sv @@ -0,0 +1,935 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. // +// +// Author: Florian Zaruba - zarubaf@iis.ee.ethz.ch +// Engineer: Sven Stucki - svstucki@student.ethz.ch +// +// Design Name: Compressed instruction decoder +// Project Name: zero-riscy +// Language: SystemVerilog +// +// Description: Decodes RISC-V compressed instructions into their RV32 +// equivalent. This module is fully combinatorial. 
+ + +module compressed_decoder #( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +) ( + input logic [31:0] instr_i, + output logic [31:0] instr_o, + output logic illegal_instr_o, + output logic is_compressed_o +); + + // ------------------- + // Compressed Decoder + // ------------------- + always_comb begin + illegal_instr_o = 1'b0; + instr_o = '0; + is_compressed_o = 1'b1; + instr_o = instr_i; + + // I: | imm[11:0] | rs1 | funct3 | rd | opcode | + // S: | imm[11:5] | rs2 | rs1 | funct3 | imm[4:0] | opcode | + unique case (instr_i[1:0]) + // C0 + riscv::OpcodeC0: begin + unique case (instr_i[15:13]) + riscv::OpcodeC0Addi4spn: begin + // c.addi4spn -> addi rd', x2, imm + instr_o = { + 2'b0, + instr_i[10:7], + instr_i[12:11], + instr_i[5], + instr_i[6], + 2'b00, + 5'h02, + 3'b000, + 2'b01, + instr_i[4:2], + riscv::OpcodeOpImm + }; + if (instr_i[12:5] == 8'b0) illegal_instr_o = 1'b1; + end + + riscv::OpcodeC0Fld: begin + if (CVA6Cfg.FpPresent) begin + // c.fld -> fld rd', imm(rs1') + // CLD: | funct3 | imm[5:3] | rs1' | imm[7:6] | rd' | C0 | + instr_o = { + 4'b0, + instr_i[6:5], + instr_i[12:10], + 3'b000, + 2'b01, + instr_i[9:7], + 3'b011, + 2'b01, + instr_i[4:2], + riscv::OpcodeLoadFp + }; + end else begin + illegal_instr_o = 1'b1; + end + end + + riscv::OpcodeC0Lw: begin + // c.lw -> lw rd', imm(rs1') + instr_o = { + 5'b0, + instr_i[5], + instr_i[12:10], + instr_i[6], + 2'b00, + 2'b01, + instr_i[9:7], + 3'b010, + 2'b01, + instr_i[4:2], + riscv::OpcodeLoad + }; + end + + riscv::OpcodeC0Ld: begin + // RV64 + // c.ld -> ld rd', imm(rs1') + // RV32 + // c.flw -> flw fprd', imm(rs1') + if (riscv::IS_XLEN64) begin + // CLD: | funct3 | imm[5:3] | rs1' | imm[7:6] | rd' | C0 | + instr_o = { + 4'b0, + instr_i[6:5], + instr_i[12:10], + 3'b000, + 2'b01, + instr_i[9:7], + 3'b011, + 2'b01, + instr_i[4:2], + riscv::OpcodeLoad + }; + end else begin + if (CVA6Cfg.FpPresent) begin + // CFLW: | funct3 (change to LW) | imm[5:3] | rs1' | imm[2|6] | rd' | C0 | 
+ instr_o = { + 5'b0, + instr_i[5], + instr_i[12:10], + instr_i[6], + 2'b00, + 2'b01, + instr_i[9:7], + 3'b010, + 2'b01, + instr_i[4:2], + riscv::OpcodeLoadFp + }; + end else begin + illegal_instr_o = 1'b1; + end + end + end + + riscv::OpcodeC0Zcb: begin + if (CVA6Cfg.RVZCB) begin + unique case (instr_i[12:10]) + 3'b000: begin + // c.lbu -> lbu rd', uimm(rs1') + instr_o = { + 10'b0, + instr_i[5], + instr_i[6], + 2'b01, + instr_i[9:7], + 3'b100, + 2'b01, + instr_i[4:2], + riscv::OpcodeLoad + }; + end + + 3'b001: begin + if (instr_i[6]) begin + // c.lh -> lh rd', uimm(rs1') + instr_o = { + 10'b0, + instr_i[5], + 1'b0, + 2'b01, + instr_i[9:7], + 3'b001, + 2'b01, + instr_i[4:2], + riscv::OpcodeLoad + }; + end else begin + // c.lhu -> lhu rd', uimm(rs1') + instr_o = { + 10'b0, + instr_i[5], + 1'b0, + 2'b01, + instr_i[9:7], + 3'b101, + 2'b01, + instr_i[4:2], + riscv::OpcodeLoad + }; + end + end + + 3'b010: begin + // c.sb -> sb rs2', uimm(rs1') + instr_o = { + 7'b0, + 2'b01, + instr_i[4:2], + 2'b01, + instr_i[9:7], + 3'b000, + 3'b0, + instr_i[5], + instr_i[6], + riscv::OpcodeStore + }; + end + + 3'b011: begin + // c.sh -> sh rs2', uimm(rs1') + instr_o = { + 7'b0, + 2'b01, + instr_i[4:2], + 2'b01, + instr_i[9:7], + 3'b001, + 3'b0, + instr_i[5], + 1'b0, + riscv::OpcodeStore + }; + end + + default: begin + illegal_instr_o = 1'b1; + end + endcase + + end else begin + instr_o = instr_i; + illegal_instr_o = 1'b1; + end + end + + riscv::OpcodeC0Fsd: begin + if (CVA6Cfg.FpPresent) begin + // c.fsd -> fsd rs2', imm(rs1') + instr_o = { + 4'b0, + instr_i[6:5], + instr_i[12], + 2'b01, + instr_i[4:2], + 2'b01, + instr_i[9:7], + 3'b011, + instr_i[11:10], + 3'b000, + riscv::OpcodeStoreFp + }; + end else begin + illegal_instr_o = 1'b1; + end + end + + riscv::OpcodeC0Sw: begin + // c.sw -> sw rs2', imm(rs1') + instr_o = { + 5'b0, + instr_i[5], + instr_i[12], + 2'b01, + instr_i[4:2], + 2'b01, + instr_i[9:7], + 3'b010, + instr_i[11:10], + instr_i[6], + 2'b00, + riscv::OpcodeStore + }; + 
end + + riscv::OpcodeC0Sd: begin + // RV64 + // c.sd -> sd rs2', imm(rs1') + // RV32 + // c.fsw -> fsw fprs2', imm(rs1') + if (riscv::IS_XLEN64) begin + instr_o = { + 4'b0, + instr_i[6:5], + instr_i[12], + 2'b01, + instr_i[4:2], + 2'b01, + instr_i[9:7], + 3'b011, + instr_i[11:10], + 3'b000, + riscv::OpcodeStore + }; + end else begin + if (CVA6Cfg.FpPresent) begin + instr_o = { + 5'b0, + instr_i[5], + instr_i[12], + 2'b01, + instr_i[4:2], + 2'b01, + instr_i[9:7], + 3'b010, + instr_i[11:10], + instr_i[6], + 2'b00, + riscv::OpcodeStoreFp + }; + end else begin + illegal_instr_o = 1'b1; + end + end + end + + default: begin + illegal_instr_o = 1'b1; + end + endcase + end + + // C1 + riscv::OpcodeC1: begin + unique case (instr_i[15:13]) + riscv::OpcodeC1Addi: begin + // c.addi -> addi rd, rd, nzimm + // c.nop -> addi 0, 0, 0 + instr_o = { + {6{instr_i[12]}}, + instr_i[12], + instr_i[6:2], + instr_i[11:7], + 3'b0, + instr_i[11:7], + riscv::OpcodeOpImm + }; + end + + + riscv::OpcodeC1Addiw: begin // or riscv::OpcodeC1Jal for RV32IC + if (riscv::IS_XLEN64) begin + // c.addiw -> addiw rd, rd, nzimm for RV64IC + if (instr_i[11:7] != 5'h0) begin // only valid if the destination is not r0 + instr_o = { + {6{instr_i[12]}}, + instr_i[12], + instr_i[6:2], + instr_i[11:7], + 3'b0, + instr_i[11:7], + riscv::OpcodeOpImm32 + }; + end else begin + illegal_instr_o = 1'b1; + end + end else begin + // c.jal -> jal x1, imm for RV32IC only + instr_o = { + instr_i[12], + instr_i[8], + instr_i[10:9], + instr_i[6], + instr_i[7], + instr_i[2], + instr_i[11], + instr_i[5:3], + {9{instr_i[12]}}, + 5'b1, + riscv::OpcodeJal + }; + + + + end + end + + riscv::OpcodeC1Li: begin + // c.li -> addi rd, x0, nzimm + instr_o = { + {6{instr_i[12]}}, + instr_i[12], + instr_i[6:2], + 5'b0, + 3'b0, + instr_i[11:7], + riscv::OpcodeOpImm + }; + end + + riscv::OpcodeC1LuiAddi16sp: begin + // c.lui -> lui rd, imm + instr_o = {{15{instr_i[12]}}, instr_i[6:2], instr_i[11:7], riscv::OpcodeLui}; + + if (instr_i[11:7] == 
5'h02) begin + // c.addi16sp -> addi x2, x2, nzimm + instr_o = { + {3{instr_i[12]}}, + instr_i[4:3], + instr_i[5], + instr_i[2], + instr_i[6], + 4'b0, + 5'h02, + 3'b000, + 5'h02, + riscv::OpcodeOpImm + }; + end + + if ({instr_i[12], instr_i[6:2]} == 6'b0) illegal_instr_o = 1'b1; + end + + riscv::OpcodeC1MiscAlu: begin + unique case (instr_i[11:10]) + 2'b00, 2'b01: begin + // 00: c.srli -> srli rd, rd, shamt + // 01: c.srai -> srai rd, rd, shamt + instr_o = { + 1'b0, + instr_i[10], + 4'b0, + instr_i[12], + instr_i[6:2], + 2'b01, + instr_i[9:7], + 3'b101, + 2'b01, + instr_i[9:7], + riscv::OpcodeOpImm + }; + end + + 2'b10: begin + // c.andi -> andi rd, rd, imm + instr_o = { + {6{instr_i[12]}}, + instr_i[12], + instr_i[6:2], + 2'b01, + instr_i[9:7], + 3'b111, + 2'b01, + instr_i[9:7], + riscv::OpcodeOpImm + }; + end + + 2'b11: begin + unique case ({ + instr_i[12], instr_i[6:5] + }) + 3'b000: begin + // c.sub -> sub rd', rd', rs2' + instr_o = { + 2'b01, + 5'b0, + 2'b01, + instr_i[4:2], + 2'b01, + instr_i[9:7], + 3'b000, + 2'b01, + instr_i[9:7], + riscv::OpcodeOp + }; + end + + 3'b001: begin + // c.xor -> xor rd', rd', rs2' + instr_o = { + 7'b0, + 2'b01, + instr_i[4:2], + 2'b01, + instr_i[9:7], + 3'b100, + 2'b01, + instr_i[9:7], + riscv::OpcodeOp + }; + end + + 3'b010: begin + // c.or -> or rd', rd', rs2' + instr_o = { + 7'b0, + 2'b01, + instr_i[4:2], + 2'b01, + instr_i[9:7], + 3'b110, + 2'b01, + instr_i[9:7], + riscv::OpcodeOp + }; + end + + 3'b011: begin + // c.and -> and rd', rd', rs2' + instr_o = { + 7'b0, + 2'b01, + instr_i[4:2], + 2'b01, + instr_i[9:7], + 3'b111, + 2'b01, + instr_i[9:7], + riscv::OpcodeOp + }; + end + + 3'b100: begin + if (riscv::IS_XLEN64) begin + // c.subw -> subw rd', rd', rs2' + instr_o = { + 2'b01, + 5'b0, + 2'b01, + instr_i[4:2], + 2'b01, + instr_i[9:7], + 3'b000, + 2'b01, + instr_i[9:7], + riscv::OpcodeOp32 + }; + end else begin + illegal_instr_o = 1'b1; + end + end + + 3'b101: begin + if (riscv::IS_XLEN64) begin + // c.addw -> addw rd', rd', 
rs2' + instr_o = { + 2'b00, + 5'b0, + 2'b01, + instr_i[4:2], + 2'b01, + instr_i[9:7], + 3'b000, + 2'b01, + instr_i[9:7], + riscv::OpcodeOp32 + }; + end else begin + illegal_instr_o = 1'b1; + end + end + + 3'b110: begin + if (CVA6Cfg.RVZCB) begin + // c.mul -> mul rd', rd', rs2' + instr_o = { + 6'b0, + 1'b1, + 2'b01, + instr_i[4:2], + 2'b01, + instr_i[9:7], + 3'b000, + 2'b01, + instr_i[9:7], + riscv::OpcodeOp + }; + end else begin + instr_o = instr_i; + illegal_instr_o = 1'b1; + end + end + + 3'b111: begin + if (CVA6Cfg.RVZCB) begin + + unique case (instr_i[4:2]) + 3'b000: begin + // c.zext.b -> andi rd', rd', 0xff + instr_o = { + 4'b0, + 8'hFF, + 2'b01, + instr_i[9:7], + 3'b111, + 2'b01, + instr_i[9:7], + riscv::OpcodeOpImm + }; + end + + 3'b001: begin + if (CVA6Cfg.RVB) begin + // c.sext.b -> sext.b rd', rd' + instr_o = { + 7'h30, + 5'h4, + 2'b01, + instr_i[9:7], + 3'b001, + 2'b01, + instr_i[9:7], + riscv::OpcodeOpImm + }; + end else illegal_instr_o = 1'b1; + end + + 3'b010: begin + if (CVA6Cfg.RVB) begin + // c.zext.h -> zext.h rd', rd' + if (riscv::IS_XLEN64) begin + instr_o = { + 7'h4, + 5'h0, + 2'b01, + instr_i[9:7], + 3'b100, + 2'b01, + instr_i[9:7], + riscv::OpcodeOp32 + }; + end else begin + instr_o = { + 7'h4, + 5'h0, + 2'b01, + instr_i[9:7], + 3'b100, + 2'b01, + instr_i[9:7], + riscv::OpcodeOp + }; + end + end else illegal_instr_o = 1'b1; + end + + 3'b011: begin + if (CVA6Cfg.RVB) begin + // c.sext.h -> sext.h rd', rd' + instr_o = { + 7'h30, + 5'h5, + 2'b01, + instr_i[9:7], + 3'b001, + 2'b01, + instr_i[9:7], + riscv::OpcodeOpImm + }; + end else illegal_instr_o = 1'b1; + end + + 3'b100: begin + if (CVA6Cfg.RVB) begin + // c.zext.w -> add.uw + if (riscv::IS_XLEN64) begin + instr_o = { + 7'h4, + 5'h0, + 2'b01, + instr_i[9:7], + 3'b000, + 2'b01, + instr_i[9:7], + riscv::OpcodeOp32 + }; + end else begin + illegal_instr_o = 1'b1; + end + end else illegal_instr_o = 1'b1; + end + + 3'b101: begin + // c.not -> xori rd', rd', -1 + instr_o = { + 12'hFFF, + 2'b01, + 
instr_i[9:7], + 3'b100, + 2'b01, + instr_i[9:7], + riscv::OpcodeOpImm + }; + end + + default: begin + instr_o = instr_i; + illegal_instr_o = 1; + end + endcase + end + end + endcase + end + endcase + end + + riscv::OpcodeC1J: begin + // 101: c.j -> jal x0, imm + instr_o = { + instr_i[12], + instr_i[8], + instr_i[10:9], + instr_i[6], + instr_i[7], + instr_i[2], + instr_i[11], + instr_i[5:3], + {9{instr_i[12]}}, + 4'b0, + ~instr_i[15], + riscv::OpcodeJal + }; + end + + riscv::OpcodeC1Beqz, riscv::OpcodeC1Bnez: begin + // 0: c.beqz -> beq rs1', x0, imm + // 1: c.bnez -> bne rs1', x0, imm + instr_o = { + {4{instr_i[12]}}, + instr_i[6:5], + instr_i[2], + 5'b0, + 2'b01, + instr_i[9:7], + 2'b00, + instr_i[13], + instr_i[11:10], + instr_i[4:3], + instr_i[12], + riscv::OpcodeBranch + }; + end + endcase + end + + // C2 + riscv::OpcodeC2: begin + unique case (instr_i[15:13]) + riscv::OpcodeC2Slli: begin + // c.slli -> slli rd, rd, shamt + instr_o = { + 6'b0, + instr_i[12], + instr_i[6:2], + instr_i[11:7], + 3'b001, + instr_i[11:7], + riscv::OpcodeOpImm + }; + end + + riscv::OpcodeC2Fldsp: begin + if (CVA6Cfg.FpPresent) begin + // c.fldsp -> fld rd, imm(x2) + instr_o = { + 3'b0, + instr_i[4:2], + instr_i[12], + instr_i[6:5], + 3'b000, + 5'h02, + 3'b011, + instr_i[11:7], + riscv::OpcodeLoadFp + }; + end else begin + illegal_instr_o = 1'b1; + end + end + + riscv::OpcodeC2Lwsp: begin + // c.lwsp -> lw rd, imm(x2) + instr_o = { + 4'b0, + instr_i[3:2], + instr_i[12], + instr_i[6:4], + 2'b00, + 5'h02, + 3'b010, + instr_i[11:7], + riscv::OpcodeLoad + }; + if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1; + end + + riscv::OpcodeC2Ldsp: begin + // RV64 + // c.ldsp -> ld rd, imm(x2) + // RV32 + // c.flwsp -> flw fprd, imm(x2) + if (riscv::IS_XLEN64) begin + instr_o = { + 3'b0, + instr_i[4:2], + instr_i[12], + instr_i[6:5], + 3'b000, + 5'h02, + 3'b011, + instr_i[11:7], + riscv::OpcodeLoad + }; + if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1; + end else begin + if (CVA6Cfg.FpPresent) 
begin + instr_o = { + 4'b0, + instr_i[3:2], + instr_i[12], + instr_i[6:4], + 2'b00, + 5'h02, + 3'b010, + instr_i[11:7], + riscv::OpcodeLoadFp + }; + end else begin + illegal_instr_o = 1'b1; + end + end + end + + riscv::OpcodeC2JalrMvAdd: begin + if (instr_i[12] == 1'b0) begin + // c.mv -> add rd/rs1, x0, rs2 + instr_o = {7'b0, instr_i[6:2], 5'b0, 3'b0, instr_i[11:7], riscv::OpcodeOp}; + + if (instr_i[6:2] == 5'b0) begin + // c.jr -> jalr x0, rd/rs1, 0 + instr_o = {12'b0, instr_i[11:7], 3'b0, 5'b0, riscv::OpcodeJalr}; + // rs1 != 0 + illegal_instr_o = (instr_i[11:7] != '0) ? 1'b0 : 1'b1; + end + end else begin + // c.add -> add rd, rd, rs2 + instr_o = {7'b0, instr_i[6:2], instr_i[11:7], 3'b0, instr_i[11:7], riscv::OpcodeOp}; + + if (instr_i[6:2] == 5'b0) begin + if (instr_i[11:7] == 5'b0) begin + // c.ebreak -> ebreak + instr_o = {32'h00_10_00_73}; + end else begin + // c.jalr -> jalr x1, rs1, 0 + instr_o = {12'b0, instr_i[11:7], 3'b000, 5'b00001, riscv::OpcodeJalr}; + end + end + end + end + + riscv::OpcodeC2Fsdsp: begin + if (CVA6Cfg.FpPresent) begin + // c.fsdsp -> fsd rs2, imm(x2) + instr_o = { + 3'b0, + instr_i[9:7], + instr_i[12], + instr_i[6:2], + 5'h02, + 3'b011, + instr_i[11:10], + 3'b000, + riscv::OpcodeStoreFp + }; + end else begin + illegal_instr_o = 1'b1; + end + end + + riscv::OpcodeC2Swsp: begin + // c.swsp -> sw rs2, imm(x2) + instr_o = { + 4'b0, + instr_i[8:7], + instr_i[12], + instr_i[6:2], + 5'h02, + 3'b010, + instr_i[11:9], + 2'b00, + riscv::OpcodeStore + }; + end + + riscv::OpcodeC2Sdsp: begin + // RV64 + // c.sdsp -> sd rs2, imm(x2) + // RV32 + // c.fswsp -> fsw fprs2, imm(x2) + if (riscv::IS_XLEN64) begin + instr_o = { + 3'b0, + instr_i[9:7], + instr_i[12], + instr_i[6:2], + 5'h02, + 3'b011, + instr_i[11:10], + 3'b000, + riscv::OpcodeStore + }; + end else begin + if (CVA6Cfg.FpPresent) begin + instr_o = { + 4'b0, + instr_i[8:7], + instr_i[12], + instr_i[6:2], + 5'h02, + 3'b010, + instr_i[11:9], + 2'b00, + riscv::OpcodeStoreFp + }; + end else 
begin + illegal_instr_o = 1'b1; + end + end + end + + default: begin + illegal_instr_o = 1'b1; + end + endcase + end + + // normal instruction + default: is_compressed_o = 1'b0; + endcase + + // Check if the instruction was illegal, if it was then output the offending instruction (zero-extended) + if (illegal_instr_o) begin + instr_o = instr_i; + end + end +endmodule diff --git a/test/type_param/core/controller.sv b/test/type_param/core/controller.sv new file mode 100644 index 00000000..c2db321c --- /dev/null +++ b/test/type_param/core/controller.sv @@ -0,0 +1,194 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// Author: Florian Zaruba, ETH Zurich +// Date: 08.05.2017 +// Description: Flush controller - turns fence, CSR, exception and mispredict requests into per-stage flush signals and a halt signal + + +module controller + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +) ( + input logic clk_i, + input logic rst_ni, + output logic set_pc_commit_o, // Set PC on PC Gen + output logic flush_if_o, // Flush the IF stage + output logic flush_unissued_instr_o, // Flush un-issued instructions of the scoreboard + output logic flush_id_o, // Flush ID stage + output logic flush_ex_o, // Flush EX stage + output logic flush_bp_o, // Flush branch predictors + output logic flush_icache_o, // Flush ICache + output logic flush_dcache_o, // Flush DCache (registered copy of the internal flush_dcache request) + input logic flush_dcache_ack_i, // Acknowledge the whole DCache Flush + output logic flush_tlb_o, // Flush TLBs + + input logic halt_csr_i, // Halt request from CSR (WFI instruction) + input logic halt_acc_i, // Halt request from accelerator dispatcher + output logic halt_o, // Halt signal to commit stage + input logic eret_i, // Return from exception + input logic ex_valid_i, // We got an exception, flush the pipeline + input logic set_debug_pc_i, // set the debug pc from CSR + input bp_resolve_t resolved_branch_i, // We got a resolved branch, check if we need to flush the front-end + input logic flush_csr_i, // We got an instruction which altered the CSR, flush the pipeline + input logic fence_i_i, // fence.i in + input logic fence_i, // fence in + input logic sfence_vma_i, // We got an instruction to flush the TLBs and pipeline + input logic flush_commit_i, // Flush request from commit stage + input logic flush_acc_i // Flush request from accelerator +); + + // active fence - high if we are currently flushing the dcache + logic fence_active_d, fence_active_q; + logic flush_dcache; // combinational dcache flush request, registered into flush_dcache_o below + + // ------------ + // Flush CTRL + // ------------ + always_comb begin : flush_ctrl // defaults first; later assignments in this block override earlier ones + fence_active_d = fence_active_q; + set_pc_commit_o = 1'b0; + flush_if_o = 1'b0; + flush_unissued_instr_o =
1'b0; + flush_id_o = 1'b0; + flush_ex_o = 1'b0; + flush_dcache = 1'b0; + flush_icache_o = 1'b0; + flush_tlb_o = 1'b0; + flush_bp_o = 1'b0; + // ------------ + // Mis-predict + // ------------ + // flush on mispredict + if (resolved_branch_i.is_mispredict) begin + // flush only un-issued instructions + flush_unissued_instr_o = 1'b1; + // and the IF stage + flush_if_o = 1'b1; + end + + // --------------------------------- + // FENCE + // --------------------------------- + if (fence_i) begin + // this can be seen as a CSR instruction with side-effect + set_pc_commit_o = 1'b1; + flush_if_o = 1'b1; + flush_unissued_instr_o = 1'b1; + flush_id_o = 1'b1; + flush_ex_o = 1'b1; + // the dcache flush is only requested when DCACHE_TYPE is config_pkg::WB; + // it is not needed with a write-through cache + if (DCACHE_TYPE == int'(config_pkg::WB)) begin + flush_dcache = 1'b1; + fence_active_d = 1'b1; + end + end + + // --------------------------------- + // FENCE.I + // --------------------------------- + if (fence_i_i) begin + set_pc_commit_o = 1'b1; + flush_if_o = 1'b1; + flush_unissued_instr_o = 1'b1; + flush_id_o = 1'b1; + flush_ex_o = 1'b1; + flush_icache_o = 1'b1; + // the dcache flush is only requested when DCACHE_TYPE is config_pkg::WB; + // it is not needed with a write-through cache + if (DCACHE_TYPE == int'(config_pkg::WB)) begin + flush_dcache = 1'b1; + fence_active_d = 1'b1; + end + end + + // the acknowledge handling is only used when DCACHE_TYPE is config_pkg::WB; + // it is not needed with a write-through cache + if (DCACHE_TYPE == int'(config_pkg::WB)) begin + // wait for the acknowledge here + if (flush_dcache_ack_i && fence_active_q) begin + fence_active_d = 1'b0; + // keep the flush dcache signal high as long as we didn't get the acknowledge from the cache + end else if (fence_active_q) begin + flush_dcache = 1'b1; + end + end + // --------------------------------- + // SFENCE.VMA + // --------------------------------- + if (CVA6Cfg.RVS && sfence_vma_i) begin + set_pc_commit_o = 1'b1; + flush_if_o = 1'b1; + flush_unissued_instr_o = 1'b1; + flush_id_o = 1'b1; +
flush_ex_o = 1'b1; + + flush_tlb_o = 1'b1; + end + + // Set PC to commit stage and flush pipeline (CSR or accelerator flush, else commit-stage flush when CVA6Cfg.RVA is set) + if (flush_csr_i || flush_acc_i) begin + set_pc_commit_o = 1'b1; + flush_if_o = 1'b1; + flush_unissued_instr_o = 1'b1; + flush_id_o = 1'b1; + flush_ex_o = 1'b1; + end else if (CVA6Cfg.RVA && flush_commit_i) begin + set_pc_commit_o = 1'b1; + flush_if_o = 1'b1; + flush_unissued_instr_o = 1'b1; + flush_id_o = 1'b1; + flush_ex_o = 1'b1; + end + + // --------------------------------- + // 1. Exception + // 2. Return from exception (or a debug PC request when CVA6Cfg.DebugEn is set) + // --------------------------------- + if (ex_valid_i || eret_i || (CVA6Cfg.DebugEn && set_debug_pc_i)) begin + // don't flush pcgen as we want to take the exception: Flush PCGen is not a flush signal + // for the PC Gen stage but instead tells it to take the PC we gave it + set_pc_commit_o = 1'b0; + flush_if_o = 1'b1; + flush_unissued_instr_o = 1'b1; + flush_id_o = 1'b1; + flush_ex_o = 1'b1; + // this potentially reduces performance, but is needed + // to suppress speculative fetches to virtual memory from + // machine mode.
TODO: remove when PMA checkers have been + // added to the system + flush_bp_o = 1'b1; + end + end + + // ---------------------- + // Halt Logic + // ---------------------- + always_comb begin + // halt the core on a CSR or accelerator halt request, or while a fence is active (WB dcache only) + halt_o = halt_csr_i || halt_acc_i || (DCACHE_TYPE == int'(config_pkg::WB) && fence_active_q); + end + + // ---------------------- + // Registers + // ---------------------- + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + fence_active_q <= 1'b0; + flush_dcache_o <= 1'b0; + end else begin + fence_active_q <= fence_active_d; + // register the flush signal, this signal might be critical + flush_dcache_o <= flush_dcache; + end + end +endmodule diff --git a/test/type_param/core/csr_buffer.sv b/test/type_param/core/csr_buffer.sv new file mode 100644 index 00000000..57be04dd --- /dev/null +++ b/test/type_param/core/csr_buffer.sv @@ -0,0 +1,76 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 05.05.2017 +// Description: Buffer to hold CSR address, this acts like a functional unit +// to the scoreboard.
+ + +module csr_buffer + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, // Clears the valid bit of the buffered entry + + input fu_data_t fu_data_i, // operand_a is forwarded as the result, operand_b[11:0] is stored as the CSR address + + output logic csr_ready_o, // FU is ready, i.e. not busy + input logic csr_valid_i, // Input is valid + output riscv::xlen_t csr_result_o, // Pass-through of fu_data_i.operand_a + input logic csr_commit_i, // commit the pending CSR OP + // to CSR file + output logic [11:0] csr_addr_o // CSR address to commit stage +); + // this is a single entry store buffer for the address of the CSR + // which we are going to need in the commit stage + struct packed { + logic [11:0] csr_address; + logic valid; + } + csr_reg_n, csr_reg_q; + + // control logic, scoreboard signals + assign csr_result_o = fu_data_i.operand_a; + assign csr_addr_o = csr_reg_q.csr_address; + + // write logic + always_comb begin : write + csr_reg_n = csr_reg_q; + // by default we are ready + csr_ready_o = 1'b1; + // if we have a valid uncommitted CSR request or are just getting one WITHOUT a commit in, we are not ready + if ((csr_reg_q.valid || csr_valid_i) && ~csr_commit_i) csr_ready_o = 1'b0; + // if we got a valid from the scoreboard + // store the CSR address + if (csr_valid_i) begin + csr_reg_n.csr_address = fu_data_i.operand_b[11:0]; + csr_reg_n.valid = 1'b1; + end + // if we get a commit and no new valid instruction -> clear the valid bit + if (csr_commit_i && ~csr_valid_i) begin + csr_reg_n.valid = 1'b0; + end + // clear the valid bit on a flush; being last, this overrides the assignments above + if (flush_i) csr_reg_n.valid = 1'b0; + end + // sequential process + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + csr_reg_q <= '{default: 0}; + end else begin + csr_reg_q <= csr_reg_n; + end + end + +endmodule diff --git a/test/type_param/core/csr_regfile.sv b/test/type_param/core/csr_regfile.sv new file mode 100644 index 00000000..88c16444 --- /dev/null +++ b/test/type_param/core/csr_regfile.sv @@ -0,0
+1,1646 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 05.05.2017 +// Description: CSR Register File as specified by RISC-V + + +module csr_regfile + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int AsidWidth = 1, + parameter int unsigned MHPMCounterNum = 6 +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic time_irq_i, // Timer threw a interrupt + // send a flush request out if a CSR with a side effect has changed (e.g. 
written) + output logic flush_o, + output logic halt_csr_o, // halt requested + // commit acknowledge + input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i, // the instruction we want to commit + input logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i, // Commit acknowledged a instruction -> increase instret CSR + // Core and Cluster ID + input logic[riscv::VLEN-1:0] boot_addr_i, // Address from which to start booting, mtvec is set to the same address + input logic[riscv::XLEN-1:0] hart_id_i, // Hart id in a multicore environment (reflected in a CSR) + // we are taking an exception + input exception_t ex_i, // We've got an exception from the commit stage, take it + + input fu_op csr_op_i, // Operation to perform on the CSR file + input logic [11:0] csr_addr_i, // Address of the register to read/write + input logic [riscv::XLEN-1:0] csr_wdata_i, // Write data in + output logic [riscv::XLEN-1:0] csr_rdata_o, // Read data out + input logic dirty_fp_state_i, // Mark the FP sate as dirty + input logic csr_write_fflags_i, // Write fflags register e.g.: we are retiring a floating point instruction + input logic dirty_v_state_i, // Mark the V state as dirty + input logic [riscv::VLEN-1:0] pc_i, // PC of instruction accessing the CSR + output exception_t csr_exception_o, // attempts to access a CSR without appropriate privilege + // level or to write a read-only register also + // raises illegal instruction exceptions. 
+ // Interrupts/Exceptions + output logic [riscv::VLEN-1:0] epc_o, // Output the exception PC to PC Gen, the correct CSR (mepc, sepc) is set accordingly + output logic eret_o, // Return from exception, set the PC of epc_o + output logic [riscv::VLEN-1:0] trap_vector_base_o, // Output base of exception vector, correct CSR is output (mtvec, stvec) + output riscv::priv_lvl_t priv_lvl_o, // Current privilege level the CPU is in + // FP Imprecise exceptions + input logic [4:0] acc_fflags_ex_i, // Imprecise FP exception from the accelerator (fcsr.fflags format) + input logic acc_fflags_ex_valid_i, // An FP exception from the accelerator occurred + // FPU + output riscv::xs_t fs_o, // Floating point extension status + output logic [4:0] fflags_o, // Floating-Point Accured Exceptions + output logic [2:0] frm_o, // Floating-Point Dynamic Rounding Mode + output logic [6:0] fprec_o, // Floating-Point Precision Control + // Vector extension + output riscv::xs_t vs_o, // Vector extension status + // Decoder + output irq_ctrl_t irq_ctrl_o, // interrupt management to id stage + // MMU + output logic en_translation_o, // enable VA translation + output logic en_ld_st_translation_o, // enable VA translation for load and stores + output riscv::priv_lvl_t ld_st_priv_lvl_o, // Privilege level at which load and stores should happen + output logic sum_o, + output logic mxr_o, + output logic [riscv::PPNW-1:0] satp_ppn_o, + output logic [AsidWidth-1:0] asid_o, + // external interrupts + input logic [1:0] irq_i, // external interrupt in + input logic ipi_i, // inter processor interrupt -> connected to machine mode sw + input logic debug_req_i, // debug request in + output logic set_debug_pc_o, + // Virtualization Support + output logic tvm_o, // trap virtual memory + output logic tw_o, // timeout wait + output logic tsr_o, // trap sret + output logic debug_mode_o, // we are in debug mode -> that will change some decoding + output logic single_step_o, // we are in single-step mode + // 
Caches + output logic icache_en_o, // L1 ICache Enable + output logic dcache_en_o, // L1 DCache Enable + // Accelerator + output logic acc_cons_en_o, // Accelerator memory consistent mode + // Performance Counter + output logic [11:0] perf_addr_o, // read/write address to performance counter module + output logic [riscv::XLEN-1:0] perf_data_o, // write data to performance counter module + input logic [riscv::XLEN-1:0] perf_data_i, // read data from performance counter module + output logic perf_we_o, + // PMPs + output riscv::pmpcfg_t [15:0] pmpcfg_o, // PMP configuration containing pmpcfg for max 16 PMPs + output logic [15:0][riscv::PLEN-3:0] pmpaddr_o, // PMP addresses + output logic [31:0] mcountinhibit_o +); + // internal signal to keep track of access exceptions + logic read_access_exception, update_access_exception, privilege_violation; + logic csr_we, csr_read; + riscv::xlen_t csr_wdata, csr_rdata; + riscv::priv_lvl_t trap_to_priv_lvl; + // register for enabling load store address translation, this is critical, hence the register + logic en_ld_st_translation_d, en_ld_st_translation_q; + logic mprv; + logic mret; // return from M-mode exception + logic sret; // return from S-mode exception + logic dret; // return from debug mode + // CSR write causes us to mark the FPU state as dirty + logic dirty_fp_state_csr; + riscv::mstatus_rv_t mstatus_q, mstatus_d; + riscv::xlen_t mstatus_extended; + riscv::satp_t satp_q, satp_d; + riscv::dcsr_t dcsr_q, dcsr_d; + riscv::csr_t csr_addr; + // privilege level register + riscv::priv_lvl_t priv_lvl_d, priv_lvl_q; + // we are in debug + logic debug_mode_q, debug_mode_d; + logic mtvec_rst_load_q; // used to determine whether we came out of reset + + riscv::xlen_t dpc_q, dpc_d; + riscv::xlen_t dscratch0_q, dscratch0_d; + riscv::xlen_t dscratch1_q, dscratch1_d; + riscv::xlen_t mtvec_q, mtvec_d; + riscv::xlen_t medeleg_q, medeleg_d; + riscv::xlen_t mideleg_q, mideleg_d; + riscv::xlen_t mip_q, mip_d; + riscv::xlen_t mie_q, mie_d; 
+ riscv::xlen_t mcounteren_q, mcounteren_d; + riscv::xlen_t mscratch_q, mscratch_d; + riscv::xlen_t mepc_q, mepc_d; + riscv::xlen_t mcause_q, mcause_d; + riscv::xlen_t mtval_q, mtval_d; + logic fiom_d, fiom_q; + + riscv::xlen_t stvec_q, stvec_d; + riscv::xlen_t scounteren_q, scounteren_d; + riscv::xlen_t sscratch_q, sscratch_d; + riscv::xlen_t sepc_q, sepc_d; + riscv::xlen_t scause_q, scause_d; + riscv::xlen_t stval_q, stval_d; + riscv::xlen_t dcache_q, dcache_d; + riscv::xlen_t icache_q, icache_d; + riscv::xlen_t acc_cons_q, acc_cons_d; + + logic wfi_d, wfi_q; + + logic [63:0] cycle_q, cycle_d; + logic [63:0] instret_q, instret_d; + + riscv::pmpcfg_t [15:0] pmpcfg_q, pmpcfg_d; + logic [15:0][riscv::PLEN-3:0] pmpaddr_q, pmpaddr_d; + logic [MHPMCounterNum+3-1:0] mcountinhibit_d, mcountinhibit_q; + logic [3:0] index; + + localparam riscv::xlen_t IsaCode = (riscv::XLEN'(CVA6Cfg.RVA) << 0) // A - Atomic Instructions extension + | (riscv::XLEN'(CVA6Cfg.RVB) << 1) // C - Bitmanip extension + | (riscv::XLEN'(CVA6Cfg.RVC) << 2) // C - Compressed extension + | (riscv::XLEN'(CVA6Cfg.RVD) << 3) // D - Double precision floating-point extension + | (riscv::XLEN'(CVA6Cfg.RVF) << 5) // F - Single precision floating-point extension + | (riscv::XLEN'(1) << 8) // I - RV32I/64I/128I base ISA + | (riscv::XLEN'(1) << 12) // M - Integer Multiply/Divide extension + | (riscv::XLEN'(0) << 13) // N - User level interrupts supported + | (riscv::XLEN'(CVA6Cfg.RVS) << 18) // S - Supervisor mode implemented + | (riscv::XLEN'(CVA6Cfg.RVU) << 20) // U - User mode implemented + | (riscv::XLEN'(CVA6Cfg.RVV) << 21) // V - Vector extension + | (riscv::XLEN'(CVA6Cfg.NSX) << 23) // X - Non-standard extensions present + | ((riscv::XLEN == 64 ? 
2 : 1) << riscv::XLEN - 2); // MXL + + assign pmpcfg_o = pmpcfg_q[15:0]; + assign pmpaddr_o = pmpaddr_q; + + riscv::fcsr_t fcsr_q, fcsr_d; + // ---------------- + // Assignments + // ---------------- + assign csr_addr = riscv::csr_t'(csr_addr_i); + assign fs_o = mstatus_q.fs; + assign vs_o = mstatus_q.vs; + // ---------------- + // CSR Read logic + // ---------------- + assign mstatus_extended = riscv::IS_XLEN64 ? mstatus_q[riscv::XLEN-1:0] : + {mstatus_q.sd, mstatus_q.wpri3[7:0], mstatus_q[22:0]}; + + + always_comb begin : csr_read_process + // a read access exception can only occur if we attempt to read a CSR which does not exist + read_access_exception = 1'b0; + csr_rdata = '0; + perf_addr_o = csr_addr.address[11:0]; + index = '0; + + if (csr_read) begin + unique case (csr_addr.address) + riscv::CSR_FFLAGS: begin + if (CVA6Cfg.FpPresent) begin + csr_rdata = {{riscv::XLEN - 5{1'b0}}, fcsr_q.fflags}; + end else begin + read_access_exception = 1'b1; + end + end + riscv::CSR_FRM: begin + if (CVA6Cfg.FpPresent) begin + csr_rdata = {{riscv::XLEN - 3{1'b0}}, fcsr_q.frm}; + end else begin + read_access_exception = 1'b1; + end + end + riscv::CSR_FCSR: begin + if (CVA6Cfg.FpPresent) begin + csr_rdata = {{riscv::XLEN - 8{1'b0}}, fcsr_q.frm, fcsr_q.fflags}; + end else begin + read_access_exception = 1'b1; + end + end + // non-standard extension + riscv::CSR_FTRAN: begin + if (CVA6Cfg.FpPresent) begin + csr_rdata = {{riscv::XLEN - 7{1'b0}}, fcsr_q.fprec}; + end else begin + read_access_exception = 1'b1; + end + end + // debug registers + riscv::CSR_DCSR: + if (CVA6Cfg.DebugEn) csr_rdata = {{riscv::XLEN - 32{1'b0}}, dcsr_q}; + else read_access_exception = 1'b1; + riscv::CSR_DPC: + if (CVA6Cfg.DebugEn) csr_rdata = dpc_q; + else read_access_exception = 1'b1; + riscv::CSR_DSCRATCH0: + if (CVA6Cfg.DebugEn) csr_rdata = dscratch0_q; + else read_access_exception = 1'b1; + riscv::CSR_DSCRATCH1: + if (CVA6Cfg.DebugEn) csr_rdata = dscratch1_q; + else read_access_exception = 1'b1; + // 
trigger module registers + riscv::CSR_TSELECT: read_access_exception = 1'b1; // not implemented + riscv::CSR_TDATA1: read_access_exception = 1'b1; // not implemented + riscv::CSR_TDATA2: read_access_exception = 1'b1; // not implemented + riscv::CSR_TDATA3: read_access_exception = 1'b1; // not implemented + // supervisor registers + riscv::CSR_SSTATUS: begin + if (CVA6Cfg.RVS) + csr_rdata = mstatus_extended & ariane_pkg::SMODE_STATUS_READ_MASK[riscv::XLEN-1:0]; + else read_access_exception = 1'b1; + end + riscv::CSR_SIE: + if (CVA6Cfg.RVS) csr_rdata = mie_q & mideleg_q; + else read_access_exception = 1'b1; + riscv::CSR_SIP: + if (CVA6Cfg.RVS) csr_rdata = mip_q & mideleg_q; + else read_access_exception = 1'b1; + riscv::CSR_STVEC: + if (CVA6Cfg.RVS) csr_rdata = stvec_q; + else read_access_exception = 1'b1; + riscv::CSR_SCOUNTEREN: + if (CVA6Cfg.RVS) csr_rdata = scounteren_q; + else read_access_exception = 1'b1; + riscv::CSR_SSCRATCH: + if (CVA6Cfg.RVS) csr_rdata = sscratch_q; + else read_access_exception = 1'b1; + riscv::CSR_SEPC: + if (CVA6Cfg.RVS) csr_rdata = sepc_q; + else read_access_exception = 1'b1; + riscv::CSR_SCAUSE: + if (CVA6Cfg.RVS) csr_rdata = scause_q; + else read_access_exception = 1'b1; + riscv::CSR_STVAL: + if (CVA6Cfg.RVS) csr_rdata = stval_q; + else read_access_exception = 1'b1; + riscv::CSR_SATP: begin + if (CVA6Cfg.RVS) begin + // intercept reads to SATP if in S-Mode and TVM is enabled + if (priv_lvl_o == riscv::PRIV_LVL_S && mstatus_q.tvm) begin + read_access_exception = 1'b1; + end else begin + csr_rdata = satp_q; + end + end else begin + read_access_exception = 1'b1; + end + end + // machine mode registers + riscv::CSR_MSTATUS: csr_rdata = mstatus_extended; + riscv::CSR_MSTATUSH: + if (riscv::XLEN == 32) csr_rdata = '0; + else read_access_exception = 1'b1; + riscv::CSR_MISA: csr_rdata = IsaCode; + riscv::CSR_MEDELEG: + if (CVA6Cfg.RVS) csr_rdata = medeleg_q; + else read_access_exception = 1'b1; + riscv::CSR_MIDELEG: + if (CVA6Cfg.RVS) csr_rdata 
= mideleg_q; + else read_access_exception = 1'b1; + riscv::CSR_MIE: csr_rdata = mie_q; + riscv::CSR_MTVEC: csr_rdata = mtvec_q; + riscv::CSR_MCOUNTEREN: csr_rdata = mcounteren_q; + riscv::CSR_MSCRATCH: csr_rdata = mscratch_q; + riscv::CSR_MEPC: csr_rdata = mepc_q; + riscv::CSR_MCAUSE: csr_rdata = mcause_q; + riscv::CSR_MTVAL: csr_rdata = mtval_q; + riscv::CSR_MIP: csr_rdata = mip_q; + riscv::CSR_MENVCFG: csr_rdata = '0 | fiom_q; + riscv::CSR_MENVCFGH: begin + if (riscv::XLEN == 32) csr_rdata = '0; + else read_access_exception = 1'b1; + end + riscv::CSR_MVENDORID: csr_rdata = OPENHWGROUP_MVENDORID; + riscv::CSR_MARCHID: csr_rdata = ARIANE_MARCHID; + riscv::CSR_MIMPID: csr_rdata = '0; // not implemented + riscv::CSR_MHARTID: csr_rdata = hart_id_i; + riscv::CSR_MCONFIGPTR: csr_rdata = '0; // not implemented + riscv::CSR_MCOUNTINHIBIT: + csr_rdata = {{(riscv::XLEN - (MHPMCounterNum + 3)) {1'b0}}, mcountinhibit_q}; + // Counters and Timers + riscv::CSR_MCYCLE: csr_rdata = cycle_q[riscv::XLEN-1:0]; + riscv::CSR_MCYCLEH: + if (riscv::XLEN == 32) csr_rdata = cycle_q[63:32]; + else read_access_exception = 1'b1; + riscv::CSR_MINSTRET: csr_rdata = instret_q[riscv::XLEN-1:0]; + riscv::CSR_MINSTRETH: + if (riscv::XLEN == 32) csr_rdata = instret_q[63:32]; + else read_access_exception = 1'b1; + riscv::CSR_CYCLE: csr_rdata = cycle_q[riscv::XLEN-1:0]; + riscv::CSR_CYCLEH: + if (riscv::XLEN == 32) csr_rdata = cycle_q[63:32]; + else read_access_exception = 1'b1; + riscv::CSR_INSTRET: csr_rdata = instret_q[riscv::XLEN-1:0]; + riscv::CSR_INSTRETH: + if (riscv::XLEN == 32) csr_rdata = instret_q[63:32]; + else read_access_exception = 1'b1; + //Event Selector + riscv::CSR_MHPM_EVENT_3, + riscv::CSR_MHPM_EVENT_4, + riscv::CSR_MHPM_EVENT_5, + riscv::CSR_MHPM_EVENT_6, + riscv::CSR_MHPM_EVENT_7, + riscv::CSR_MHPM_EVENT_8, + riscv::CSR_MHPM_EVENT_9, + riscv::CSR_MHPM_EVENT_10, + riscv::CSR_MHPM_EVENT_11, + riscv::CSR_MHPM_EVENT_12, + riscv::CSR_MHPM_EVENT_13, + riscv::CSR_MHPM_EVENT_14, + 
riscv::CSR_MHPM_EVENT_15, + riscv::CSR_MHPM_EVENT_16, + riscv::CSR_MHPM_EVENT_17, + riscv::CSR_MHPM_EVENT_18, + riscv::CSR_MHPM_EVENT_19, + riscv::CSR_MHPM_EVENT_20, + riscv::CSR_MHPM_EVENT_21, + riscv::CSR_MHPM_EVENT_22, + riscv::CSR_MHPM_EVENT_23, + riscv::CSR_MHPM_EVENT_24, + riscv::CSR_MHPM_EVENT_25, + riscv::CSR_MHPM_EVENT_26, + riscv::CSR_MHPM_EVENT_27, + riscv::CSR_MHPM_EVENT_28, + riscv::CSR_MHPM_EVENT_29, + riscv::CSR_MHPM_EVENT_30, + riscv::CSR_MHPM_EVENT_31 : + csr_rdata = perf_data_i; + + riscv::CSR_MHPM_COUNTER_3, + riscv::CSR_MHPM_COUNTER_4, + riscv::CSR_MHPM_COUNTER_5, + riscv::CSR_MHPM_COUNTER_6, + riscv::CSR_MHPM_COUNTER_7, + riscv::CSR_MHPM_COUNTER_8, + riscv::CSR_MHPM_COUNTER_9, + riscv::CSR_MHPM_COUNTER_10, + riscv::CSR_MHPM_COUNTER_11, + riscv::CSR_MHPM_COUNTER_12, + riscv::CSR_MHPM_COUNTER_13, + riscv::CSR_MHPM_COUNTER_14, + riscv::CSR_MHPM_COUNTER_15, + riscv::CSR_MHPM_COUNTER_16, + riscv::CSR_MHPM_COUNTER_17, + riscv::CSR_MHPM_COUNTER_18, + riscv::CSR_MHPM_COUNTER_19, + riscv::CSR_MHPM_COUNTER_20, + riscv::CSR_MHPM_COUNTER_21, + riscv::CSR_MHPM_COUNTER_22, + riscv::CSR_MHPM_COUNTER_23, + riscv::CSR_MHPM_COUNTER_24, + riscv::CSR_MHPM_COUNTER_25, + riscv::CSR_MHPM_COUNTER_26, + riscv::CSR_MHPM_COUNTER_27, + riscv::CSR_MHPM_COUNTER_28, + riscv::CSR_MHPM_COUNTER_29, + riscv::CSR_MHPM_COUNTER_30, + riscv::CSR_MHPM_COUNTER_31 : + csr_rdata = perf_data_i; + + riscv::CSR_MHPM_COUNTER_3H, + riscv::CSR_MHPM_COUNTER_4H, + riscv::CSR_MHPM_COUNTER_5H, + riscv::CSR_MHPM_COUNTER_6H, + riscv::CSR_MHPM_COUNTER_7H, + riscv::CSR_MHPM_COUNTER_8H, + riscv::CSR_MHPM_COUNTER_9H, + riscv::CSR_MHPM_COUNTER_10H, + riscv::CSR_MHPM_COUNTER_11H, + riscv::CSR_MHPM_COUNTER_12H, + riscv::CSR_MHPM_COUNTER_13H, + riscv::CSR_MHPM_COUNTER_14H, + riscv::CSR_MHPM_COUNTER_15H, + riscv::CSR_MHPM_COUNTER_16H, + riscv::CSR_MHPM_COUNTER_17H, + riscv::CSR_MHPM_COUNTER_18H, + riscv::CSR_MHPM_COUNTER_19H, + riscv::CSR_MHPM_COUNTER_20H, + riscv::CSR_MHPM_COUNTER_21H, + 
riscv::CSR_MHPM_COUNTER_22H, + riscv::CSR_MHPM_COUNTER_23H, + riscv::CSR_MHPM_COUNTER_24H, + riscv::CSR_MHPM_COUNTER_25H, + riscv::CSR_MHPM_COUNTER_26H, + riscv::CSR_MHPM_COUNTER_27H, + riscv::CSR_MHPM_COUNTER_28H, + riscv::CSR_MHPM_COUNTER_29H, + riscv::CSR_MHPM_COUNTER_30H, + riscv::CSR_MHPM_COUNTER_31H : + if (riscv::XLEN == 32) csr_rdata = perf_data_i; + else read_access_exception = 1'b1; + + // Performance counters (User Mode - R/O Shadows) + riscv::CSR_HPM_COUNTER_3, + riscv::CSR_HPM_COUNTER_4, + riscv::CSR_HPM_COUNTER_5, + riscv::CSR_HPM_COUNTER_6, + riscv::CSR_HPM_COUNTER_7, + riscv::CSR_HPM_COUNTER_8, + riscv::CSR_HPM_COUNTER_9, + riscv::CSR_HPM_COUNTER_10, + riscv::CSR_HPM_COUNTER_11, + riscv::CSR_HPM_COUNTER_12, + riscv::CSR_HPM_COUNTER_13, + riscv::CSR_HPM_COUNTER_14, + riscv::CSR_HPM_COUNTER_15, + riscv::CSR_HPM_COUNTER_16, + riscv::CSR_HPM_COUNTER_17, + riscv::CSR_HPM_COUNTER_18, + riscv::CSR_HPM_COUNTER_19, + riscv::CSR_HPM_COUNTER_20, + riscv::CSR_HPM_COUNTER_21, + riscv::CSR_HPM_COUNTER_22, + riscv::CSR_HPM_COUNTER_23, + riscv::CSR_HPM_COUNTER_24, + riscv::CSR_HPM_COUNTER_25, + riscv::CSR_HPM_COUNTER_26, + riscv::CSR_HPM_COUNTER_27, + riscv::CSR_HPM_COUNTER_28, + riscv::CSR_HPM_COUNTER_29, + riscv::CSR_HPM_COUNTER_30, + riscv::CSR_HPM_COUNTER_31 : + csr_rdata = perf_data_i; + + riscv::CSR_HPM_COUNTER_3H, + riscv::CSR_HPM_COUNTER_4H, + riscv::CSR_HPM_COUNTER_5H, + riscv::CSR_HPM_COUNTER_6H, + riscv::CSR_HPM_COUNTER_7H, + riscv::CSR_HPM_COUNTER_8H, + riscv::CSR_HPM_COUNTER_9H, + riscv::CSR_HPM_COUNTER_10H, + riscv::CSR_HPM_COUNTER_11H, + riscv::CSR_HPM_COUNTER_12H, + riscv::CSR_HPM_COUNTER_13H, + riscv::CSR_HPM_COUNTER_14H, + riscv::CSR_HPM_COUNTER_15H, + riscv::CSR_HPM_COUNTER_16H, + riscv::CSR_HPM_COUNTER_17H, + riscv::CSR_HPM_COUNTER_18H, + riscv::CSR_HPM_COUNTER_19H, + riscv::CSR_HPM_COUNTER_20H, + riscv::CSR_HPM_COUNTER_21H, + riscv::CSR_HPM_COUNTER_22H, + riscv::CSR_HPM_COUNTER_23H, + riscv::CSR_HPM_COUNTER_24H, + riscv::CSR_HPM_COUNTER_25H, + 
riscv::CSR_HPM_COUNTER_26H, + riscv::CSR_HPM_COUNTER_27H, + riscv::CSR_HPM_COUNTER_28H, + riscv::CSR_HPM_COUNTER_29H, + riscv::CSR_HPM_COUNTER_30H, + riscv::CSR_HPM_COUNTER_31H : + if (riscv::XLEN == 32) csr_rdata = perf_data_i; + else read_access_exception = 1'b1; + + // custom (non RISC-V) cache control + riscv::CSR_DCACHE: csr_rdata = dcache_q; + riscv::CSR_ICACHE: csr_rdata = icache_q; + // custom (non RISC-V) accelerator memory consistency mode + riscv::CSR_ACC_CONS: begin + if (CVA6Cfg.EnableAccelerator) begin + csr_rdata = acc_cons_q; + end else begin + read_access_exception = 1'b1; + end + end + // PMPs + riscv::CSR_PMPCFG0: csr_rdata = pmpcfg_q[riscv::XLEN/8-1:0]; + riscv::CSR_PMPCFG1: + if (riscv::XLEN == 32) csr_rdata = pmpcfg_q[7:4]; + else read_access_exception = 1'b1; + riscv::CSR_PMPCFG2: csr_rdata = pmpcfg_q[8+:riscv::XLEN/8]; + riscv::CSR_PMPCFG3: + if (riscv::XLEN == 32) csr_rdata = pmpcfg_q[15:12]; + else read_access_exception = 1'b1; + // PMPADDR + riscv::CSR_PMPADDR0, + riscv::CSR_PMPADDR1, + riscv::CSR_PMPADDR2, + riscv::CSR_PMPADDR3, + riscv::CSR_PMPADDR4, + riscv::CSR_PMPADDR5, + riscv::CSR_PMPADDR6, + riscv::CSR_PMPADDR7, + riscv::CSR_PMPADDR8, + riscv::CSR_PMPADDR9, + riscv::CSR_PMPADDR10, + riscv::CSR_PMPADDR11, + riscv::CSR_PMPADDR12, + riscv::CSR_PMPADDR13, + riscv::CSR_PMPADDR14, + riscv::CSR_PMPADDR15: begin + // index is specified by the last byte in the address + index = csr_addr.csr_decode.address[3:0]; + // Important: we only support granularity 8 bytes (G=1) + // -> last bit of pmpaddr must be set 0/1 based on the mode: + // NA4, NAPOT: 1 + // TOR, OFF: 0 + if (pmpcfg_q[index].addr_mode[1] == 1'b1) csr_rdata = pmpaddr_q[index][riscv::PLEN-3:0]; + else csr_rdata = {pmpaddr_q[index][riscv::PLEN-3:1], 1'b0}; + end + default: read_access_exception = 1'b1; + endcase + end + end + // --------------------------- + // CSR Write and update logic + // --------------------------- + riscv::xlen_t mask; + always_comb begin : csr_update + 
automatic riscv::satp_t satp; + automatic logic [63:0] instret; + + + satp = satp_q; + instret = instret_q; + + mcountinhibit_d = mcountinhibit_q; + + // -------------------- + // Counters + // -------------------- + cycle_d = cycle_q; + instret_d = instret_q; + if (!debug_mode_q) begin + // increase instruction retired counter + for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin + if (commit_ack_i[i] && !ex_i.valid && !mcountinhibit_q[2]) instret++; + end + instret_d = instret; + // increment the cycle count + if (!mcountinhibit_q[0]) cycle_d = cycle_q + 1'b1; + else cycle_d = cycle_q; + end + + eret_o = 1'b0; + flush_o = 1'b0; + update_access_exception = 1'b0; + + set_debug_pc_o = 1'b0; + + perf_we_o = 1'b0; + perf_data_o = 'b0; + + fcsr_d = fcsr_q; + + priv_lvl_d = priv_lvl_q; + debug_mode_d = debug_mode_q; + dcsr_d = dcsr_q; + dpc_d = dpc_q; + dscratch0_d = dscratch0_q; + dscratch1_d = dscratch1_q; + mstatus_d = mstatus_q; + + // check whether we come out of reset + // this is a workaround. some tools have issues + // having boot_addr_i in the asynchronous + // reset assignment to mtvec_d, even though + // boot_addr_i will be assigned a constant + // on the top-level. 
+ if (mtvec_rst_load_q) begin + mtvec_d = {{riscv::XLEN - riscv::VLEN{1'b0}}, boot_addr_i} + 'h40; + end else begin + mtvec_d = mtvec_q; + end + + medeleg_d = medeleg_q; + mideleg_d = mideleg_q; + mip_d = mip_q; + mie_d = mie_q; + mepc_d = mepc_q; + mcause_d = mcause_q; + mcounteren_d = mcounteren_q; + mscratch_d = mscratch_q; + mtval_d = mtval_q; + fiom_d = fiom_q; + dcache_d = dcache_q; + icache_d = icache_q; + acc_cons_d = acc_cons_q; + + sepc_d = sepc_q; + scause_d = scause_q; + stvec_d = stvec_q; + scounteren_d = scounteren_q; + sscratch_d = sscratch_q; + stval_d = stval_q; + satp_d = satp_q; + + en_ld_st_translation_d = en_ld_st_translation_q; + dirty_fp_state_csr = 1'b0; + + pmpcfg_d = pmpcfg_q; + pmpaddr_d = pmpaddr_q; + + // check for correct access rights and that we are writing + if (csr_we) begin + unique case (csr_addr.address) + // Floating-Point + riscv::CSR_FFLAGS: begin + if (CVA6Cfg.FpPresent) begin + dirty_fp_state_csr = 1'b1; + fcsr_d.fflags = csr_wdata[4:0]; + // this instruction has side-effects + flush_o = 1'b1; + end else begin + update_access_exception = 1'b1; + end + end + riscv::CSR_FRM: begin + if (CVA6Cfg.FpPresent) begin + dirty_fp_state_csr = 1'b1; + fcsr_d.frm = csr_wdata[2:0]; + // this instruction has side-effects + flush_o = 1'b1; + end else begin + update_access_exception = 1'b1; + end + end + riscv::CSR_FCSR: begin + if (CVA6Cfg.FpPresent) begin + dirty_fp_state_csr = 1'b1; + fcsr_d[7:0] = csr_wdata[7:0]; // ignore writes to reserved space + // this instruction has side-effects + flush_o = 1'b1; + end else begin + update_access_exception = 1'b1; + end + end + riscv::CSR_FTRAN: begin + if (CVA6Cfg.FpPresent) begin + dirty_fp_state_csr = 1'b1; + fcsr_d.fprec = csr_wdata[6:0]; // ignore writes to reserved space + // this instruction has side-effects + flush_o = 1'b1; + end else begin + update_access_exception = 1'b1; + end + end + // debug CSR + riscv::CSR_DCSR: begin + if (CVA6Cfg.DebugEn) begin + dcsr_d = csr_wdata[31:0]; + // 
debug is implemented + dcsr_d.xdebugver = 4'h4; + // currently not supported + dcsr_d.nmip = 1'b0; + dcsr_d.stopcount = 1'b0; + dcsr_d.stoptime = 1'b0; + end else begin + update_access_exception = 1'b1; + end + end + riscv::CSR_DPC: + if (CVA6Cfg.DebugEn) dpc_d = csr_wdata; + else update_access_exception = 1'b1; + riscv::CSR_DSCRATCH0: + if (CVA6Cfg.DebugEn) dscratch0_d = csr_wdata; + else update_access_exception = 1'b1; + riscv::CSR_DSCRATCH1: + if (CVA6Cfg.DebugEn) dscratch1_d = csr_wdata; + else update_access_exception = 1'b1; + // trigger module CSRs + riscv::CSR_TSELECT: update_access_exception = 1'b1; // not implemented + riscv::CSR_TDATA1: update_access_exception = 1'b1; // not implemented + riscv::CSR_TDATA2: update_access_exception = 1'b1; // not implemented + riscv::CSR_TDATA3: update_access_exception = 1'b1; // not implemented + // sstatus is a subset of mstatus - mask it accordingly + riscv::CSR_SSTATUS: begin + if (CVA6Cfg.RVS) begin + mask = ariane_pkg::SMODE_STATUS_WRITE_MASK[riscv::XLEN-1:0]; + mstatus_d = (mstatus_q & ~{{64-riscv::XLEN{1'b0}}, mask}) | {{64-riscv::XLEN{1'b0}}, (csr_wdata & mask)}; + // hardwire to zero if floating point extension is not present + if (!CVA6Cfg.FpPresent) begin + mstatus_d.fs = riscv::Off; + end + // hardwire to zero if vector extension is not present + if (!CVA6Cfg.RVV) begin + mstatus_d.vs = riscv::Off; + end + // this instruction has side-effects + flush_o = 1'b1; + end else begin + update_access_exception = 1'b1; + end + end + // even machine mode interrupts can be visible and set-able to supervisor + // if the corresponding bit in mideleg is set + riscv::CSR_SIE: begin + if (CVA6Cfg.RVS) begin + // the mideleg makes sure only delegate-able register (and therefore also only implemented registers) are written + mie_d = (mie_q & ~mideleg_q) | (csr_wdata & mideleg_q); + end else begin + update_access_exception = 1'b1; + end + end + + riscv::CSR_SIP: begin + if (CVA6Cfg.RVS) begin + // only the supervisor software 
interrupt is write-able, iff delegated + mask = riscv::MIP_SSIP & mideleg_q; + mip_d = (mip_q & ~mask) | (csr_wdata & mask); + end else begin + update_access_exception = 1'b1; + end + end + + riscv::CSR_STVEC: + if (CVA6Cfg.RVS) stvec_d = {csr_wdata[riscv::XLEN-1:2], 1'b0, csr_wdata[0]}; + else update_access_exception = 1'b1; + riscv::CSR_SCOUNTEREN: + if (CVA6Cfg.RVS) scounteren_d = {{riscv::XLEN - 32{1'b0}}, csr_wdata[31:0]}; + else update_access_exception = 1'b1; + riscv::CSR_SSCRATCH: + if (CVA6Cfg.RVS) sscratch_d = csr_wdata; + else update_access_exception = 1'b1; + riscv::CSR_SEPC: + if (CVA6Cfg.RVS) sepc_d = {csr_wdata[riscv::XLEN-1:1], 1'b0}; + else update_access_exception = 1'b1; + riscv::CSR_SCAUSE: + if (CVA6Cfg.RVS) scause_d = csr_wdata; + else update_access_exception = 1'b1; + riscv::CSR_STVAL: + if (CVA6Cfg.RVS) stval_d = csr_wdata; + else update_access_exception = 1'b1; + // supervisor address translation and protection + riscv::CSR_SATP: begin + if (CVA6Cfg.RVS) begin + // intercept SATP writes if in S-Mode and TVM is enabled + if (priv_lvl_o == riscv::PRIV_LVL_S && mstatus_q.tvm) update_access_exception = 1'b1; + else begin + satp = riscv::satp_t'(csr_wdata); + // only make ASID_LEN - 1 bit stick, that way software can figure out how many ASID bits are supported + satp.asid = satp.asid & {{(riscv::ASIDW - AsidWidth) {1'b0}}, {AsidWidth{1'b1}}}; + // only update if we actually support this mode + if (riscv::vm_mode_t'(satp.mode) == riscv::ModeOff || + riscv::vm_mode_t'(satp.mode) == riscv::MODE_SV) + satp_d = satp; + end + // changing the mode can have side-effects on address translation (e.g.: other instructions), re-fetch + // the next instruction by executing a flush + flush_o = 1'b1; + end else begin + update_access_exception = 1'b1; + end + end + + riscv::CSR_MSTATUS: begin + mstatus_d = {{64 - riscv::XLEN{1'b0}}, csr_wdata}; + mstatus_d.xs = riscv::Off; + if (!CVA6Cfg.FpPresent) begin + mstatus_d.fs = riscv::Off; + end + if (!CVA6Cfg.RVV) 
begin + mstatus_d.vs = riscv::Off; + end + mstatus_d.wpri3 = 9'b0; + mstatus_d.wpri1 = 1'b0; + mstatus_d.wpri2 = 1'b0; + mstatus_d.wpri0 = 1'b0; + mstatus_d.ube = 1'b0; // CVA6 is little-endian + // this register has side-effects on other registers, flush the pipeline + flush_o = 1'b1; + end + riscv::CSR_MSTATUSH: if (riscv::XLEN != 32) update_access_exception = 1'b1; + // MISA is WARL (Write Any Value, Reads Legal Value) + riscv::CSR_MISA: ; + // machine exception delegation register + // 0 - 15 exceptions supported + riscv::CSR_MEDELEG: begin + if (CVA6Cfg.RVS) begin + mask = (1 << riscv::INSTR_ADDR_MISALIGNED) | + (1 << riscv::BREAKPOINT) | + (1 << riscv::ENV_CALL_UMODE) | + (1 << riscv::INSTR_PAGE_FAULT) | + (1 << riscv::LOAD_PAGE_FAULT) | + (1 << riscv::STORE_PAGE_FAULT); + medeleg_d = (medeleg_q & ~mask) | (csr_wdata & mask); + end else begin + update_access_exception = 1'b1; + end + end + // machine interrupt delegation register + // we do not support user interrupt delegation + riscv::CSR_MIDELEG: begin + if (CVA6Cfg.RVS) begin + mask = riscv::MIP_SSIP | riscv::MIP_STIP | riscv::MIP_SEIP; + mideleg_d = (mideleg_q & ~mask) | (csr_wdata & mask); + end else begin + update_access_exception = 1'b1; + end + end + // mask the register so that unsupported interrupts can never be set + riscv::CSR_MIE: begin + mask = riscv::MIP_SSIP | riscv::MIP_STIP | riscv::MIP_SEIP | riscv::MIP_MSIP | riscv::MIP_MTIP | riscv::MIP_MEIP; + mie_d = (mie_q & ~mask) | (csr_wdata & mask); // we only support supervisor and M-mode interrupts + end + + riscv::CSR_MTVEC: begin + mtvec_d = {csr_wdata[riscv::XLEN-1:2], 1'b0, csr_wdata[0]}; + // we are in vector mode, this implementation requires the additional + // alignment constraint of 64 * 4 bytes + if (csr_wdata[0]) mtvec_d = {csr_wdata[riscv::XLEN-1:8], 7'b0, csr_wdata[0]}; + end + riscv::CSR_MCOUNTEREN: mcounteren_d = {{riscv::XLEN - 32{1'b0}}, csr_wdata[31:0]}; + + riscv::CSR_MSCRATCH: mscratch_d = csr_wdata; + riscv::CSR_MEPC: mepc_d 
= {csr_wdata[riscv::XLEN-1:1], 1'b0}; + riscv::CSR_MCAUSE: mcause_d = csr_wdata; + riscv::CSR_MTVAL: mtval_d = csr_wdata; + riscv::CSR_MIP: begin + mask = riscv::MIP_SSIP | riscv::MIP_STIP | riscv::MIP_SEIP; + mip_d = (mip_q & ~mask) | (csr_wdata & mask); + end + riscv::CSR_MENVCFG: if (CVA6Cfg.RVS) fiom_d = csr_wdata[0]; + riscv::CSR_MENVCFGH: begin + if (riscv::XLEN != 32) update_access_exception = 1'b1; + end + riscv::CSR_MCOUNTINHIBIT: + mcountinhibit_d = {csr_wdata[MHPMCounterNum+2:2], 1'b0, csr_wdata[0]}; + // performance counters + riscv::CSR_MCYCLE: cycle_d[riscv::XLEN-1:0] = csr_wdata; + riscv::CSR_MCYCLEH: + if (riscv::XLEN == 32) cycle_d[63:32] = csr_wdata; + else update_access_exception = 1'b1; + riscv::CSR_MINSTRET: instret_d[riscv::XLEN-1:0] = csr_wdata; + riscv::CSR_MINSTRETH: + if (riscv::XLEN == 32) instret_d[63:32] = csr_wdata; + else update_access_exception = 1'b1; + //Event Selector + riscv::CSR_MHPM_EVENT_3, + riscv::CSR_MHPM_EVENT_4, + riscv::CSR_MHPM_EVENT_5, + riscv::CSR_MHPM_EVENT_6, + riscv::CSR_MHPM_EVENT_7, + riscv::CSR_MHPM_EVENT_8, + riscv::CSR_MHPM_EVENT_9, + riscv::CSR_MHPM_EVENT_10, + riscv::CSR_MHPM_EVENT_11, + riscv::CSR_MHPM_EVENT_12, + riscv::CSR_MHPM_EVENT_13, + riscv::CSR_MHPM_EVENT_14, + riscv::CSR_MHPM_EVENT_15, + riscv::CSR_MHPM_EVENT_16, + riscv::CSR_MHPM_EVENT_17, + riscv::CSR_MHPM_EVENT_18, + riscv::CSR_MHPM_EVENT_19, + riscv::CSR_MHPM_EVENT_20, + riscv::CSR_MHPM_EVENT_21, + riscv::CSR_MHPM_EVENT_22, + riscv::CSR_MHPM_EVENT_23, + riscv::CSR_MHPM_EVENT_24, + riscv::CSR_MHPM_EVENT_25, + riscv::CSR_MHPM_EVENT_26, + riscv::CSR_MHPM_EVENT_27, + riscv::CSR_MHPM_EVENT_28, + riscv::CSR_MHPM_EVENT_29, + riscv::CSR_MHPM_EVENT_30, + riscv::CSR_MHPM_EVENT_31 : begin + perf_we_o = 1'b1; + perf_data_o = csr_wdata; + end + + riscv::CSR_MHPM_COUNTER_3, + riscv::CSR_MHPM_COUNTER_4, + riscv::CSR_MHPM_COUNTER_5, + riscv::CSR_MHPM_COUNTER_6, + riscv::CSR_MHPM_COUNTER_7, + riscv::CSR_MHPM_COUNTER_8, + riscv::CSR_MHPM_COUNTER_9, + 
riscv::CSR_MHPM_COUNTER_10, + riscv::CSR_MHPM_COUNTER_11, + riscv::CSR_MHPM_COUNTER_12, + riscv::CSR_MHPM_COUNTER_13, + riscv::CSR_MHPM_COUNTER_14, + riscv::CSR_MHPM_COUNTER_15, + riscv::CSR_MHPM_COUNTER_16, + riscv::CSR_MHPM_COUNTER_17, + riscv::CSR_MHPM_COUNTER_18, + riscv::CSR_MHPM_COUNTER_19, + riscv::CSR_MHPM_COUNTER_20, + riscv::CSR_MHPM_COUNTER_21, + riscv::CSR_MHPM_COUNTER_22, + riscv::CSR_MHPM_COUNTER_23, + riscv::CSR_MHPM_COUNTER_24, + riscv::CSR_MHPM_COUNTER_25, + riscv::CSR_MHPM_COUNTER_26, + riscv::CSR_MHPM_COUNTER_27, + riscv::CSR_MHPM_COUNTER_28, + riscv::CSR_MHPM_COUNTER_29, + riscv::CSR_MHPM_COUNTER_30, + riscv::CSR_MHPM_COUNTER_31 : begin + perf_we_o = 1'b1; + perf_data_o = csr_wdata; + end + + riscv::CSR_MHPM_COUNTER_3H, + riscv::CSR_MHPM_COUNTER_4H, + riscv::CSR_MHPM_COUNTER_5H, + riscv::CSR_MHPM_COUNTER_6H, + riscv::CSR_MHPM_COUNTER_7H, + riscv::CSR_MHPM_COUNTER_8H, + riscv::CSR_MHPM_COUNTER_9H, + riscv::CSR_MHPM_COUNTER_10H, + riscv::CSR_MHPM_COUNTER_11H, + riscv::CSR_MHPM_COUNTER_12H, + riscv::CSR_MHPM_COUNTER_13H, + riscv::CSR_MHPM_COUNTER_14H, + riscv::CSR_MHPM_COUNTER_15H, + riscv::CSR_MHPM_COUNTER_16H, + riscv::CSR_MHPM_COUNTER_17H, + riscv::CSR_MHPM_COUNTER_18H, + riscv::CSR_MHPM_COUNTER_19H, + riscv::CSR_MHPM_COUNTER_20H, + riscv::CSR_MHPM_COUNTER_21H, + riscv::CSR_MHPM_COUNTER_22H, + riscv::CSR_MHPM_COUNTER_23H, + riscv::CSR_MHPM_COUNTER_24H, + riscv::CSR_MHPM_COUNTER_25H, + riscv::CSR_MHPM_COUNTER_26H, + riscv::CSR_MHPM_COUNTER_27H, + riscv::CSR_MHPM_COUNTER_28H, + riscv::CSR_MHPM_COUNTER_29H, + riscv::CSR_MHPM_COUNTER_30H, + riscv::CSR_MHPM_COUNTER_31H : begin + perf_we_o = 1'b1; + if (riscv::XLEN == 32) perf_data_o = csr_wdata; + else update_access_exception = 1'b1; + end + + riscv::CSR_DCACHE: dcache_d = {{riscv::XLEN - 1{1'b0}}, csr_wdata[0]}; // enable bit + riscv::CSR_ICACHE: icache_d = {{riscv::XLEN - 1{1'b0}}, csr_wdata[0]}; // enable bit + riscv::CSR_ACC_CONS: begin + if (CVA6Cfg.EnableAccelerator) begin + acc_cons_d = 
{{riscv::XLEN - 1{1'b0}}, csr_wdata[0]}; // enable bit + end else begin + update_access_exception = 1'b1; + end + end + // PMP locked logic + // 1. refuse to update any locked entry + // 2. also refuse to update the entry below a locked TOR entry + // Note that writes to pmpcfg below a locked TOR entry are valid + riscv::CSR_PMPCFG0: + for (int i = 0; i < (riscv::XLEN / 8); i++) + if (!pmpcfg_q[i].locked) pmpcfg_d[i] = csr_wdata[i*8+:8]; + riscv::CSR_PMPCFG1: begin + if (riscv::XLEN == 32) begin + for (int i = 0; i < 4; i++) + if (!pmpcfg_q[i+4].locked) pmpcfg_d[i+4] = csr_wdata[i*8+:8]; + end else begin + update_access_exception = 1'b1; + end + end + riscv::CSR_PMPCFG2: + for (int i = 0; i < (riscv::XLEN / 8); i++) + if (!pmpcfg_q[i+8].locked) pmpcfg_d[i+8] = csr_wdata[i*8+:8]; + riscv::CSR_PMPCFG3: begin + if (riscv::XLEN == 32) begin + for (int i = 0; i < 4; i++) + if (!pmpcfg_q[i+12].locked) pmpcfg_d[i+12] = csr_wdata[i*8+:8]; + end else begin + update_access_exception = 1'b1; + end + end + riscv::CSR_PMPADDR0, + riscv::CSR_PMPADDR1, + riscv::CSR_PMPADDR2, + riscv::CSR_PMPADDR3, + riscv::CSR_PMPADDR4, + riscv::CSR_PMPADDR5, + riscv::CSR_PMPADDR6, + riscv::CSR_PMPADDR7, + riscv::CSR_PMPADDR8, + riscv::CSR_PMPADDR9, + riscv::CSR_PMPADDR10, + riscv::CSR_PMPADDR11, + riscv::CSR_PMPADDR12, + riscv::CSR_PMPADDR13, + riscv::CSR_PMPADDR14, + riscv::CSR_PMPADDR15: begin + // index is specified by the last byte in the address + automatic logic [3:0] index = csr_addr.csr_decode.address[3:0]; + // check if the entry or the entry above is locked + if (!pmpcfg_q[index].locked && !(pmpcfg_q[index+1].locked && pmpcfg_q[index].addr_mode == riscv::TOR)) begin + pmpaddr_d[index] = csr_wdata[riscv::PLEN-3:0]; + end + end + default: update_access_exception = 1'b1; + endcase + end + + mstatus_d.sxl = riscv::XLEN_64; + mstatus_d.uxl = riscv::XLEN_64; + + // mark the floating point extension register as dirty + if (CVA6Cfg.FpPresent && (dirty_fp_state_csr || dirty_fp_state_i)) begin + 
mstatus_d.fs = riscv::Dirty; + end + // mark the vector extension register as dirty + if (CVA6Cfg.RVV && dirty_v_state_i) begin + mstatus_d.vs = riscv::Dirty; + end + // hardwired extension registers + mstatus_d.sd = (mstatus_q.xs == riscv::Dirty) | (mstatus_q.fs == riscv::Dirty); + + // reserve PMPCFG bits 5 and 6 (hardwire to 0) + for (int i = 0; i < CVA6Cfg.NrPMPEntries; i++) pmpcfg_d[i].reserved = 2'b0; + + // write the floating point status register + if (CVA6Cfg.FpPresent && csr_write_fflags_i) begin + fcsr_d.fflags = csr_wdata_i[4:0] | fcsr_q.fflags; + end + + // ---------------------------- + // Accelerator FP imprecise exceptions + // ---------------------------- + + // Update fflags as soon as a FP exception occurs in the accelerator + // The exception is imprecise, and the fcsr.fflags update always happens immediately + if (CVA6Cfg.EnableAccelerator) begin + fcsr_d.fflags |= acc_fflags_ex_valid_i ? acc_fflags_ex_i : 5'b0; + end + + // --------------------- + // External Interrupts + // --------------------- + // Machine Mode External Interrupt Pending + mip_d[riscv::IRQ_M_EXT] = irq_i[0]; + // Machine software interrupt + mip_d[riscv::IRQ_M_SOFT] = ipi_i; + // Timer interrupt pending, coming from platform timer + mip_d[riscv::IRQ_M_TIMER] = time_irq_i; + + // ----------------------- + // Manage Exception Stack + // ----------------------- + // update exception CSRs + // we got an exception update cause, pc and stval register + trap_to_priv_lvl = riscv::PRIV_LVL_M; + // Exception is taken and we are not in debug mode + // exceptions in debug mode don't update any fields + if ((CVA6Cfg.DebugEn && !debug_mode_q && ex_i.cause != riscv::DEBUG_REQUEST && ex_i.valid) || (!CVA6Cfg.DebugEn && ex_i.valid)) begin + // do not flush, flush is reserved for CSR writes with side effects + flush_o = 1'b0; + // figure out where to trap to + // a m-mode trap might be delegated if we are taking it in S mode + // first figure out if this was an exception or an interrupt 
e.g.: look at bit (XLEN-1) + // the cause register can only be $clog2(riscv::XLEN) bits long (as we only support XLEN exceptions) + if (CVA6Cfg.RVS && ((ex_i.cause[riscv::XLEN-1] && mideleg_q[ex_i.cause[$clog2( + riscv::XLEN + )-1:0]]) || (~ex_i.cause[riscv::XLEN-1] && medeleg_q[ex_i.cause[$clog2( + riscv::XLEN + )-1:0]]))) begin + // traps never transition from a more-privileged mode to a less privileged mode + // so if we are already in M mode, stay there + if (priv_lvl_o == riscv::PRIV_LVL_M) trap_to_priv_lvl = riscv::PRIV_LVL_M; + else trap_to_priv_lvl = riscv::PRIV_LVL_S; + end + + // trap to supervisor mode + if (CVA6Cfg.RVS && trap_to_priv_lvl == riscv::PRIV_LVL_S) begin + // update sstatus + mstatus_d.sie = 1'b0; + mstatus_d.spie = mstatus_q.sie; + // this can either be user or supervisor mode + mstatus_d.spp = priv_lvl_q[0]; + // set cause + scause_d = ex_i.cause; + // set epc + sepc_d = {{riscv::XLEN - riscv::VLEN{pc_i[riscv::VLEN-1]}}, pc_i}; + // set mtval or stval + stval_d = (ariane_pkg::ZERO_TVAL + && (ex_i.cause inside { + riscv::ILLEGAL_INSTR, + riscv::BREAKPOINT, + riscv::ENV_CALL_UMODE, + riscv::ENV_CALL_SMODE, + riscv::ENV_CALL_MMODE + } || ex_i.cause[riscv::XLEN-1])) ? '0 : ex_i.tval; + // trap to machine mode + end else begin + // update mstatus + mstatus_d.mie = 1'b0; + mstatus_d.mpie = mstatus_q.mie; + // save the previous privilege mode + mstatus_d.mpp = priv_lvl_q; + mcause_d = ex_i.cause; + // set epc + mepc_d = {{riscv::XLEN - riscv::VLEN{pc_i[riscv::VLEN-1]}}, pc_i}; + // set mtval or stval + mtval_d = (ariane_pkg::ZERO_TVAL + && (ex_i.cause inside { + riscv::ILLEGAL_INSTR, + riscv::BREAKPOINT, + riscv::ENV_CALL_UMODE, + riscv::ENV_CALL_SMODE, + riscv::ENV_CALL_MMODE + } || ex_i.cause[riscv::XLEN-1])) ? '0 : ex_i.tval; + end + + priv_lvl_d = trap_to_priv_lvl; + end + + // ------------------------------ + // Debug + // ------------------------------ + // Explains why Debug Mode was entered. 
+ // When there are multiple reasons to enter Debug Mode in a single cycle, hardware should set cause to the cause with the highest priority. + // 1: An ebreak instruction was executed. (priority 3) + // 2: The Trigger Module caused a breakpoint exception. (priority 4) + // 3: The debugger requested entry to Debug Mode. (priority 2) + // 4: The hart single stepped because step was set. (priority 1) + // we are currently not in debug mode and could potentially enter + if (!debug_mode_q) begin + dcsr_d.prv = priv_lvl_o; + // trigger module fired + + // caused by a breakpoint + if (CVA6Cfg.DebugEn && ex_i.valid && ex_i.cause == riscv::BREAKPOINT) begin + dcsr_d.prv = priv_lvl_o; + // check that we actually want to enter debug depending on the privilege level we are currently in + unique case (priv_lvl_o) + riscv::PRIV_LVL_M: begin + debug_mode_d = dcsr_q.ebreakm; + set_debug_pc_o = dcsr_q.ebreakm; + end + riscv::PRIV_LVL_S: begin + if (CVA6Cfg.RVS) begin + debug_mode_d = dcsr_q.ebreaks; + set_debug_pc_o = dcsr_q.ebreaks; + end + end + riscv::PRIV_LVL_U: begin + if (CVA6Cfg.RVU) begin + debug_mode_d = dcsr_q.ebreaku; + set_debug_pc_o = dcsr_q.ebreaku; + end + end + default: ; + endcase + // save PC of next this instruction e.g.: the next one to be executed + dpc_d = {{riscv::XLEN - riscv::VLEN{pc_i[riscv::VLEN-1]}}, pc_i}; + dcsr_d.cause = ariane_pkg::CauseBreakpoint; + end + + // we've got a debug request + if (CVA6Cfg.DebugEn && ex_i.valid && ex_i.cause == riscv::DEBUG_REQUEST) begin + dcsr_d.prv = priv_lvl_o; + // save the PC + dpc_d = {{riscv::XLEN - riscv::VLEN{pc_i[riscv::VLEN-1]}}, pc_i}; + // enter debug mode + debug_mode_d = 1'b1; + // jump to the base address + set_debug_pc_o = 1'b1; + // save the cause as external debug request + dcsr_d.cause = ariane_pkg::CauseRequest; + end + + // single step enable and we just retired an instruction + if (CVA6Cfg.DebugEn && dcsr_q.step && commit_ack_i[0]) begin + dcsr_d.prv = priv_lvl_o; + // valid CTRL flow change + if 
(commit_instr_i[0].fu == CTRL_FLOW) begin + // we saved the correct target address during execute + dpc_d = { + {riscv::XLEN - riscv::VLEN{commit_instr_i[0].bp.predict_address[riscv::VLEN-1]}}, + commit_instr_i[0].bp.predict_address + }; + // exception valid + end else if (ex_i.valid) begin + dpc_d = {{riscv::XLEN - riscv::VLEN{1'b0}}, trap_vector_base_o}; + // return from environment + end else if (eret_o) begin + dpc_d = {{riscv::XLEN - riscv::VLEN{1'b0}}, epc_o}; + // consecutive PC + end else begin + dpc_d = { + {riscv::XLEN - riscv::VLEN{commit_instr_i[0].pc[riscv::VLEN-1]}}, + commit_instr_i[0].pc + (commit_instr_i[0].is_compressed ? 'h2 : 'h4) + }; + end + debug_mode_d = 1'b1; + set_debug_pc_o = 1'b1; + dcsr_d.cause = ariane_pkg::CauseSingleStep; + end + end + // go in halt-state again when we encounter an exception + if (CVA6Cfg.DebugEn && debug_mode_q && ex_i.valid && ex_i.cause == riscv::BREAKPOINT) begin + set_debug_pc_o = 1'b1; + end + + // ------------------------------ + // MPRV - Modify Privilege Level + // ------------------------------ + // Set the address translation at which the load and stores should occur + // we can use the previous values since changing the address translation will always involve a pipeline flush + if (ariane_pkg::MMU_PRESENT && mprv && CVA6Cfg.RVS && riscv::vm_mode_t'(satp_q.mode) == riscv::MODE_SV && (mstatus_q.mpp != riscv::PRIV_LVL_M)) + en_ld_st_translation_d = 1'b1; + else // otherwise we go with the regular settings + en_ld_st_translation_d = en_translation_o; + + ld_st_priv_lvl_o = (mprv) ? 
mstatus_q.mpp : priv_lvl_o; + en_ld_st_translation_o = en_ld_st_translation_q; + // ------------------------------ + // Return from Environment + // ------------------------------ + // When executing an xRET instruction, supposing xPP holds the value y, xIE is set to xPIE; the privilege + // mode is changed to y; xPIE is set to 1; and xPP is set to U + if (mret) begin + // return from exception, IF doesn't care from where we are returning + eret_o = 1'b1; + // return to the previous privilege level and restore all enable flags + // get the previous machine interrupt enable flag + mstatus_d.mie = mstatus_q.mpie; + // restore the previous privilege level + priv_lvl_d = mstatus_q.mpp; + // set mpp to user mode + mstatus_d.mpp = riscv::PRIV_LVL_U; + // set mpie to 1 + mstatus_d.mpie = 1'b1; + end + + if (CVA6Cfg.RVS && sret) begin + // return from exception, IF doesn't care from where we are returning + eret_o = 1'b1; + // return the previous supervisor interrupt enable flag + mstatus_d.sie = mstatus_q.spie; + // restore the previous privilege level + priv_lvl_d = riscv::priv_lvl_t'({1'b0, mstatus_q.spp}); + // set spp to user mode + mstatus_d.spp = 1'b0; + // set spie to 1 + mstatus_d.spie = 1'b1; + end + + // return from debug mode + if (CVA6Cfg.DebugEn && dret) begin + // return from exception, IF doesn't care from where we are returning + eret_o = 1'b1; + // restore the previous privilege level + priv_lvl_d = riscv::priv_lvl_t'(dcsr_q.prv); + // actually return from debug mode + debug_mode_d = 1'b0; + end + end + + // --------------------------- + // CSR OP Select Logic + // --------------------------- + always_comb begin : csr_op_logic + csr_wdata = csr_wdata_i; + csr_we = 1'b1; + csr_read = 1'b1; + mret = 1'b0; + sret = 1'b0; + dret = 1'b0; + + unique case (csr_op_i) + CSR_WRITE: csr_wdata = csr_wdata_i; + CSR_SET: csr_wdata = csr_wdata_i | csr_rdata; + CSR_CLEAR: csr_wdata = (~csr_wdata_i) & csr_rdata; + CSR_READ: csr_we = 1'b0; + MRET: begin + // the return 
should not have any write or read side-effects + csr_we = 1'b0; + csr_read = 1'b0; + mret = 1'b1; // signal a return from machine mode + end + default: begin + if (CVA6Cfg.RVS && csr_op_i == SRET) begin + // the return should not have any write or read side-effects + csr_we = 1'b0; + csr_read = 1'b0; + sret = 1'b1; // signal a return from supervisor mode + end else if (CVA6Cfg.DebugEn && csr_op_i == DRET) begin + // the return should not have any write or read side-effects + csr_we = 1'b0; + csr_read = 1'b0; + dret = 1'b1; // signal a return from debug mode + end else begin + csr_we = 1'b0; + csr_read = 1'b0; + end + end + endcase + // if we are violating our privilges do not update the architectural state + if (privilege_violation) begin + csr_we = 1'b0; + csr_read = 1'b0; + end + end + + assign irq_ctrl_o.mie = mie_q; + assign irq_ctrl_o.mip = mip_q; + assign irq_ctrl_o.sie = mstatus_q.sie; + assign irq_ctrl_o.mideleg = mideleg_q; + assign irq_ctrl_o.global_enable = (~debug_mode_q) + // interrupts are enabled during single step or we are not stepping + // No need to check interrupts during single step if we don't support DEBUG mode + & (~CVA6Cfg.DebugEn | (~dcsr_q.step | dcsr_q.stepie)) + & ((mstatus_q.mie & (priv_lvl_o == riscv::PRIV_LVL_M)) + | (priv_lvl_o != riscv::PRIV_LVL_M)); + + always_comb begin : privilege_check + // ----------------- + // Privilege Check + // ----------------- + privilege_violation = 1'b0; + // if we are reading or writing, check for the correct privilege level this has + // precedence over interrupts + if (csr_op_i inside {CSR_WRITE, CSR_SET, CSR_CLEAR, CSR_READ}) begin + if ((riscv::priv_lvl_t'(priv_lvl_o & csr_addr.csr_decode.priv_lvl) != csr_addr.csr_decode.priv_lvl)) begin + privilege_violation = 1'b1; + end + // check access to debug mode only CSRs + if ((!CVA6Cfg.DebugEn && csr_addr_i[11:4] == 8'h7b) || (CVA6Cfg.DebugEn && csr_addr_i[11:4] == 8'h7b && !debug_mode_q)) begin + privilege_violation = 1'b1; + end + // check 
counter-enabled counter CSR accesses + // counter address range is C00 to C1F + if (csr_addr_i inside {[riscv::CSR_CYCLE : riscv::CSR_HPM_COUNTER_31]}) begin + if (priv_lvl_o == riscv::PRIV_LVL_S && CVA6Cfg.RVS) begin + privilege_violation = ~mcounteren_q[csr_addr_i[4:0]]; + end else if (priv_lvl_o == riscv::PRIV_LVL_U && CVA6Cfg.RVU) begin + privilege_violation = ~mcounteren_q[csr_addr_i[4:0]] | ~scounteren_q[csr_addr_i[4:0]]; + end else if (priv_lvl_o == riscv::PRIV_LVL_M) begin + privilege_violation = 1'b0; + end + end + end + end + // ---------------------- + // CSR Exception Control + // ---------------------- + always_comb begin : exception_ctrl + csr_exception_o = {{riscv::XLEN{1'b0}}, {riscv::XLEN{1'b0}}, 1'b0}; + // ---------------------------------- + // Illegal Access (decode exception) + // ---------------------------------- + // we got an exception in one of the processes above + // throw an illegal instruction exception + if (update_access_exception || read_access_exception) begin + csr_exception_o.cause = riscv::ILLEGAL_INSTR; + // we don't set the tval field as this will be set by the commit stage + // this spares the extra wiring from commit to CSR and back to commit + csr_exception_o.valid = 1'b1; + end + + if (privilege_violation) begin + csr_exception_o.cause = riscv::ILLEGAL_INSTR; + csr_exception_o.valid = 1'b1; + end + end + + // ------------------- + // Wait for Interrupt + // ------------------- + always_comb begin : wfi_ctrl + // wait for interrupt register + wfi_d = wfi_q; + // if there is any (enabled) interrupt pending un-stall the core + // also un-stall if we want to enter debug mode + if (|(mip_q & mie_q) || (CVA6Cfg.DebugEn && debug_req_i) || irq_i[1]) begin + wfi_d = 1'b0; + // or alternatively if there is no exception pending and we are not in debug mode wait here + // for the interrupt + end else if (((CVA6Cfg.DebugEn && !debug_mode_q) && csr_op_i == WFI && !ex_i.valid) || (!CVA6Cfg.DebugEn && csr_op_i == WFI && !ex_i.valid)) 
begin + wfi_d = 1'b1; + end + end + + // output assignments dependent on privilege mode + always_comb begin : priv_output + trap_vector_base_o = {mtvec_q[riscv::VLEN-1:2], 2'b0}; + // output user mode stvec + if (CVA6Cfg.RVS && trap_to_priv_lvl == riscv::PRIV_LVL_S) begin + trap_vector_base_o = {stvec_q[riscv::VLEN-1:2], 2'b0}; + end + + // if we are in debug mode jump to a specific address + if (CVA6Cfg.DebugEn && debug_mode_q) begin + trap_vector_base_o = CVA6Cfg.DmBaseAddress[riscv::VLEN-1:0] + CVA6Cfg.ExceptionAddress[riscv::VLEN-1:0]; + end + + // check if we are in vectored mode, if yes then do BASE + 4 * cause we + // are imposing an additional alignment-constraint of 64 * 4 bytes since + // we want to spare the costly addition. Furthermore check to which + // privilege level we are jumping and whether the vectored mode is + // activated for _that_ privilege level. + if (ex_i.cause[riscv::XLEN-1] && + ((((CVA6Cfg.RVS || CVA6Cfg.RVU) && trap_to_priv_lvl == riscv::PRIV_LVL_M && mtvec_q[0]) || (!CVA6Cfg.RVS && !CVA6Cfg.RVU && mtvec_q[0])) + || (CVA6Cfg.RVS && trap_to_priv_lvl == riscv::PRIV_LVL_S && stvec_q[0]))) begin + trap_vector_base_o[7:2] = ex_i.cause[5:0]; + end + + epc_o = mepc_q[riscv::VLEN-1:0]; + // we are returning from supervisor mode, so take the sepc register + if (CVA6Cfg.RVS && sret) begin + epc_o = sepc_q[riscv::VLEN-1:0]; + end + // we are returning from debug mode, to take the dpc register + if (CVA6Cfg.DebugEn && dret) begin + epc_o = dpc_q[riscv::VLEN-1:0]; + end + end + + // ------------------- + // Output Assignments + // ------------------- + always_comb begin + // When the SEIP bit is read with a CSRRW, CSRRS, or CSRRC instruction, the value + // returned in the rd destination register contains the logical-OR of the software-writable + // bit and the interrupt signal from the interrupt controller. 
+ csr_rdata_o = csr_rdata; + + unique case (csr_addr.address) + riscv::CSR_MIP: + csr_rdata_o = csr_rdata | ({{riscv::XLEN - 1{1'b0}}, irq_i[1]} << riscv::IRQ_S_EXT); + // in supervisor mode we also need to check whether we delegated this bit + riscv::CSR_SIP: begin + if (CVA6Cfg.RVS) begin + csr_rdata_o = csr_rdata + | ({{riscv::XLEN-1{1'b0}}, (irq_i[1] & mideleg_q[riscv::IRQ_S_EXT])} << riscv::IRQ_S_EXT); + end + end + default: ; + endcase + end + + // in debug mode we execute with privilege level M + assign priv_lvl_o = (CVA6Cfg.DebugEn && debug_mode_q) ? riscv::PRIV_LVL_M : priv_lvl_q; + // FPU outputs + assign fflags_o = fcsr_q.fflags; + assign frm_o = fcsr_q.frm; + assign fprec_o = fcsr_q.fprec; + // MMU outputs + assign satp_ppn_o = satp_q.ppn; + assign asid_o = satp_q.asid[AsidWidth-1:0]; + assign sum_o = mstatus_q.sum; + // we support bare memory addressing and SV39 + assign en_translation_o = ((CVA6Cfg.RVS && riscv::vm_mode_t'(satp_q.mode) == riscv::MODE_SV) && + priv_lvl_o != riscv::PRIV_LVL_M) + ? 1'b1 + : 1'b0; + assign mxr_o = mstatus_q.mxr; + assign tvm_o = mstatus_q.tvm; + assign tw_o = mstatus_q.tw; + assign tsr_o = mstatus_q.tsr; + assign halt_csr_o = wfi_q; +`ifdef PITON_ARIANE + assign icache_en_o = icache_q[0]; +`else + assign icache_en_o = icache_q[0] & (~debug_mode_q); +`endif + assign dcache_en_o = dcache_q[0]; + assign acc_cons_en_o = CVA6Cfg.EnableAccelerator ? acc_cons_q[0] : 1'b0; + + // determine if mprv needs to be considered if in debug mode + assign mprv = (CVA6Cfg.DebugEn && debug_mode_q && !dcsr_q.mprven) ? 
1'b0 : mstatus_q.mprv; + assign debug_mode_o = debug_mode_q; + assign single_step_o = dcsr_q.step; + assign mcountinhibit_o = {{29 - MHPMCounterNum{1'b0}}, mcountinhibit_q}; + + // sequential process + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + priv_lvl_q <= riscv::PRIV_LVL_M; + // floating-point registers + fcsr_q <= '0; + // debug signals + debug_mode_q <= 1'b0; + if (CVA6Cfg.DebugEn) begin + dcsr_q <= '0; + dcsr_q.prv <= riscv::PRIV_LVL_M; + dcsr_q.xdebugver <= 4'h4; + dpc_q <= '0; + dscratch0_q <= {riscv::XLEN{1'b0}}; + dscratch1_q <= {riscv::XLEN{1'b0}}; + end + // machine mode registers + mstatus_q <= 64'b0; + // set to boot address + direct mode + 4 byte offset which is the initial trap + mtvec_rst_load_q <= 1'b1; + mtvec_q <= '0; + mip_q <= {riscv::XLEN{1'b0}}; + mie_q <= {riscv::XLEN{1'b0}}; + mepc_q <= {riscv::XLEN{1'b0}}; + mcause_q <= {riscv::XLEN{1'b0}}; + mcounteren_q <= {riscv::XLEN{1'b0}}; + mscratch_q <= {riscv::XLEN{1'b0}}; + mtval_q <= {riscv::XLEN{1'b0}}; + fiom_q <= '0; + dcache_q <= {{riscv::XLEN - 1{1'b0}}, 1'b1}; + icache_q <= {{riscv::XLEN - 1{1'b0}}, 1'b1}; + mcountinhibit_q <= '0; + acc_cons_q <= {{riscv::XLEN - 1{1'b0}}, CVA6Cfg.EnableAccelerator}; + // supervisor mode registers + if (CVA6Cfg.RVS) begin + medeleg_q <= {riscv::XLEN{1'b0}}; + mideleg_q <= {riscv::XLEN{1'b0}}; + sepc_q <= {riscv::XLEN{1'b0}}; + scause_q <= {riscv::XLEN{1'b0}}; + stvec_q <= {riscv::XLEN{1'b0}}; + scounteren_q <= {riscv::XLEN{1'b0}}; + sscratch_q <= {riscv::XLEN{1'b0}}; + stval_q <= {riscv::XLEN{1'b0}}; + satp_q <= {riscv::XLEN{1'b0}}; + end + // timer and counters + cycle_q <= 64'b0; + instret_q <= 64'b0; + // aux registers + en_ld_st_translation_q <= 1'b0; + // wait for interrupt + wfi_q <= 1'b0; + // pmp + for (int i = 0; i < 16; i++) begin + if (i < CVA6Cfg.NrPMPEntries) begin + pmpcfg_q[i] <= riscv::pmpcfg_t'(CVA6Cfg.PMPCfgRstVal[i]); + pmpaddr_q[i] <= CVA6Cfg.PMPAddrRstVal[i][riscv::PLEN-3:0]; + end else begin + 
pmpcfg_q[i] <= '0; + pmpaddr_q[i] <= '0; + end + end + end else begin + priv_lvl_q <= priv_lvl_d; + // floating-point registers + fcsr_q <= fcsr_d; + // debug signals + if (CVA6Cfg.DebugEn) begin + debug_mode_q <= debug_mode_d; + dcsr_q <= dcsr_d; + dpc_q <= dpc_d; + dscratch0_q <= dscratch0_d; + dscratch1_q <= dscratch1_d; + end + // machine mode registers + mstatus_q <= mstatus_d; + mtvec_rst_load_q <= 1'b0; + mtvec_q <= mtvec_d; + mip_q <= mip_d; + mie_q <= mie_d; + mepc_q <= mepc_d; + mcause_q <= mcause_d; + mcounteren_q <= mcounteren_d; + mscratch_q <= mscratch_d; + mtval_q <= mtval_d; + fiom_q <= fiom_d; + dcache_q <= dcache_d; + icache_q <= icache_d; + mcountinhibit_q <= mcountinhibit_d; + acc_cons_q <= acc_cons_d; + // supervisor mode registers + if (CVA6Cfg.RVS) begin + medeleg_q <= medeleg_d; + mideleg_q <= mideleg_d; + sepc_q <= sepc_d; + scause_q <= scause_d; + stvec_q <= stvec_d; + scounteren_q <= scounteren_d; + sscratch_q <= sscratch_d; + stval_q <= stval_d; + satp_q <= satp_d; + end + // timer and counters + cycle_q <= cycle_d; + instret_q <= instret_d; + // aux registers + en_ld_st_translation_q <= en_ld_st_translation_d; + // wait for interrupt + wfi_q <= wfi_d; + // pmp + for (int i = 0; i < 16; i++) begin + if (i < CVA6Cfg.NrPMPEntries) begin + // We only support >=8-byte granularity, NA4 is disabled + if(!CVA6Cfg.PMPEntryReadOnly[i] && pmpcfg_d[i].addr_mode != riscv::NA4 && !(pmpcfg_d[i].access_type.r == '0 && pmpcfg_d[i].access_type.w == '1)) begin + pmpcfg_q[i] <= pmpcfg_d[i]; + end else begin + pmpcfg_q[i] <= pmpcfg_q[i]; + end + if (!CVA6Cfg.PMPEntryReadOnly[i]) begin + pmpaddr_q[i] <= pmpaddr_d[i]; + end else begin + pmpaddr_q[i] <= pmpaddr_q[i]; + end + end else begin + pmpcfg_q[i] <= '0; + pmpaddr_q[i] <= '0; + end + end + end + end + + //------------- + // Assertions + //------------- + //pragma translate_off + // check that eret and ex are never valid together + assert property (@(posedge clk_i) disable iff (!rst_ni !== '0) !(eret_o && 
ex_i.valid)) + else begin + $error("eret and exception should never be valid at the same time"); + $stop(); + end + //pragma translate_on +endmodule diff --git a/test/type_param/core/cva6.sv b/test/type_param/core/cva6.sv new file mode 100644 index 00000000..4b9ccf00 --- /dev/null +++ b/test/type_param/core/cva6.sv @@ -0,0 +1,1401 @@ +// Copyright 2017-2019 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 19.03.2017 +// Description: CVA6 Top-level module + + +module cva6 + import ariane_pkg::*; +#( + // CVA6 config + parameter config_pkg::cva6_cfg_t CVA6Cfg = cva6_config_pkg::cva6_cfg, + parameter bit IsRVFI = bit'(cva6_config_pkg::CVA6ConfigRvfiTrace), + // RVFI + parameter type rvfi_probes_t = struct packed { + logic [TRANS_ID_BITS-1:0] issue_pointer; + logic [CVA6Cfg.NrCommitPorts-1:0][TRANS_ID_BITS-1:0] commit_pointer; + logic flush_unissued_instr; + logic decoded_instr_valid; + logic flush; + logic decoded_instr_ack; + logic issue_instr_ack; + logic fetch_entry_valid; + logic [31:0] instruction; + logic is_compressed; + riscv::xlen_t rs1_forwarding; + riscv::xlen_t rs2_forwarding; + scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr; + exception_t ex_commit; + riscv::priv_lvl_t priv_lvl; + lsu_ctrl_t lsu_ctrl; + logic [((CVA6Cfg.CvxifEn || CVA6Cfg.RVV) ? 
5 : 4)-1:0][riscv::XLEN-1:0] wbdata; + logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack; + logic [riscv::PLEN-1:0] mem_paddr; + logic debug_mode; + logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata; + }, + + // AXI types + parameter type axi_ar_chan_t = struct packed { + logic [CVA6Cfg.AxiIdWidth-1:0] id; + logic [CVA6Cfg.AxiAddrWidth-1:0] addr; + axi_pkg::len_t len; + axi_pkg::size_t size; + axi_pkg::burst_t burst; + logic lock; + axi_pkg::cache_t cache; + axi_pkg::prot_t prot; + axi_pkg::qos_t qos; + axi_pkg::region_t region; + logic [CVA6Cfg.AxiUserWidth-1:0] user; + }, + parameter type axi_aw_chan_t = struct packed { + logic [CVA6Cfg.AxiIdWidth-1:0] id; + logic [CVA6Cfg.AxiAddrWidth-1:0] addr; + axi_pkg::len_t len; + axi_pkg::size_t size; + axi_pkg::burst_t burst; + logic lock; + axi_pkg::cache_t cache; + axi_pkg::prot_t prot; + axi_pkg::qos_t qos; + axi_pkg::region_t region; + axi_pkg::atop_t atop; + logic [CVA6Cfg.AxiUserWidth-1:0] user; + }, + parameter type axi_w_chan_t = struct packed { + logic [CVA6Cfg.AxiDataWidth-1:0] data; + logic [(CVA6Cfg.AxiDataWidth/8)-1:0] strb; + logic last; + logic [CVA6Cfg.AxiUserWidth-1:0] user; + }, + parameter type b_chan_t = struct packed { + logic [CVA6Cfg.AxiIdWidth-1:0] id; + axi_pkg::resp_t resp; + logic [CVA6Cfg.AxiUserWidth-1:0] user; + }, + parameter type r_chan_t = struct packed { + logic [CVA6Cfg.AxiIdWidth-1:0] id; + logic [CVA6Cfg.AxiDataWidth-1:0] data; + axi_pkg::resp_t resp; + logic last; + logic [CVA6Cfg.AxiUserWidth-1:0] user; + }, + parameter type noc_req_t = struct packed { + axi_aw_chan_t aw; + logic aw_valid; + axi_w_chan_t w; + logic w_valid; + logic b_ready; + axi_ar_chan_t ar; + logic ar_valid; + logic r_ready; + }, + parameter type noc_resp_t = struct packed { + logic aw_ready; + logic ar_ready; + logic w_ready; + logic b_valid; + b_chan_t b; + logic r_valid; + r_chan_t r; + }, + // + parameter type acc_cfg_t = logic, + parameter acc_cfg_t AccCfg = '0, + parameter type cvxif_req_t = 
cvxif_pkg::cvxif_req_t, + parameter type cvxif_resp_t = cvxif_pkg::cvxif_resp_t +) ( + input logic clk_i, + input logic rst_ni, + // Core ID, Cluster ID and boot address are considered more or less static + input logic [riscv::VLEN-1:0] boot_addr_i, // reset boot address + input logic [riscv::XLEN-1:0] hart_id_i, // hart id in a multicore environment (reflected in a CSR) + // Interrupt inputs + input logic [1:0] irq_i, // level sensitive IR lines, mip & sip (async) + input logic ipi_i, // inter-processor interrupts (async) + // Timer facilities + input logic time_irq_i, // timer interrupt in (async) + input logic debug_req_i, // debug request (async) + // RISC-V formal interface port (`rvfi`): + // Can be left open when formal tracing is not needed. + output rvfi_probes_t rvfi_probes_o, + output cvxif_req_t cvxif_req_o, + input cvxif_resp_t cvxif_resp_i, + // memory side + output noc_req_t noc_req_o, + input noc_resp_t noc_resp_i +); + + // ------------------------------------------ + // CVA6 configuration + // ------------------------------------------ + // Extended config + localparam bit RVF = (riscv::IS_XLEN64 | riscv::IS_XLEN32) & CVA6Cfg.FpuEn; + localparam bit RVD = (riscv::IS_XLEN64 ? 1 : 0) & CVA6Cfg.FpuEn; + localparam bit FpPresent = RVF | RVD | CVA6Cfg.XF16 | CVA6Cfg.XF16ALT | CVA6Cfg.XF8; + localparam bit NSX = CVA6Cfg.XF16 | CVA6Cfg.XF16ALT | CVA6Cfg.XF8 | CVA6Cfg.XFVec; // Are non-standard extensions present? + localparam int unsigned FLen = RVD ? 64 : // D ext. + RVF ? 32 : // F ext. + CVA6Cfg.XF16 ? 16 : // Xf16 ext. + CVA6Cfg.XF16ALT ? 16 : // Xf16alt ext. + CVA6Cfg.XF8 ? 8 : // Xf8 ext. 
+ 1; // Unused in case of no FP + + // Transprecision floating-point extensions configuration + localparam bit RVFVec = RVF & CVA6Cfg.XFVec & FLen>32; // FP32 vectors available if vectors and larger fmt enabled + localparam bit XF16Vec = CVA6Cfg.XF16 & CVA6Cfg.XFVec & FLen>16; // FP16 vectors available if vectors and larger fmt enabled + localparam bit XF16ALTVec = CVA6Cfg.XF16ALT & CVA6Cfg.XFVec & FLen>16; // FP16ALT vectors available if vectors and larger fmt enabled + localparam bit XF8Vec = CVA6Cfg.XF8 & CVA6Cfg.XFVec & FLen>8; // FP8 vectors available if vectors and larger fmt enabled + + localparam bit EnableAccelerator = CVA6Cfg.RVV; // Currently only used by V extension (Ara) + localparam int unsigned NrWbPorts = (CVA6Cfg.CvxifEn || EnableAccelerator) ? 5 : 4; + + localparam NrRgprPorts = 2; + + localparam bit NonIdemPotenceEn = CVA6Cfg.NrNonIdempotentRules && CVA6Cfg.NonIdempotentLength; // Currently only used by V extension (Ara) + + localparam config_pkg::cva6_cfg_t CVA6ExtendCfg = { + CVA6Cfg.NrCommitPorts, + CVA6Cfg.AxiAddrWidth, + CVA6Cfg.AxiDataWidth, + CVA6Cfg.AxiIdWidth, + CVA6Cfg.AxiUserWidth, + CVA6Cfg.NrLoadBufEntries, + CVA6Cfg.FpuEn, + CVA6Cfg.XF16, + CVA6Cfg.XF16ALT, + CVA6Cfg.XF8, + CVA6Cfg.RVA, + CVA6Cfg.RVB, + CVA6Cfg.RVV, + CVA6Cfg.RVC, + CVA6Cfg.RVZCB, + CVA6Cfg.XFVec, + CVA6Cfg.CvxifEn, + CVA6Cfg.ZiCondExtEn, + // Extended + bit'(RVF), + bit'(RVD), + bit'(FpPresent), + bit'(NSX), + unsigned'(FLen), + bit'(RVFVec), + bit'(XF16Vec), + bit'(XF16ALTVec), + bit'(XF8Vec), + unsigned'(NrRgprPorts), + unsigned'(NrWbPorts), + bit'(EnableAccelerator), + CVA6Cfg.RVS, + CVA6Cfg.RVU, + CVA6Cfg.HaltAddress, + CVA6Cfg.ExceptionAddress, + CVA6Cfg.RASDepth, + CVA6Cfg.BTBEntries, + CVA6Cfg.BHTEntries, + CVA6Cfg.DmBaseAddress, + CVA6Cfg.NrPMPEntries, + CVA6Cfg.PMPCfgRstVal, + CVA6Cfg.PMPAddrRstVal, + CVA6Cfg.PMPEntryReadOnly, + CVA6Cfg.NOCType, + CVA6Cfg.NrNonIdempotentRules, + CVA6Cfg.NonIdempotentAddrBase, + CVA6Cfg.NonIdempotentLength, + 
CVA6Cfg.NrExecuteRegionRules, + CVA6Cfg.ExecuteRegionAddrBase, + CVA6Cfg.ExecuteRegionLength, + CVA6Cfg.NrCachedRegionRules, + CVA6Cfg.CachedRegionAddrBase, + CVA6Cfg.CachedRegionLength, + CVA6Cfg.MaxOutstandingStores, + CVA6Cfg.DebugEn, + NonIdemPotenceEn, + CVA6Cfg.AxiBurstWriteEn + }; + + + // ------------------------------------------ + // Global Signals + // Signals connecting more than one module + // ------------------------------------------ + riscv::priv_lvl_t priv_lvl; + exception_t ex_commit; // exception from commit stage + bp_resolve_t resolved_branch; + logic [ riscv::VLEN-1:0] pc_commit; + logic eret; + logic [CVA6ExtendCfg.NrCommitPorts-1:0] commit_ack; + + localparam NumPorts = 4; + cvxif_pkg::cvxif_req_t cvxif_req; + cvxif_pkg::cvxif_resp_t cvxif_resp; + + // -------------- + // PCGEN <-> CSR + // -------------- + logic [riscv::VLEN-1:0] trap_vector_base_commit_pcgen; + logic [riscv::VLEN-1:0] epc_commit_pcgen; + // -------------- + // IF <-> ID + // -------------- + fetch_entry_t fetch_entry_if_id; + logic fetch_valid_if_id; + logic fetch_ready_id_if; + + // -------------- + // ID <-> ISSUE + // -------------- + scoreboard_entry_t issue_entry_id_issue; + logic issue_entry_valid_id_issue; + logic is_ctrl_fow_id_issue; + logic issue_instr_issue_id; + + // -------------- + // ISSUE <-> EX + // -------------- + logic [riscv::VLEN-1:0] rs1_forwarding_id_ex; // unregistered version of fu_data_o.operanda + logic [riscv::VLEN-1:0] rs2_forwarding_id_ex; // unregistered version of fu_data_o.operandb + + fu_data_t fu_data_id_ex; + logic [riscv::VLEN-1:0] pc_id_ex; + logic is_compressed_instr_id_ex; + // fixed latency units + logic flu_ready_ex_id; + logic [TRANS_ID_BITS-1:0] flu_trans_id_ex_id; + logic flu_valid_ex_id; + riscv::xlen_t flu_result_ex_id; + exception_t flu_exception_ex_id; + // ALU + logic alu_valid_id_ex; + // Branches and Jumps + logic branch_valid_id_ex; + + branchpredict_sbe_t branch_predict_id_ex; + logic resolve_branch_ex_id; + // LSU + 
logic lsu_valid_id_ex; + logic lsu_ready_ex_id; + + logic [TRANS_ID_BITS-1:0] load_trans_id_ex_id; + riscv::xlen_t load_result_ex_id; + logic load_valid_ex_id; + exception_t load_exception_ex_id; + + riscv::xlen_t store_result_ex_id; + logic [TRANS_ID_BITS-1:0] store_trans_id_ex_id; + logic store_valid_ex_id; + exception_t store_exception_ex_id; + // MULT + logic mult_valid_id_ex; + // FPU + logic fpu_ready_ex_id; + logic fpu_valid_id_ex; + logic [1:0] fpu_fmt_id_ex; + logic [2:0] fpu_rm_id_ex; + logic [TRANS_ID_BITS-1:0] fpu_trans_id_ex_id; + riscv::xlen_t fpu_result_ex_id; + logic fpu_valid_ex_id; + exception_t fpu_exception_ex_id; + // Accelerator + logic stall_acc_id; + scoreboard_entry_t issue_instr_id_acc; + logic issue_instr_hs_id_acc; + logic [TRANS_ID_BITS-1:0] acc_trans_id_ex_id; + riscv::xlen_t acc_result_ex_id; + logic acc_valid_ex_id; + exception_t acc_exception_ex_id; + logic halt_acc_ctrl; + logic [4:0] acc_resp_fflags; + logic acc_resp_fflags_valid; + // CSR + logic csr_valid_id_ex; + // CVXIF + logic [TRANS_ID_BITS-1:0] x_trans_id_ex_id; + riscv::xlen_t x_result_ex_id; + logic x_valid_ex_id; + exception_t x_exception_ex_id; + logic x_we_ex_id; + logic x_issue_valid_id_ex; + logic x_issue_ready_ex_id; + logic [31:0] x_off_instr_id_ex; + // -------------- + // EX <-> COMMIT + // -------------- + // CSR Commit + logic csr_commit_commit_ex; + logic dirty_fp_state; + logic dirty_v_state; + // LSU Commit + logic lsu_commit_commit_ex; + logic lsu_commit_ready_ex_commit; + logic [TRANS_ID_BITS-1:0] lsu_commit_trans_id; + logic stall_st_pending_ex; + logic no_st_pending_ex; + logic no_st_pending_commit; + logic amo_valid_commit; + // ACCEL Commit + logic acc_valid_acc_ex; + // -------------- + // ID <-> COMMIT + // -------------- + scoreboard_entry_t [CVA6ExtendCfg.NrCommitPorts-1:0] commit_instr_id_commit; + // -------------- + // RVFI + // -------------- + logic [TRANS_ID_BITS-1:0] rvfi_issue_pointer; + logic 
[CVA6ExtendCfg.NrCommitPorts-1:0][TRANS_ID_BITS-1:0] rvfi_commit_pointer; + // -------------- + // COMMIT <-> ID + // -------------- + logic [CVA6ExtendCfg.NrCommitPorts-1:0][4:0] waddr_commit_id; + logic [CVA6ExtendCfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata_commit_id; + logic [CVA6ExtendCfg.NrCommitPorts-1:0] we_gpr_commit_id; + logic [CVA6ExtendCfg.NrCommitPorts-1:0] we_fpr_commit_id; + // -------------- + // CSR <-> * + // -------------- + logic [4:0] fflags_csr_commit; + riscv::xs_t fs; + logic [2:0] frm_csr_id_issue_ex; + logic [6:0] fprec_csr_ex; + riscv::xs_t vs; + logic enable_translation_csr_ex; + logic en_ld_st_translation_csr_ex; + riscv::priv_lvl_t ld_st_priv_lvl_csr_ex; + logic sum_csr_ex; + logic mxr_csr_ex; + logic [riscv::PPNW-1:0] satp_ppn_csr_ex; + logic [ASID_WIDTH-1:0] asid_csr_ex; + logic [11:0] csr_addr_ex_csr; + fu_op csr_op_commit_csr; + riscv::xlen_t csr_wdata_commit_csr; + riscv::xlen_t csr_rdata_csr_commit; + exception_t csr_exception_csr_commit; + logic tvm_csr_id; + logic tw_csr_id; + logic tsr_csr_id; + irq_ctrl_t irq_ctrl_csr_id; + logic dcache_en_csr_nbdcache; + logic csr_write_fflags_commit_cs; + logic icache_en_csr; + logic acc_cons_en_csr; + logic debug_mode; + logic single_step_csr_commit; + riscv::pmpcfg_t [15:0] pmpcfg; + logic [15:0][riscv::PLEN-3:0] pmpaddr; + logic [31:0] mcountinhibit_csr_perf; + // ---------------------------- + // Performance Counters <-> * + // ---------------------------- + logic [11:0] addr_csr_perf; + riscv::xlen_t data_csr_perf, data_perf_csr; + logic we_csr_perf; + + logic icache_flush_ctrl_cache; + logic itlb_miss_ex_perf; + logic dtlb_miss_ex_perf; + logic dcache_miss_cache_perf; + logic icache_miss_cache_perf; + logic [ NumPorts-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits; + logic stall_issue; + // -------------- + // CTRL <-> * + // -------------- + logic set_pc_ctrl_pcgen; + logic flush_csr_ctrl; + logic flush_unissued_instr_ctrl_id; + logic flush_ctrl_if; + logic flush_ctrl_id; + logic 
flush_ctrl_ex; + logic flush_ctrl_bp; + logic flush_tlb_ctrl_ex; + logic fence_i_commit_controller; + logic fence_commit_controller; + logic sfence_vma_commit_controller; + logic halt_ctrl; + logic halt_csr_ctrl; + logic dcache_flush_ctrl_cache; + logic dcache_flush_ack_cache_ctrl; + logic set_debug_pc; + logic flush_commit; + logic flush_acc; + + icache_areq_t icache_areq_ex_cache; + icache_arsp_t icache_areq_cache_ex; + icache_dreq_t icache_dreq_if_cache; + icache_drsp_t icache_dreq_cache_if; + + amo_req_t amo_req; + amo_resp_t amo_resp; + logic sb_full; + + // ---------------- + // DCache <-> * + // ---------------- + dcache_req_i_t [ 2:0] dcache_req_ports_ex_cache; + dcache_req_o_t [ 2:0] dcache_req_ports_cache_ex; + dcache_req_i_t [ 1:0] dcache_req_ports_acc_cache; + dcache_req_o_t [ 1:0] dcache_req_ports_cache_acc; + logic dcache_commit_wbuffer_empty; + logic dcache_commit_wbuffer_not_ni; + + //RVFI + lsu_ctrl_t rvfi_lsu_ctrl; + logic [riscv::PLEN-1:0] rvfi_mem_paddr; + logic rvfi_is_compressed; + rvfi_probes_t rvfi_probes; + + + // Accelerator port + logic [ 63:0] inval_addr; + logic inval_valid; + logic inval_ready; + + // -------------- + // Frontend + // -------------- + frontend #( + .CVA6Cfg(CVA6ExtendCfg) + ) i_frontend ( + .flush_i (flush_ctrl_if), // not entirely correct + .flush_bp_i (1'b0), + .halt_i (halt_ctrl), + .debug_mode_i (debug_mode), + .boot_addr_i (boot_addr_i[riscv::VLEN-1:0]), + .icache_dreq_i (icache_dreq_cache_if), + .icache_dreq_o (icache_dreq_if_cache), + .resolved_branch_i (resolved_branch), + .pc_commit_i (pc_commit), + .set_pc_commit_i (set_pc_ctrl_pcgen), + .set_debug_pc_i (set_debug_pc), + .epc_i (epc_commit_pcgen), + .eret_i (eret), + .trap_vector_base_i (trap_vector_base_commit_pcgen), + .ex_valid_i (ex_commit.valid), + .fetch_entry_o (fetch_entry_if_id), + .fetch_entry_valid_o(fetch_valid_if_id), + .fetch_entry_ready_i(fetch_ready_id_if), + .* + ); + + // --------- + // ID + // --------- + id_stage #( + 
.CVA6Cfg(CVA6ExtendCfg) + ) id_stage_i ( + .clk_i, + .rst_ni, + .flush_i(flush_ctrl_if), + .debug_req_i, + + .fetch_entry_i (fetch_entry_if_id), + .fetch_entry_valid_i(fetch_valid_if_id), + .fetch_entry_ready_o(fetch_ready_id_if), + + .issue_entry_o (issue_entry_id_issue), + .issue_entry_valid_o(issue_entry_valid_id_issue), + .is_ctrl_flow_o (is_ctrl_fow_id_issue), + .issue_instr_ack_i (issue_instr_issue_id), + + .rvfi_is_compressed_o(rvfi_is_compressed), + + .priv_lvl_i (priv_lvl), + .fs_i (fs), + .frm_i (frm_csr_id_issue_ex), + .vs_i (vs), + .irq_i (irq_i), + .irq_ctrl_i (irq_ctrl_csr_id), + .debug_mode_i(debug_mode), + .tvm_i (tvm_csr_id), + .tw_i (tw_csr_id), + .tsr_i (tsr_csr_id) + ); + + logic [NrWbPorts-1:0][TRANS_ID_BITS-1:0] trans_id_ex_id; + logic [NrWbPorts-1:0][riscv::XLEN-1:0] wbdata_ex_id; + exception_t [NrWbPorts-1:0] ex_ex_ex_id; // exception from execute, ex_stage to id_stage + logic [NrWbPorts-1:0] wt_valid_ex_id; + + if (CVA6ExtendCfg.CvxifEn) begin + assign trans_id_ex_id = { + x_trans_id_ex_id, + flu_trans_id_ex_id, + load_trans_id_ex_id, + store_trans_id_ex_id, + fpu_trans_id_ex_id + }; + assign wbdata_ex_id = { + x_result_ex_id, flu_result_ex_id, load_result_ex_id, store_result_ex_id, fpu_result_ex_id + }; + assign ex_ex_ex_id = { + x_exception_ex_id, + flu_exception_ex_id, + load_exception_ex_id, + store_exception_ex_id, + fpu_exception_ex_id + }; + assign wt_valid_ex_id = { + x_valid_ex_id, flu_valid_ex_id, load_valid_ex_id, store_valid_ex_id, fpu_valid_ex_id + }; + end else if (CVA6ExtendCfg.EnableAccelerator) begin + assign trans_id_ex_id = { + flu_trans_id_ex_id, + load_trans_id_ex_id, + store_trans_id_ex_id, + fpu_trans_id_ex_id, + acc_trans_id_ex_id + }; + assign wbdata_ex_id = { + flu_result_ex_id, load_result_ex_id, store_result_ex_id, fpu_result_ex_id, acc_result_ex_id + }; + assign ex_ex_ex_id = { + flu_exception_ex_id, + load_exception_ex_id, + store_exception_ex_id, + fpu_exception_ex_id, + acc_exception_ex_id + }; + assign 
wt_valid_ex_id = { + flu_valid_ex_id, load_valid_ex_id, store_valid_ex_id, fpu_valid_ex_id, acc_valid_ex_id + }; + end else begin + assign trans_id_ex_id = { + flu_trans_id_ex_id, load_trans_id_ex_id, store_trans_id_ex_id, fpu_trans_id_ex_id + }; + assign wbdata_ex_id = { + flu_result_ex_id, load_result_ex_id, store_result_ex_id, fpu_result_ex_id + }; + assign ex_ex_ex_id = { + flu_exception_ex_id, load_exception_ex_id, store_exception_ex_id, fpu_exception_ex_id + }; + assign wt_valid_ex_id = {flu_valid_ex_id, load_valid_ex_id, store_valid_ex_id, fpu_valid_ex_id}; + end + + if (CVA6ExtendCfg.CvxifEn && CVA6ExtendCfg.EnableAccelerator) begin : gen_err_xif_and_acc + $error("X-interface and accelerator port cannot be enabled at the same time."); + end + + // --------- + // Issue + // --------- + issue_stage #( + .CVA6Cfg(CVA6ExtendCfg) + ) issue_stage_i ( + .clk_i, + .rst_ni, + .sb_full_o (sb_full), + .flush_unissued_instr_i(flush_unissued_instr_ctrl_id), + .flush_i (flush_ctrl_id), + .stall_i (stall_acc_id), + // ID Stage + .decoded_instr_i (issue_entry_id_issue), + .decoded_instr_valid_i (issue_entry_valid_id_issue), + .is_ctrl_flow_i (is_ctrl_fow_id_issue), + .decoded_instr_ack_o (issue_instr_issue_id), + // Functional Units + .rs1_forwarding_o (rs1_forwarding_id_ex), + .rs2_forwarding_o (rs2_forwarding_id_ex), + .fu_data_o (fu_data_id_ex), + .pc_o (pc_id_ex), + .is_compressed_instr_o (is_compressed_instr_id_ex), + // fixed latency unit ready + .flu_ready_i (flu_ready_ex_id), + // ALU + .alu_valid_o (alu_valid_id_ex), + // Branches and Jumps + .branch_valid_o (branch_valid_id_ex), // branch is valid + .branch_predict_o (branch_predict_id_ex), // branch predict to ex + .resolve_branch_i (resolve_branch_ex_id), // in order to resolve the branch + // LSU + .lsu_ready_i (lsu_ready_ex_id), + .lsu_valid_o (lsu_valid_id_ex), + // Multiplier + .mult_valid_o (mult_valid_id_ex), + // FPU + .fpu_ready_i (fpu_ready_ex_id), + .fpu_valid_o (fpu_valid_id_ex), + .fpu_fmt_o 
(fpu_fmt_id_ex), + .fpu_rm_o (fpu_rm_id_ex), + // CSR + .csr_valid_o (csr_valid_id_ex), + // CVXIF + .x_issue_valid_o (x_issue_valid_id_ex), + .x_issue_ready_i (x_issue_ready_ex_id), + .x_off_instr_o (x_off_instr_id_ex), + // Accelerator + .issue_instr_o (issue_instr_id_acc), + .issue_instr_hs_o (issue_instr_hs_id_acc), + // Commit + .resolved_branch_i (resolved_branch), + .trans_id_i (trans_id_ex_id), + .wbdata_i (wbdata_ex_id), + .ex_ex_i (ex_ex_ex_id), + .wt_valid_i (wt_valid_ex_id), + .x_we_i (x_we_ex_id), + + .waddr_i (waddr_commit_id), + .wdata_i (wdata_commit_id), + .we_gpr_i (we_gpr_commit_id), + .we_fpr_i (we_fpr_commit_id), + .commit_instr_o (commit_instr_id_commit), + .commit_ack_i (commit_ack), + // Performance Counters + .stall_issue_o (stall_issue), + //RVFI + .rvfi_issue_pointer_o (rvfi_issue_pointer), + .rvfi_commit_pointer_o(rvfi_commit_pointer), + .* + ); + + // --------- + // EX + // --------- + ex_stage #( + .CVA6Cfg (CVA6ExtendCfg), + .ASID_WIDTH(ASID_WIDTH) + ) ex_stage_i ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .debug_mode_i (debug_mode), + .flush_i (flush_ctrl_ex), + .rs1_forwarding_i (rs1_forwarding_id_ex), + .rs2_forwarding_i (rs2_forwarding_id_ex), + .fu_data_i (fu_data_id_ex), + .pc_i (pc_id_ex), + .is_compressed_instr_i(is_compressed_instr_id_ex), + // fixed latency units + .flu_result_o (flu_result_ex_id), + .flu_trans_id_o (flu_trans_id_ex_id), + .flu_valid_o (flu_valid_ex_id), + .flu_exception_o (flu_exception_ex_id), + .flu_ready_o (flu_ready_ex_id), + // ALU + .alu_valid_i (alu_valid_id_ex), + // Branches and Jumps + .branch_valid_i (branch_valid_id_ex), + .branch_predict_i (branch_predict_id_ex), // branch predict to ex + .resolved_branch_o (resolved_branch), + .resolve_branch_o (resolve_branch_ex_id), + // CSR + .csr_valid_i (csr_valid_id_ex), + .csr_addr_o (csr_addr_ex_csr), + .csr_commit_i (csr_commit_commit_ex), // from commit + // MULT + .mult_valid_i (mult_valid_id_ex), + // LSU + .lsu_ready_o (lsu_ready_ex_id), + 
.lsu_valid_i (lsu_valid_id_ex), + + .load_result_o (load_result_ex_id), + .load_trans_id_o (load_trans_id_ex_id), + .load_valid_o (load_valid_ex_id), + .load_exception_o(load_exception_ex_id), + + .store_result_o (store_result_ex_id), + .store_trans_id_o (store_trans_id_ex_id), + .store_valid_o (store_valid_ex_id), + .store_exception_o(store_exception_ex_id), + + .lsu_commit_i (lsu_commit_commit_ex), // from commit + .lsu_commit_ready_o (lsu_commit_ready_ex_commit), // to commit + .commit_tran_id_i (lsu_commit_trans_id), // from commit + .stall_st_pending_i (stall_st_pending_ex), + .no_st_pending_o (no_st_pending_ex), + // FPU + .fpu_ready_o (fpu_ready_ex_id), + .fpu_valid_i (fpu_valid_id_ex), + .fpu_fmt_i (fpu_fmt_id_ex), + .fpu_rm_i (fpu_rm_id_ex), + .fpu_frm_i (frm_csr_id_issue_ex), + .fpu_prec_i (fprec_csr_ex), + .fpu_trans_id_o (fpu_trans_id_ex_id), + .fpu_result_o (fpu_result_ex_id), + .fpu_valid_o (fpu_valid_ex_id), + .fpu_exception_o (fpu_exception_ex_id), + .amo_valid_commit_i (amo_valid_commit), + .amo_req_o (amo_req), + .amo_resp_i (amo_resp), + // CoreV-X-Interface + .x_valid_i (x_issue_valid_id_ex), + .x_ready_o (x_issue_ready_ex_id), + .x_off_instr_i (x_off_instr_id_ex), + .x_trans_id_o (x_trans_id_ex_id), + .x_exception_o (x_exception_ex_id), + .x_result_o (x_result_ex_id), + .x_valid_o (x_valid_ex_id), + .x_we_o (x_we_ex_id), + .cvxif_req_o (cvxif_req), + .cvxif_resp_i (cvxif_resp), + // Accelerator + .acc_valid_i (acc_valid_acc_ex), + // Performance counters + .itlb_miss_o (itlb_miss_ex_perf), + .dtlb_miss_o (dtlb_miss_ex_perf), + // Memory Management + .enable_translation_i (enable_translation_csr_ex), // from CSR + .en_ld_st_translation_i (en_ld_st_translation_csr_ex), + .flush_tlb_i (flush_tlb_ctrl_ex), + .priv_lvl_i (priv_lvl), // from CSR + .ld_st_priv_lvl_i (ld_st_priv_lvl_csr_ex), // from CSR + .sum_i (sum_csr_ex), // from CSR + .mxr_i (mxr_csr_ex), // from CSR + .satp_ppn_i (satp_ppn_csr_ex), // from CSR + .asid_i (asid_csr_ex), // from CSR 
+ .icache_areq_i (icache_areq_cache_ex), + .icache_areq_o (icache_areq_ex_cache), + // DCACHE interfaces + .dcache_req_ports_i (dcache_req_ports_cache_ex), + .dcache_req_ports_o (dcache_req_ports_ex_cache), + .dcache_wbuffer_empty_i (dcache_commit_wbuffer_empty), + .dcache_wbuffer_not_ni_i(dcache_commit_wbuffer_not_ni), + // PMP + .pmpcfg_i (pmpcfg), + .pmpaddr_i (pmpaddr), + //RVFI + .rvfi_lsu_ctrl_o (rvfi_lsu_ctrl), + .rvfi_mem_paddr_o (rvfi_mem_paddr) + ); + + // --------- + // Commit + // --------- + + // we have to make sure that the whole write buffer path is empty before + // used e.g. for fence instructions. + assign no_st_pending_commit = no_st_pending_ex & dcache_commit_wbuffer_empty; + + commit_stage #( + .CVA6Cfg(CVA6ExtendCfg) + ) commit_stage_i ( + .clk_i, + .rst_ni, + .halt_i (halt_ctrl), + .flush_dcache_i (dcache_flush_ctrl_cache), + .exception_o (ex_commit), + .dirty_fp_state_o (dirty_fp_state), + .single_step_i (single_step_csr_commit), + .commit_instr_i (commit_instr_id_commit), + .commit_ack_o (commit_ack), + .no_st_pending_i (no_st_pending_commit), + .waddr_o (waddr_commit_id), + .wdata_o (wdata_commit_id), + .we_gpr_o (we_gpr_commit_id), + .we_fpr_o (we_fpr_commit_id), + .commit_lsu_o (lsu_commit_commit_ex), + .commit_lsu_ready_i(lsu_commit_ready_ex_commit), + .commit_tran_id_o (lsu_commit_trans_id), + .amo_valid_commit_o(amo_valid_commit), + .amo_resp_i (amo_resp), + .commit_csr_o (csr_commit_commit_ex), + .pc_o (pc_commit), + .csr_op_o (csr_op_commit_csr), + .csr_wdata_o (csr_wdata_commit_csr), + .csr_rdata_i (csr_rdata_csr_commit), + .csr_write_fflags_o(csr_write_fflags_commit_cs), + .csr_exception_i (csr_exception_csr_commit), + .fence_i_o (fence_i_commit_controller), + .fence_o (fence_commit_controller), + .sfence_vma_o (sfence_vma_commit_controller), + .flush_commit_o (flush_commit), + .* + ); + + // --------- + // CSR + // --------- + csr_regfile #( + .CVA6Cfg (CVA6ExtendCfg), + .AsidWidth (ASID_WIDTH), + .MHPMCounterNum(MHPMCounterNum) 
+ ) csr_regfile_i ( + .flush_o (flush_csr_ctrl), + .halt_csr_o (halt_csr_ctrl), + .commit_instr_i (commit_instr_id_commit), + .commit_ack_i (commit_ack), + .boot_addr_i (boot_addr_i[riscv::VLEN-1:0]), + .hart_id_i (hart_id_i[riscv::XLEN-1:0]), + .ex_i (ex_commit), + .csr_op_i (csr_op_commit_csr), + .csr_write_fflags_i (csr_write_fflags_commit_cs), + .dirty_fp_state_i (dirty_fp_state), + .dirty_v_state_i (dirty_v_state), + .csr_addr_i (csr_addr_ex_csr), + .csr_wdata_i (csr_wdata_commit_csr), + .csr_rdata_o (csr_rdata_csr_commit), + .pc_i (pc_commit), + .csr_exception_o (csr_exception_csr_commit), + .epc_o (epc_commit_pcgen), + .eret_o (eret), + .set_debug_pc_o (set_debug_pc), + .trap_vector_base_o (trap_vector_base_commit_pcgen), + .priv_lvl_o (priv_lvl), + .acc_fflags_ex_i (acc_resp_fflags), + .acc_fflags_ex_valid_i (acc_resp_fflags_valid), + .fs_o (fs), + .fflags_o (fflags_csr_commit), + .frm_o (frm_csr_id_issue_ex), + .fprec_o (fprec_csr_ex), + .vs_o (vs), + .irq_ctrl_o (irq_ctrl_csr_id), + .ld_st_priv_lvl_o (ld_st_priv_lvl_csr_ex), + .en_translation_o (enable_translation_csr_ex), + .en_ld_st_translation_o(en_ld_st_translation_csr_ex), + .sum_o (sum_csr_ex), + .mxr_o (mxr_csr_ex), + .satp_ppn_o (satp_ppn_csr_ex), + .asid_o (asid_csr_ex), + .tvm_o (tvm_csr_id), + .tw_o (tw_csr_id), + .tsr_o (tsr_csr_id), + .debug_mode_o (debug_mode), + .single_step_o (single_step_csr_commit), + .dcache_en_o (dcache_en_csr_nbdcache), + .icache_en_o (icache_en_csr), + .acc_cons_en_o (acc_cons_en_csr), + .perf_addr_o (addr_csr_perf), + .perf_data_o (data_csr_perf), + .perf_data_i (data_perf_csr), + .perf_we_o (we_csr_perf), + .pmpcfg_o (pmpcfg), + .pmpaddr_o (pmpaddr), + .mcountinhibit_o (mcountinhibit_csr_perf), + .debug_req_i, + .ipi_i, + .irq_i, + .time_irq_i, + .* + ); + + // ------------------------ + // Performance Counters + // ------------------------ + if (PERF_COUNTER_EN) begin : gen_perf_counter + perf_counters #( + .CVA6Cfg (CVA6ExtendCfg), + .NumPorts(NumPorts) + ) 
perf_counters_i ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .debug_mode_i (debug_mode), + .addr_i (addr_csr_perf), + .we_i (we_csr_perf), + .data_i (data_csr_perf), + .data_o (data_perf_csr), + .commit_instr_i(commit_instr_id_commit), + .commit_ack_i (commit_ack), + + .l1_icache_miss_i (icache_miss_cache_perf), + .l1_dcache_miss_i (dcache_miss_cache_perf), + .itlb_miss_i (itlb_miss_ex_perf), + .dtlb_miss_i (dtlb_miss_ex_perf), + .sb_full_i (sb_full), + .if_empty_i (~fetch_valid_if_id), + .ex_i (ex_commit), + .eret_i (eret), + .resolved_branch_i (resolved_branch), + .branch_exceptions_i(flu_exception_ex_id), + .l1_icache_access_i (icache_dreq_if_cache), + .l1_dcache_access_i (dcache_req_ports_ex_cache), + .miss_vld_bits_i (miss_vld_bits), + .i_tlb_flush_i (flush_tlb_ctrl_ex), + .stall_issue_i (stall_issue), + .mcountinhibit_i (mcountinhibit_csr_perf) + ); + end : gen_perf_counter + else begin : gen_no_perf_counter + assign data_perf_csr = '0; + end : gen_no_perf_counter + + // ------------ + // Controller + // ------------ + controller #( + .CVA6Cfg(CVA6ExtendCfg) + ) controller_i ( + // flush ports + .set_pc_commit_o (set_pc_ctrl_pcgen), + .flush_unissued_instr_o(flush_unissued_instr_ctrl_id), + .flush_if_o (flush_ctrl_if), + .flush_id_o (flush_ctrl_id), + .flush_ex_o (flush_ctrl_ex), + .flush_bp_o (flush_ctrl_bp), + .flush_tlb_o (flush_tlb_ctrl_ex), + .flush_dcache_o (dcache_flush_ctrl_cache), + .flush_dcache_ack_i (dcache_flush_ack_cache_ctrl), + + .halt_csr_i (halt_csr_ctrl), + .halt_acc_i (halt_acc_ctrl), + .halt_o (halt_ctrl), + // control ports + .eret_i (eret), + .ex_valid_i (ex_commit.valid), + .set_debug_pc_i (set_debug_pc), + .flush_csr_i (flush_csr_ctrl), + .resolved_branch_i(resolved_branch), + .fence_i_i (fence_i_commit_controller), + .fence_i (fence_commit_controller), + .sfence_vma_i (sfence_vma_commit_controller), + .flush_commit_i (flush_commit), + .flush_acc_i (flush_acc), + + .flush_icache_o(icache_flush_ctrl_cache), + .* + ); + + // 
------------------- + // Cache Subsystem + // ------------------- + + // Acc dispatcher and store buffer share a dcache request port. + // Store buffer always has priority access over acc dispatcher. + dcache_req_i_t [NumPorts-1:0] dcache_req_to_cache; + dcache_req_o_t [NumPorts-1:0] dcache_req_from_cache; + + // D$ request + assign dcache_req_to_cache[0] = dcache_req_ports_ex_cache[0]; + assign dcache_req_to_cache[1] = dcache_req_ports_ex_cache[1]; + assign dcache_req_to_cache[2] = dcache_req_ports_acc_cache[0]; + assign dcache_req_to_cache[3] = dcache_req_ports_ex_cache[2].data_req ? dcache_req_ports_ex_cache [2] : + dcache_req_ports_acc_cache[1]; + + // D$ response + assign dcache_req_ports_cache_ex[0] = dcache_req_from_cache[0]; + assign dcache_req_ports_cache_ex[1] = dcache_req_from_cache[1]; + assign dcache_req_ports_cache_acc[0] = dcache_req_from_cache[2]; + always_comb begin : gen_dcache_req_store_data_gnt + dcache_req_ports_cache_ex[2] = dcache_req_from_cache[3]; + dcache_req_ports_cache_acc[1] = dcache_req_from_cache[3]; + + // Set gnt signal + dcache_req_ports_cache_ex[2].data_gnt &= dcache_req_ports_ex_cache[2].data_req; + dcache_req_ports_cache_acc[1].data_gnt &= !dcache_req_ports_ex_cache[2].data_req; + end + + if (DCACHE_TYPE == int'(config_pkg::WT)) begin : gen_cache_wt + // this is a cache subsystem that is compatible with OpenPiton + wt_cache_subsystem #( + .CVA6Cfg (CVA6ExtendCfg), + .NumPorts (NumPorts), + .noc_req_t (noc_req_t), + .noc_resp_t(noc_resp_t) + ) i_cache_subsystem ( + // to D$ + .clk_i (clk_i), + .rst_ni (rst_ni), + // I$ + .icache_en_i (icache_en_csr), + .icache_flush_i (icache_flush_ctrl_cache), + .icache_miss_o (icache_miss_cache_perf), + .icache_areq_i (icache_areq_ex_cache), + .icache_areq_o (icache_areq_cache_ex), + .icache_dreq_i (icache_dreq_if_cache), + .icache_dreq_o (icache_dreq_cache_if), + // D$ + .dcache_enable_i (dcache_en_csr_nbdcache), + .dcache_flush_i (dcache_flush_ctrl_cache), +
.dcache_flush_ack_o(dcache_flush_ack_cache_ctrl), + // to commit stage + .dcache_amo_req_i (amo_req), + .dcache_amo_resp_o (amo_resp), + // from PTW, Load Unit and Store Unit + .dcache_miss_o (dcache_miss_cache_perf), + .miss_vld_bits_o (miss_vld_bits), + .dcache_req_ports_i(dcache_req_to_cache), + .dcache_req_ports_o(dcache_req_from_cache), + // write buffer status + .wbuffer_empty_o (dcache_commit_wbuffer_empty), + .wbuffer_not_ni_o (dcache_commit_wbuffer_not_ni), + // memory side + .noc_req_o (noc_req_o), + .noc_resp_i (noc_resp_i), + .inval_addr_i (inval_addr), + .inval_valid_i (inval_valid), + .inval_ready_o (inval_ready) + ); + end else if (DCACHE_TYPE == int'(config_pkg::HPDCACHE)) begin : gen_cache_hpd + cva6_hpdcache_subsystem #( + .CVA6Cfg (CVA6ExtendCfg), + .NumPorts (NumPorts), + .noc_req_t (noc_req_t), + .noc_resp_t(noc_resp_t), + .cmo_req_t (logic /*FIXME*/), + .cmo_rsp_t (logic /*FIXME*/) + ) i_cache_subsystem ( + .clk_i (clk_i), + .rst_ni(rst_ni), + + .icache_en_i (icache_en_csr), + .icache_flush_i(icache_flush_ctrl_cache), + .icache_miss_o (icache_miss_cache_perf), + .icache_areq_i (icache_areq_ex_cache), + .icache_areq_o (icache_areq_cache_ex), + .icache_dreq_i (icache_dreq_if_cache), + .icache_dreq_o (icache_dreq_cache_if), + + .dcache_enable_i (dcache_en_csr_nbdcache), + .dcache_flush_i (dcache_flush_ctrl_cache), + .dcache_flush_ack_o(dcache_flush_ack_cache_ctrl), + .dcache_miss_o (dcache_miss_cache_perf), + + .dcache_amo_req_i (amo_req), + .dcache_amo_resp_o(amo_resp), + + .dcache_cmo_req_i ('0 /*FIXME*/), + .dcache_cmo_resp_o( /*FIXME*/), + + .dcache_req_ports_i(dcache_req_to_cache), + .dcache_req_ports_o(dcache_req_from_cache), + + .wbuffer_empty_o (dcache_commit_wbuffer_empty), + .wbuffer_not_ni_o(dcache_commit_wbuffer_not_ni), + + .hwpf_base_set_i ('0 /*FIXME*/), + .hwpf_base_i ('0 /*FIXME*/), + .hwpf_base_o ( /*FIXME*/), + .hwpf_param_set_i ('0 /*FIXME*/), + .hwpf_param_i ('0 /*FIXME*/), + .hwpf_param_o ( /*FIXME*/), + 
.hwpf_throttle_set_i('0 /*FIXME*/), + .hwpf_throttle_i ('0 /*FIXME*/), + .hwpf_throttle_o ( /*FIXME*/), + .hwpf_status_o ( /*FIXME*/), + + .noc_req_o (noc_req_o), + .noc_resp_i(noc_resp_i) + ); + assign inval_ready = 1'b1; + end else begin : gen_cache_wb + std_cache_subsystem #( + // note: this only works with one cacheable region + // not as important since this cache subsystem is about to be + // deprecated + .CVA6Cfg (CVA6ExtendCfg), + .NumPorts (NumPorts), + .axi_ar_chan_t(axi_ar_chan_t), + .axi_aw_chan_t(axi_aw_chan_t), + .axi_w_chan_t (axi_w_chan_t), + .axi_req_t (noc_req_t), + .axi_rsp_t (noc_resp_t) + ) i_cache_subsystem ( + // to D$ + .clk_i (clk_i), + .rst_ni (rst_ni), + .priv_lvl_i (priv_lvl), + // I$ + .icache_en_i (icache_en_csr), + .icache_flush_i (icache_flush_ctrl_cache), + .icache_miss_o (icache_miss_cache_perf), + .icache_areq_i (icache_areq_ex_cache), + .icache_areq_o (icache_areq_cache_ex), + .icache_dreq_i (icache_dreq_if_cache), + .icache_dreq_o (icache_dreq_cache_if), + // D$ + .dcache_enable_i (dcache_en_csr_nbdcache), + .dcache_flush_i (dcache_flush_ctrl_cache), + .dcache_flush_ack_o(dcache_flush_ack_cache_ctrl), + // to commit stage + .amo_req_i (amo_req), + .amo_resp_o (amo_resp), + .dcache_miss_o (dcache_miss_cache_perf), + // this is statically set to 1 as the std_cache does not have a wbuffer + .wbuffer_empty_o (dcache_commit_wbuffer_empty), + // from PTW, Load Unit and Store Unit + .dcache_req_ports_i(dcache_req_to_cache), + .dcache_req_ports_o(dcache_req_from_cache), + // memory side + .axi_req_o (noc_req_o), + .axi_resp_i (noc_resp_i) + ); + assign dcache_commit_wbuffer_not_ni = 1'b1; + assign inval_ready = 1'b1; + end + + // ---------------- + // Accelerator + // ---------------- + + if (CVA6ExtendCfg.EnableAccelerator) begin : gen_accelerator + acc_dispatcher #( + .CVA6Cfg (CVA6ExtendCfg), + .acc_cfg_t (acc_cfg_t), + .AccCfg (AccCfg), + .acc_req_t (cvxif_req_t), + .acc_resp_t(cvxif_resp_t) + ) i_acc_dispatcher ( + .clk_i (clk_i), 
+ .rst_ni (rst_ni), + .flush_unissued_instr_i(flush_unissued_instr_ctrl_id), + .flush_ex_i (flush_ctrl_ex), + .flush_pipeline_o (flush_acc), + .acc_cons_en_i (acc_cons_en_csr), + .acc_fflags_valid_o (acc_resp_fflags_valid), + .acc_fflags_o (acc_resp_fflags), + .ld_st_priv_lvl_i (ld_st_priv_lvl_csr_ex), + .sum_i (sum_csr_ex), + .pmpcfg_i (pmpcfg), + .pmpaddr_i (pmpaddr), + .fcsr_frm_i (frm_csr_id_issue_ex), + .dirty_v_state_o (dirty_v_state), + .issue_instr_i (issue_instr_id_acc), + .issue_instr_hs_i (issue_instr_hs_id_acc), + .issue_stall_o (stall_acc_id), + .fu_data_i (fu_data_id_ex), + .commit_instr_i (commit_instr_id_commit), + .commit_st_barrier_i (fence_i_commit_controller | fence_commit_controller), + .acc_trans_id_o (acc_trans_id_ex_id), + .acc_result_o (acc_result_ex_id), + .acc_valid_o (acc_valid_ex_id), + .acc_exception_o (acc_exception_ex_id), + .acc_valid_ex_o (acc_valid_acc_ex), + .commit_ack_i (commit_ack), + .acc_stall_st_pending_o(stall_st_pending_ex), + .acc_no_st_pending_i (no_st_pending_commit), + .dcache_req_ports_i (dcache_req_ports_ex_cache), + .ctrl_halt_o (halt_acc_ctrl), + .acc_dcache_req_ports_o(dcache_req_ports_acc_cache), + .acc_dcache_req_ports_i(dcache_req_ports_cache_acc), + .inval_ready_i (inval_ready), + .inval_valid_o (inval_valid), + .inval_addr_o (inval_addr), + .acc_req_o (cvxif_req_o), + .acc_resp_i (cvxif_resp_i) + ); + end : gen_accelerator + else begin : gen_no_accelerator + assign acc_trans_id_ex_id = '0; + assign acc_result_ex_id = '0; + assign acc_valid_ex_id = '0; + assign acc_exception_ex_id = '0; + assign acc_resp_fflags = '0; + assign acc_resp_fflags_valid = '0; + assign stall_acc_id = '0; + assign dirty_v_state = '0; + assign acc_valid_acc_ex = '0; + assign halt_acc_ctrl = '0; + assign stall_st_pending_ex = '0; + assign flush_acc = '0; + + // D$ connection is unused + assign dcache_req_ports_acc_cache = '0; + + // No invalidation interface + assign inval_valid = '0; + assign inval_addr = '0; + + // Feed through cvxif 
+ assign cvxif_req_o = cvxif_req; + assign cvxif_resp = cvxif_resp_i; + end : gen_no_accelerator + + // ------------------- + // Parameter Check + // ------------------- + // pragma translate_off +`ifndef VERILATOR + initial config_pkg::check_cfg(CVA6Cfg); +`endif + // pragma translate_on + + // ------------------- + // Instruction Tracer + // ------------------- + + //pragma translate_off +`ifdef PITON_ARIANE + localparam PC_QUEUE_DEPTH = 16; + + logic piton_pc_vld; + logic [ riscv::VLEN-1:0] piton_pc; + logic [CVA6ExtendCfg.NrCommitPorts-1:0][riscv::VLEN-1:0] pc_data; + logic [CVA6ExtendCfg.NrCommitPorts-1:0] pc_pop, pc_empty; + + for (genvar i = 0; i < CVA6ExtendCfg.NrCommitPorts; i++) begin : gen_pc_fifo + fifo_v3 #( + .DATA_WIDTH(64), + .DEPTH(PC_QUEUE_DEPTH) + ) i_pc_fifo ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i ('0), + .testmode_i('0), + .full_o (), + .empty_o (pc_empty[i]), + .usage_o (), + .data_i (commit_instr_id_commit[i].pc), + .push_i (commit_ack[i] & ~commit_instr_id_commit[i].ex.valid), + .data_o (pc_data[i]), + .pop_i (pc_pop[i]) + ); + end + + rr_arb_tree #( + .NumIn(CVA6ExtendCfg.NrCommitPorts), + .DataWidth(64) + ) i_rr_arb_tree ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i('0), + .rr_i ('0), + .req_i (~pc_empty), + .gnt_o (pc_pop), + .data_i (pc_data), + .gnt_i (piton_pc_vld), + .req_o (piton_pc_vld), + .data_o (piton_pc), + .idx_o () + ); +`endif // PITON_ARIANE + +`ifndef VERILATOR + instr_tracer_if tracer_if (clk_i); + // assign instruction tracer interface + // control signals + assign tracer_if.rstn = rst_ni; + assign tracer_if.flush_unissued = flush_unissued_instr_ctrl_id; + assign tracer_if.flush = flush_ctrl_ex; + // fetch + assign tracer_if.instruction = id_stage_i.fetch_entry_i.instruction; + assign tracer_if.fetch_valid = id_stage_i.fetch_entry_valid_i; + assign tracer_if.fetch_ack = id_stage_i.fetch_entry_ready_o; + // Issue + assign tracer_if.issue_ack = issue_stage_i.i_scoreboard.issue_ack_i; + assign 
tracer_if.issue_sbe = issue_stage_i.i_scoreboard.issue_instr_o; + // write-back + assign tracer_if.waddr = waddr_commit_id; + assign tracer_if.wdata = wdata_commit_id; + assign tracer_if.we_gpr = we_gpr_commit_id; + assign tracer_if.we_fpr = we_fpr_commit_id; + // commit + assign tracer_if.commit_instr = commit_instr_id_commit; + assign tracer_if.commit_ack = commit_ack; + // branch predict + assign tracer_if.resolve_branch = resolved_branch; + // address translation + // stores + assign tracer_if.st_valid = ex_stage_i.lsu_i.i_store_unit.store_buffer_i.valid_i; + assign tracer_if.st_paddr = ex_stage_i.lsu_i.i_store_unit.store_buffer_i.paddr_i; + // loads + assign tracer_if.ld_valid = ex_stage_i.lsu_i.i_load_unit.req_port_o.tag_valid; + assign tracer_if.ld_kill = ex_stage_i.lsu_i.i_load_unit.req_port_o.kill_req; + assign tracer_if.ld_paddr = ex_stage_i.lsu_i.i_load_unit.paddr_i; + // exceptions + assign tracer_if.exception = commit_stage_i.exception_o; + // assign current privilege level + assign tracer_if.priv_lvl = priv_lvl; + assign tracer_if.debug_mode = debug_mode; + + instr_tracer instr_tracer_i ( + .tracer_if(tracer_if), + .hart_id_i + ); + + // mock tracer for Verilator, to be used with spike-dasm +`else + + int f; + logic [63:0] cycles; + + initial begin + string fn; + $sformat(fn, "trace_hart_%0.0f.dasm", hart_id_i); + f = $fopen(fn, "w"); + end + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + cycles <= 0; + end else begin + byte mode = ""; + if (CVA6Cfg.DebugEn && debug_mode) mode = "D"; + else begin + case (priv_lvl) + riscv::PRIV_LVL_M: mode = "M"; + riscv::PRIV_LVL_S: if (CVA6Cfg.RVS) mode = "S"; + riscv::PRIV_LVL_U: mode = "U"; + default: ; // Do nothing + endcase + end + for (int i = 0; i < CVA6ExtendCfg.NrCommitPorts; i++) begin + if (commit_ack[i] && !commit_instr_id_commit[i].ex.valid) begin + $fwrite(f, "%d 0x%0h %s (0x%h) DASM(%h)\n", cycles, commit_instr_id_commit[i].pc, mode, + 
commit_instr_id_commit[i].ex.tval[31:0], commit_instr_id_commit[i].ex.tval[31:0]); + end else if (commit_ack[i] && commit_instr_id_commit[i].ex.valid) begin + if (commit_instr_id_commit[i].ex.cause == 2) begin + $fwrite(f, "Exception Cause: Illegal Instructions, DASM(%h) PC=%h\n", + commit_instr_id_commit[i].ex.tval[31:0], commit_instr_id_commit[i].pc); + end else begin + if (CVA6Cfg.DebugEn && debug_mode) begin + $fwrite(f, "%d 0x%0h %s (0x%h) DASM(%h)\n", cycles, commit_instr_id_commit[i].pc, + mode, commit_instr_id_commit[i].ex.tval[31:0], + commit_instr_id_commit[i].ex.tval[31:0]); + end else begin + $fwrite(f, "Exception Cause: %5d, DASM(%h) PC=%h\n", + commit_instr_id_commit[i].ex.cause, commit_instr_id_commit[i].ex.tval[31:0], + commit_instr_id_commit[i].pc); + end + end + end + end + cycles <= cycles + 1; + end + end + + final begin + $fclose(f); + end +`endif // VERILATOR + //pragma translate_on + + + if (IsRVFI) begin + + cva6_rvfi_probes #( + .CVA6Cfg (CVA6ExtendCfg), + .rvfi_probes_t(rvfi_probes_t) + ) i_cva6_rvfi_combi ( + + .flush_i (flush_ctrl_if), + .issue_instr_ack_i (issue_instr_issue_id), + .fetch_entry_valid_i(fetch_valid_if_id), + .instruction_i (fetch_entry_if_id.instruction), + .is_compressed_i (rvfi_is_compressed), + + .issue_pointer_i (rvfi_issue_pointer), + .commit_pointer_i(rvfi_commit_pointer), + + .flush_unissued_instr_i(flush_unissued_instr_ctrl_id), + .decoded_instr_valid_i (issue_entry_valid_id_issue), + .decoded_instr_ack_i (issue_instr_issue_id), + + .rs1_forwarding_i(rs1_forwarding_id_ex), + .rs2_forwarding_i(rs2_forwarding_id_ex), + + .commit_instr_i(commit_instr_id_commit), + .ex_commit_i (ex_commit), + .priv_lvl_i (priv_lvl), + + .lsu_ctrl_i (rvfi_lsu_ctrl), + .wbdata_i (wbdata_ex_id), + .commit_ack_i(commit_ack), + .mem_paddr_i (rvfi_mem_paddr), + .debug_mode_i(debug_mode), + .wdata_i (wdata_commit_id), + + .rvfi_probes_o(rvfi_probes_o) + + ); + + end //IsRVFI + +endmodule // ariane diff --git 
a/test/type_param/core/cva6_accel_first_pass_decoder_stub.sv b/test/type_param/core/cva6_accel_first_pass_decoder_stub.sv new file mode 100644 index 00000000..20ffb1e9 --- /dev/null +++ b/test/type_param/core/cva6_accel_first_pass_decoder_stub.sv @@ -0,0 +1,31 @@ +// Copyright 2023 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 + +// Author: Nils Wistoff + +// Module stub for the cva6_accel_first_pass_decoder. Replace this with your accelerator's +// first pass decoder. + +module cva6_accel_first_pass_decoder + import ariane_pkg::*; +( + input logic [31:0] instruction_i, // instruction from IF + input riscv::xs_t fs_i, // floating point extension status + input riscv::xs_t vs_i, // vector extension status + output logic is_accel_o, // is an accelerator instruction + output scoreboard_entry_t instruction_o, // predecoded instruction + output logic illegal_instr_o, // is an illegal instruction + output logic is_control_flow_instr_o // is a control flow instruction +); + + assign is_accel_o = 1'b0; + assign instruction_o = '0; + assign illegal_instr_o = 1'b0; + assign is_control_flow_instr_o = 1'b0; + + $error("cva6_accel_first_pass_decoder: instantiated non-functional module stub.\ + Please replace this with your accelerator's first pass decoder \ + (or unset ENABLE_ACCELERATOR)."); + +endmodule : cva6_accel_first_pass_decoder diff --git a/test/type_param/core/cva6_rvfi.sv b/test/type_param/core/cva6_rvfi.sv new file mode 100644 index 00000000..972a50a8 --- /dev/null +++ b/test/type_param/core/cva6_rvfi.sv @@ -0,0 +1,294 @@ +// Copyright 2024 Thales DIS France SAS +// +// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 +// You may obtain a copy of the License at https://solderpad.org/licenses/ +// +// Original Author: Yannick Casamatta - Thales +// Date: 09/01/2024 + + +module cva6_rvfi + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type rvfi_instr_t = logic, + parameter type rvfi_probes_t = logic +) ( + + input logic clk_i, + input logic rst_ni, + + input rvfi_probes_t rvfi_probes_i, + output rvfi_instr_t [CVA6Cfg.NrCommitPorts-1:0] rvfi_o + +); + + // ------------------------------------------ + // CVA6 configuration + // ------------------------------------------ + // Extended config + localparam bit RVF = (riscv::IS_XLEN64 | riscv::IS_XLEN32) & CVA6Cfg.FpuEn; + localparam bit RVD = (riscv::IS_XLEN64 ? 1 : 0) & CVA6Cfg.FpuEn; + localparam bit FpPresent = RVF | RVD | CVA6Cfg.XF16 | CVA6Cfg.XF16ALT | CVA6Cfg.XF8; + localparam bit NSX = CVA6Cfg.XF16 | CVA6Cfg.XF16ALT | CVA6Cfg.XF8 | CVA6Cfg.XFVec; // Are non-standard extensions present? + localparam int unsigned FLen = RVD ? 64 : // D ext. + RVF ? 32 : // F ext. + CVA6Cfg.XF16 ? 16 : // Xf16 ext. + CVA6Cfg.XF16ALT ? 16 : // Xf16alt ext. + CVA6Cfg.XF8 ? 8 : // Xf8 ext. 
+ 1; // Unused in case of no FP + + // Transprecision floating-point extensions configuration + localparam bit RVFVec = RVF & CVA6Cfg.XFVec & FLen>32; // FP32 vectors available if vectors and larger fmt enabled + localparam bit XF16Vec = CVA6Cfg.XF16 & CVA6Cfg.XFVec & FLen>16; // FP16 vectors available if vectors and larger fmt enabled + localparam bit XF16ALTVec = CVA6Cfg.XF16ALT & CVA6Cfg.XFVec & FLen>16; // FP16ALT vectors available if vectors and larger fmt enabled + localparam bit XF8Vec = CVA6Cfg.XF8 & CVA6Cfg.XFVec & FLen>8; // FP8 vectors available if vectors and larger fmt enabled + + localparam bit EnableAccelerator = CVA6Cfg.RVV; // Currently only used by V extension (Ara) + localparam int unsigned NrWbPorts = (CVA6Cfg.CvxifEn || EnableAccelerator) ? 5 : 4; + + localparam NrRgprPorts = 2; + + localparam bit NonIdemPotenceEn = CVA6Cfg.NrNonIdempotentRules && CVA6Cfg.NonIdempotentLength; // Currently only used by V extension (Ara) + + localparam config_pkg::cva6_cfg_t CVA6ExtendCfg = { + CVA6Cfg.NrCommitPorts, + CVA6Cfg.AxiAddrWidth, + CVA6Cfg.AxiDataWidth, + CVA6Cfg.AxiIdWidth, + CVA6Cfg.AxiUserWidth, + CVA6Cfg.NrLoadBufEntries, + CVA6Cfg.FpuEn, + CVA6Cfg.XF16, + CVA6Cfg.XF16ALT, + CVA6Cfg.XF8, + CVA6Cfg.RVA, + CVA6Cfg.RVB, + CVA6Cfg.RVV, + CVA6Cfg.RVC, + CVA6Cfg.RVZCB, + CVA6Cfg.XFVec, + CVA6Cfg.CvxifEn, + CVA6Cfg.ZiCondExtEn, + // Extended + bit'(RVF), + bit'(RVD), + bit'(FpPresent), + bit'(NSX), + unsigned'(FLen), + bit'(RVFVec), + bit'(XF16Vec), + bit'(XF16ALTVec), + bit'(XF8Vec), + unsigned'(NrRgprPorts), + unsigned'(NrWbPorts), + bit'(EnableAccelerator), + CVA6Cfg.RVS, + CVA6Cfg.RVU, + CVA6Cfg.HaltAddress, + CVA6Cfg.ExceptionAddress, + CVA6Cfg.RASDepth, + CVA6Cfg.BTBEntries, + CVA6Cfg.BHTEntries, + CVA6Cfg.DmBaseAddress, + CVA6Cfg.NrPMPEntries, + CVA6Cfg.PMPCfgRstVal, + CVA6Cfg.PMPAddrRstVal, + CVA6Cfg.PMPEntryReadOnly, + CVA6Cfg.NOCType, + CVA6Cfg.NrNonIdempotentRules, + CVA6Cfg.NonIdempotentAddrBase, + CVA6Cfg.NonIdempotentLength, + 
CVA6Cfg.NrExecuteRegionRules, + CVA6Cfg.ExecuteRegionAddrBase, + CVA6Cfg.ExecuteRegionLength, + CVA6Cfg.NrCachedRegionRules, + CVA6Cfg.CachedRegionAddrBase, + CVA6Cfg.CachedRegionLength, + CVA6Cfg.MaxOutstandingStores, + CVA6Cfg.DebugEn, + NonIdemPotenceEn, + CVA6Cfg.AxiBurstWriteEn + }; + + logic flush; + logic issue_instr_ack; + logic fetch_entry_valid; + logic [ 31:0] instruction; + logic is_compressed; + + logic [ TRANS_ID_BITS-1:0] issue_pointer; + logic [CVA6ExtendCfg.NrCommitPorts-1:0][TRANS_ID_BITS-1:0] commit_pointer; + + logic flush_unissued_instr; + logic decoded_instr_valid; + logic decoded_instr_ack; + + riscv::xlen_t rs1_forwarding; + riscv::xlen_t rs2_forwarding; + + scoreboard_entry_t [CVA6ExtendCfg.NrCommitPorts-1:0] commit_instr; + exception_t ex_commit; + riscv::priv_lvl_t priv_lvl; + + lsu_ctrl_t lsu_ctrl; + logic [ CVA6ExtendCfg.NrWbPorts-1:0][ riscv::XLEN-1:0] wbdata; + logic [CVA6ExtendCfg.NrCommitPorts-1:0] commit_ack; + logic [ riscv::PLEN-1:0] mem_paddr; + logic debug_mode; + logic [CVA6ExtendCfg.NrCommitPorts-1:0][ riscv::XLEN-1:0] wdata; + + logic [ riscv::VLEN-1:0] lsu_addr; + logic [ (riscv::XLEN/8)-1:0] lsu_rmask; + logic [ (riscv::XLEN/8)-1:0] lsu_wmask; + logic [ TRANS_ID_BITS-1:0] lsu_addr_trans_id; + + assign flush = rvfi_probes_i.flush; + assign issue_instr_ack = rvfi_probes_i.issue_instr_ack; + assign fetch_entry_valid = rvfi_probes_i.fetch_entry_valid; + assign instruction = rvfi_probes_i.instruction; + assign is_compressed = rvfi_probes_i.is_compressed; + + assign issue_pointer = rvfi_probes_i.issue_pointer; + assign commit_pointer = rvfi_probes_i.commit_pointer; + + assign flush_unissued_instr = rvfi_probes_i.flush_unissued_instr; + assign decoded_instr_valid = rvfi_probes_i.decoded_instr_valid; + assign decoded_instr_ack = rvfi_probes_i.decoded_instr_ack; + + assign rs1_forwarding = rvfi_probes_i.rs1_forwarding; + assign rs2_forwarding = rvfi_probes_i.rs2_forwarding; + + assign commit_instr = rvfi_probes_i.commit_instr; + 
assign ex_commit = rvfi_probes_i.ex_commit; + assign priv_lvl = rvfi_probes_i.priv_lvl; + + assign lsu_ctrl = rvfi_probes_i.lsu_ctrl; + assign wbdata = rvfi_probes_i.wbdata; + assign commit_ack = rvfi_probes_i.commit_ack; + assign mem_paddr = rvfi_probes_i.mem_paddr; + assign debug_mode = rvfi_probes_i.debug_mode; + assign wdata = rvfi_probes_i.wdata; + + assign lsu_addr = lsu_ctrl.vaddr; + assign lsu_rmask = lsu_ctrl.fu == LOAD ? lsu_ctrl.be : '0; + assign lsu_wmask = lsu_ctrl.fu == STORE ? lsu_ctrl.be : '0; + assign lsu_addr_trans_id = lsu_ctrl.trans_id; + + + //ID STAGE + + typedef struct packed { + logic valid; + logic [31:0] instr; + } issue_struct_t; + issue_struct_t issue_n, issue_q; + + always_comb begin + issue_n = issue_q; + + if (issue_instr_ack) issue_n.valid = 1'b0; + + if ((!issue_q.valid || issue_instr_ack) && fetch_entry_valid) begin + issue_n.valid = 1'b1; + issue_n.instr = (is_compressed) ? {{16{1'b0}}, instruction[15:0]} : instruction; + end + + if (flush) issue_n.valid = 1'b0; + end + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + issue_q <= '0; + end else begin + issue_q <= issue_n; + end + end + + //ISSUE STAGE + + // this is the FIFO struct of the issue queue + typedef struct packed { + riscv::xlen_t rs1_rdata; + riscv::xlen_t rs2_rdata; + logic [riscv::VLEN-1:0] lsu_addr; + logic [(riscv::XLEN/8)-1:0] lsu_rmask; + logic [(riscv::XLEN/8)-1:0] lsu_wmask; + riscv::xlen_t lsu_wdata; + logic [31:0] instr; + } sb_mem_t; + sb_mem_t [NR_SB_ENTRIES-1:0] mem_q, mem_n; + + always_comb begin : issue_fifo + mem_n = mem_q; + + if (decoded_instr_valid && decoded_instr_ack && !flush_unissued_instr) begin + mem_n[issue_pointer] = '{ + rs1_rdata: rs1_forwarding, + rs2_rdata: rs2_forwarding, + lsu_addr: '0, + lsu_rmask: '0, + lsu_wmask: '0, + lsu_wdata: '0, + instr: issue_q.instr + }; + end + + if (lsu_rmask != 0) begin + mem_n[lsu_addr_trans_id].lsu_addr = lsu_addr; + mem_n[lsu_addr_trans_id].lsu_rmask = lsu_rmask; + end else if 
(lsu_wmask != 0) begin + mem_n[lsu_addr_trans_id].lsu_addr = lsu_addr; + mem_n[lsu_addr_trans_id].lsu_wmask = lsu_wmask; + mem_n[lsu_addr_trans_id].lsu_wdata = wbdata[1]; + end + end + + always_ff @(posedge clk_i or negedge rst_ni) begin : regs + if (!rst_ni) begin + mem_q <= '{default: sb_mem_t'(0)}; + end else begin + mem_q <= mem_n; + end + end + + //---------------------------------------------------------------------------------------------------------- + // PACK + //---------------------------------------------------------------------------------------------------------- + + always_comb begin + for (int i = 0; i < CVA6ExtendCfg.NrCommitPorts; i++) begin + logic exception; + exception = commit_instr[i].valid && ex_commit.valid; + rvfi_o[i].valid = (commit_ack[i] && !ex_commit.valid) || + (exception && (ex_commit.cause == riscv::ENV_CALL_MMODE || + ex_commit.cause == riscv::ENV_CALL_SMODE || + ex_commit.cause == riscv::ENV_CALL_UMODE)); + rvfi_o[i].insn = mem_q[commit_pointer[i]].instr; + // when trap, the instruction is not executed + rvfi_o[i].trap = exception; + rvfi_o[i].cause = ex_commit.cause; + rvfi_o[i].mode = (CVA6ExtendCfg.DebugEn && debug_mode) ? 2'b10 : priv_lvl; + rvfi_o[i].ixl = riscv::XLEN == 64 ? 2 : 1; + rvfi_o[i].rs1_addr = commit_instr[i].rs1[4:0]; + rvfi_o[i].rs2_addr = commit_instr[i].rs2[4:0]; + rvfi_o[i].rd_addr = commit_instr[i].rd[4:0]; + rvfi_o[i].rd_wdata = (CVA6ExtendCfg.FpPresent && is_rd_fpr(commit_instr[i].op)) ? + commit_instr[i].result : wdata[i]; + rvfi_o[i].pc_rdata = commit_instr[i].pc; + rvfi_o[i].mem_addr = mem_q[commit_pointer[i]].lsu_addr; + // So far, only write paddr is reported. 
TODO: read paddr + rvfi_o[i].mem_paddr = mem_paddr; + rvfi_o[i].mem_wmask = mem_q[commit_pointer[i]].lsu_wmask; + rvfi_o[i].mem_wdata = mem_q[commit_pointer[i]].lsu_wdata; + rvfi_o[i].mem_rmask = mem_q[commit_pointer[i]].lsu_rmask; + rvfi_o[i].mem_rdata = commit_instr[i].result; + rvfi_o[i].rs1_rdata = mem_q[commit_pointer[i]].rs1_rdata; + rvfi_o[i].rs2_rdata = mem_q[commit_pointer[i]].rs2_rdata; + end + end + + +endmodule diff --git a/test/type_param/core/cva6_rvfi_probes.sv b/test/type_param/core/cva6_rvfi_probes.sv new file mode 100644 index 00000000..81d2c5df --- /dev/null +++ b/test/type_param/core/cva6_rvfi_probes.sv @@ -0,0 +1,81 @@ +// Copyright 2024 Thales DIS France SAS +// +// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 +// You may obtain a copy of the License at https://solderpad.org/licenses/ +// +// Original Author: Yannick Casamatta - Thales +// Date: 09/01/2024 + + +module cva6_rvfi_probes + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type rvfi_probes_t = logic +) ( + + input logic flush_i, + input logic issue_instr_ack_i, + input logic fetch_entry_valid_i, + input logic [31:0] instruction_i, + input logic is_compressed_i, + + input logic [TRANS_ID_BITS-1:0] issue_pointer_i, + input logic [CVA6Cfg.NrCommitPorts-1:0][TRANS_ID_BITS-1:0] commit_pointer_i, + + input logic flush_unissued_instr_i, + input logic decoded_instr_valid_i, + input logic decoded_instr_ack_i, + + input riscv::xlen_t rs1_forwarding_i, + input riscv::xlen_t rs2_forwarding_i, + + input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i, + input exception_t ex_commit_i, + input riscv::priv_lvl_t priv_lvl_i, + + input lsu_ctrl_t lsu_ctrl_i, + input logic [ CVA6Cfg.NrWbPorts-1:0][riscv::XLEN-1:0] wbdata_i, + input logic [CVA6Cfg.NrCommitPorts-1:0] 
commit_ack_i, + input logic [ riscv::PLEN-1:0] mem_paddr_i, + input logic debug_mode_i, + input logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata_i, + output rvfi_probes_t rvfi_probes_o +); + + always_comb begin + rvfi_probes_o = '0; + + rvfi_probes_o.flush = flush_i; + rvfi_probes_o.issue_instr_ack = issue_instr_ack_i; + rvfi_probes_o.fetch_entry_valid = fetch_entry_valid_i; + rvfi_probes_o.instruction = instruction_i; + rvfi_probes_o.is_compressed = is_compressed_i; + + rvfi_probes_o.issue_pointer = issue_pointer_i; + rvfi_probes_o.commit_pointer = commit_pointer_i; + + rvfi_probes_o.flush_unissued_instr = flush_unissued_instr_i; + rvfi_probes_o.decoded_instr_valid = decoded_instr_valid_i; + rvfi_probes_o.decoded_instr_ack = decoded_instr_ack_i; + + rvfi_probes_o.rs1_forwarding = rs1_forwarding_i; + rvfi_probes_o.rs2_forwarding = rs2_forwarding_i; + + rvfi_probes_o.commit_instr = commit_instr_i; + rvfi_probes_o.ex_commit = ex_commit_i; + rvfi_probes_o.priv_lvl = priv_lvl_i; + + rvfi_probes_o.lsu_ctrl = lsu_ctrl_i; + rvfi_probes_o.wbdata = wbdata_i; + rvfi_probes_o.commit_ack = commit_ack_i; + rvfi_probes_o.mem_paddr = mem_paddr_i; + rvfi_probes_o.debug_mode = debug_mode_i; + rvfi_probes_o.wdata = wdata_i; + + end + + +endmodule diff --git a/test/type_param/core/cvxif_example/cvxif_example_coprocessor.sv b/test/type_param/core/cvxif_example/cvxif_example_coprocessor.sv new file mode 100644 index 00000000..08e801c3 --- /dev/null +++ b/test/type_param/core/cvxif_example/cvxif_example_coprocessor.sv @@ -0,0 +1,155 @@ +// Copyright 2021 Thales DIS design services SAS +// +// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 +// You may obtain a copy of the License at https://solderpad.org/licenses/ +// +// Original Author: Guillaume Chauvon (guillaume.chauvon@thalesgroup.com) +// Example coprocessor adds rs1,rs2(,rs3) together and gives back the result to the CPU via the CoreV-X-Interface. +// Coprocessor delays the sending of the result depending on result least significant bits. + +module cvxif_example_coprocessor + import cvxif_pkg::*; + import cvxif_instr_pkg::*; +( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input cvxif_req_t cvxif_req_i, + output cvxif_resp_t cvxif_resp_o +); + + //Compressed interface + logic x_compressed_valid_i; + logic x_compressed_ready_o; + x_compressed_req_t x_compressed_req_i; + x_compressed_resp_t x_compressed_resp_o; + //Issue interface + logic x_issue_valid_i; + logic x_issue_ready_o; + x_issue_req_t x_issue_req_i; + x_issue_resp_t x_issue_resp_o; + //Commit interface + logic x_commit_valid_i; + x_commit_t x_commit_i; + //Memory interface + logic x_mem_valid_o; + logic x_mem_ready_i; + x_mem_req_t x_mem_req_o; + x_mem_resp_t x_mem_resp_i; + //Memory result interface + logic x_mem_result_valid_i; + x_mem_result_t x_mem_result_i; + //Result interface + logic x_result_valid_o; + logic x_result_ready_i; + x_result_t x_result_o; + + assign x_compressed_valid_i = cvxif_req_i.x_compressed_valid; + assign x_compressed_req_i = cvxif_req_i.x_compressed_req; + assign x_issue_valid_i = cvxif_req_i.x_issue_valid; + assign x_issue_req_i = cvxif_req_i.x_issue_req; + assign x_commit_valid_i = cvxif_req_i.x_commit_valid; + assign x_commit_i = cvxif_req_i.x_commit; + assign x_mem_ready_i = cvxif_req_i.x_mem_ready; + assign x_mem_resp_i = cvxif_req_i.x_mem_resp; + assign x_mem_result_valid_i = cvxif_req_i.x_mem_result_valid; + assign x_mem_result_i = cvxif_req_i.x_mem_result; + assign x_result_ready_i = cvxif_req_i.x_result_ready; + + assign cvxif_resp_o.x_compressed_ready = 
x_compressed_ready_o; + assign cvxif_resp_o.x_compressed_resp = x_compressed_resp_o; + assign cvxif_resp_o.x_issue_ready = x_issue_ready_o; + assign cvxif_resp_o.x_issue_resp = x_issue_resp_o; + assign cvxif_resp_o.x_mem_valid = x_mem_valid_o; + assign cvxif_resp_o.x_mem_req = x_mem_req_o; + assign cvxif_resp_o.x_result_valid = x_result_valid_o; + assign cvxif_resp_o.x_result = x_result_o; + + //Compressed interface + assign x_compressed_ready_o = '0; + assign x_compressed_resp_o.instr = '0; + assign x_compressed_resp_o.accept = '0; + + instr_decoder #( + .NbInstr (cvxif_instr_pkg::NbInstr), + .CoproInstr(cvxif_instr_pkg::CoproInstr) + ) instr_decoder_i ( + .clk_i (clk_i), + .x_issue_req_i (x_issue_req_i), + .x_issue_resp_o(x_issue_resp_o) + ); + + typedef struct packed { + x_issue_req_t req; + x_issue_resp_t resp; + } x_issue_t; + + logic fifo_full, fifo_empty; + logic x_issue_ready_q; + logic instr_push, instr_pop; + x_issue_t req_i; + x_issue_t req_o; + + + + assign instr_push = x_issue_resp_o.accept ? 
1 : 0; + assign instr_pop = (x_commit_i.x_commit_kill && x_commit_valid_i) || x_result_valid_o; + assign x_issue_ready_q = ~fifo_full; // if something is in the fifo, the instruction is being processed + // so we can't receive anything else + assign req_i.req = x_issue_req_i; + assign req_i.resp = x_issue_resp_o; + + always_ff @(posedge clk_i or negedge rst_ni) begin : regs + if (!rst_ni) begin + x_issue_ready_o <= 1; + end else begin + x_issue_ready_o <= x_issue_ready_q; + end + end + + fifo_v3 #( + .FALL_THROUGH(1), //data_o ready and pop in the same cycle + .DATA_WIDTH (64), + .DEPTH (8), + .dtype (x_issue_t) + ) fifo_commit_i ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (1'b0), + .testmode_i(1'b0), + .full_o (fifo_full), + .empty_o (fifo_empty), + .usage_o (), + .data_i (req_i), + .push_i (instr_push), + .data_o (req_o), + .pop_i (instr_pop) + ); + + logic [3:0] c; + counter #( + .WIDTH(4) + ) counter_i ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .clear_i (~x_commit_i.x_commit_kill && x_commit_valid_i), + .en_i (1'b1), + .load_i (), + .down_i (), + .d_i (), + .q_o (c), + .overflow_o() + ); + + always_comb begin + x_result_o.data = req_o.req.rs[0] + req_o.req.rs[1] + (X_NUM_RS == 3 ? req_o.req.rs[2] : 0); + x_result_valid_o = (c == x_result_o.data[3:0]) && ~fifo_empty ? 1 : 0; + x_result_o.id = req_o.req.id; + x_result_o.rd = req_o.req.instr[11:7]; + x_result_o.we = req_o.resp.writeback & x_result_valid_o; + x_result_o.exc = 0; + x_result_o.exccode = 0; + end + +endmodule diff --git a/test/type_param/core/cvxif_example/include/cvxif_instr_pkg.sv b/test/type_param/core/cvxif_example/include/cvxif_instr_pkg.sv new file mode 100644 index 00000000..035cb048 --- /dev/null +++ b/test/type_param/core/cvxif_example/include/cvxif_instr_pkg.sv @@ -0,0 +1,47 @@ +// Copyright 2021 Thales DIS design services SAS +// +// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 +// You may obtain a copy of the License at https://solderpad.org/licenses/ +// +// Original Author: Guillaume Chauvon (guillaume.chauvon@thalesgroup.com) + +package cvxif_instr_pkg; + + typedef struct packed { + logic [31:0] instr; + logic [31:0] mask; + cvxif_pkg::x_issue_resp_t resp; + } copro_issue_resp_t; + + // 2 Possible RISCV instructions for Coprocessor + parameter int unsigned NbInstr = 2; + parameter copro_issue_resp_t CoproInstr[NbInstr] = '{ + '{ + instr: 32'b00000_00_00000_00000_0_00_00000_0101011, // custom1 opcode + mask: 32'b00000_00_00000_00000_0_00_00000_1111111, + resp : '{ + accept : 1'b1, + writeback : 1'b0, + dualwrite : 1'b0, + dualread : 1'b0, + loadstore : 1'b0, + exc : 1'b0 + } + }, + '{ + instr: 32'b00000_00_00000_00000_0_00_00000_1011011, // custom2 opcode + mask: 32'b00000_00_00000_00000_0_00_00000_1111111, + resp : '{ + accept : 1'b1, + writeback : 1'b1, + dualwrite : 1'b0, + dualread : 1'b0, + loadstore : 1'b0, + exc : 1'b0 + } + } + }; + +endpackage diff --git a/test/type_param/core/cvxif_example/instr_decoder.sv b/test/type_param/core/cvxif_example/instr_decoder.sv new file mode 100644 index 00000000..0cf1bdf3 --- /dev/null +++ b/test/type_param/core/cvxif_example/instr_decoder.sv @@ -0,0 +1,49 @@ +// Copyright 2021 Thales DIS design services SAS +// +// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 +// You may obtain a copy of the License at https://solderpad.org/licenses/ +// +// Original Author: Guillaume Chauvon (guillaume.chauvon@thalesgroup.com) + +module instr_decoder + import cvxif_pkg::*; +#( + parameter int NbInstr = 1, + parameter cvxif_instr_pkg::copro_issue_resp_t CoproInstr[NbInstr] = {0} +) ( + input logic clk_i, + input x_issue_req_t x_issue_req_i, + output x_issue_resp_t x_issue_resp_o +); + + logic [NbInstr-1:0] sel; + + for (genvar i = 0; i < NbInstr; i++) begin : gen_predecoder_selector + assign sel[i] = ((CoproInstr[i].mask & x_issue_req_i.instr) == CoproInstr[i].instr); + end + + always_comb begin + x_issue_resp_o.accept = '0; + x_issue_resp_o.writeback = '0; + x_issue_resp_o.dualwrite = '0; + x_issue_resp_o.dualread = '0; + x_issue_resp_o.loadstore = '0; + x_issue_resp_o.exc = '0; + for (int unsigned i = 0; i < NbInstr; i++) begin + if (sel[i]) begin + x_issue_resp_o.accept = CoproInstr[i].resp.accept; + x_issue_resp_o.writeback = CoproInstr[i].resp.writeback; + x_issue_resp_o.dualwrite = CoproInstr[i].resp.dualwrite; + x_issue_resp_o.dualread = CoproInstr[i].resp.dualread; + x_issue_resp_o.loadstore = CoproInstr[i].resp.loadstore; + x_issue_resp_o.exc = CoproInstr[i].resp.exc; + end + end + end + + assert property (@(posedge clk_i) $onehot0(sel)) + else $warning("This offloaded instruction is valid for multiple coprocessor instructions !"); + +endmodule diff --git a/test/type_param/core/cvxif_fu.sv b/test/type_param/core/cvxif_fu.sv new file mode 100644 index 00000000..fb0058b9 --- /dev/null +++ b/test/type_param/core/cvxif_fu.sv @@ -0,0 +1,112 @@ +// Copyright 2021 Thales DIS design services SAS +// +// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
+// You may obtain a copy of the License at https://solderpad.org/licenses/
+//
+// Original Author: Guillaume CHAUVON (guillaume.chauvon@thalesgroup.com)
+
+// Functional Unit for the logic of the CoreV-X-Interface
+// Builds the coprocessor request from the issued instruction and drives the writeback outputs
+
+module cvxif_fu
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty  // not referenced inside this module
+) (
+    input logic clk_i,
+    input logic rst_ni,  // asynchronous reset, active low
+    input fu_data_t fu_data_i,  // supplies trans_id, operand_a, operand_b and imm
+    input riscv::priv_lvl_t priv_lvl_i,  // forwarded as x_issue_req.mode
+    //from issue
+    input logic x_valid_i,  // qualifies the issue and commit requests sent to the coprocessor
+    output logic x_ready_o,  // copy of cvxif_resp_i.x_issue_ready
+    input logic [ 31:0] x_off_instr_i,  // offloaded instruction word, forwarded as x_issue_req.instr
+    //to writeback
+    output logic [TRANS_ID_BITS-1:0] x_trans_id_o,
+    output exception_t x_exception_o,
+    output riscv::xlen_t x_result_o,
+    output logic x_valid_o,
+    output logic x_we_o,
+    //to coprocessor
+    output cvxif_pkg::cvxif_req_t cvxif_req_o,
+    input cvxif_pkg::cvxif_resp_t cvxif_resp_i
+);
+  localparam X_NUM_RS = ariane_pkg::NR_RGPR_PORTS;  // NOTE(review): sizes rs_valid, while the conditions below test cvxif_pkg::X_NUM_RS; presumably equal, confirm
+  // *_n are next-state values computed combinationally, *_q are the registered copies
+  logic illegal_n, illegal_q;
+  logic [TRANS_ID_BITS-1:0] illegal_id_n, illegal_id_q;
+  logic [31:0] illegal_instr_n, illegal_instr_q;
+  logic [X_NUM_RS-1:0] rs_valid;
+  // Every source-register valid bit is set; the literal width follows cvxif_pkg::X_NUM_RS
+  if (cvxif_pkg::X_NUM_RS == 3) begin : gen_third_operand
+    assign rs_valid = 3'b111;
+  end else begin : gen_no_third_operand
+    assign rs_valid = 2'b11;
+  end
+  // Request to the coprocessor: result-ready is always set; issue and commit are raised together while x_valid_i is set
+  always_comb begin
+    cvxif_req_o = '0;
+    cvxif_req_o.x_result_ready = 1'b1;
+    x_ready_o = cvxif_resp_i.x_issue_ready;
+    if (x_valid_i) begin
+      cvxif_req_o.x_issue_valid = x_valid_i;
+      cvxif_req_o.x_issue_req.instr = x_off_instr_i;
+      cvxif_req_o.x_issue_req.mode = priv_lvl_i;
+      cvxif_req_o.x_issue_req.id = fu_data_i.trans_id;
+      cvxif_req_o.x_issue_req.rs[0] = fu_data_i.operand_a;
+      cvxif_req_o.x_issue_req.rs[1] = fu_data_i.operand_b;
+      if (cvxif_pkg::X_NUM_RS == 3) begin
+        cvxif_req_o.x_issue_req.rs[2] = fu_data_i.imm;  // third operand is taken from the imm field
+      end
+      cvxif_req_o.x_issue_req.rs_valid = rs_valid;
+      cvxif_req_o.x_commit_valid = x_valid_i;
+      cvxif_req_o.x_commit.id = fu_data_i.trans_id;
+      cvxif_req_o.x_commit.x_commit_kill = 1'b0;  // the commit never kills the instruction
+    end
+  end
+  // Records an issued-but-not-accepted instruction and drives the writeback outputs
+  always_comb begin
+    illegal_n = illegal_q;
+    illegal_id_n = illegal_id_q;
+    illegal_instr_n = illegal_instr_q;
+    if (~cvxif_resp_i.x_issue_resp.accept && cvxif_req_o.x_issue_valid && cvxif_resp_i.x_issue_ready && ~illegal_n) begin  // issue handshake without accept, and nothing pending yet
+      illegal_n = 1'b1;
+      illegal_id_n = cvxif_req_o.x_issue_req.id;
+      illegal_instr_n = cvxif_req_o.x_issue_req.instr;
+    end
+    x_valid_o = cvxif_resp_i.x_result_valid; // the result fields below are forwarded only while x_result_valid is set, else '0
+    x_trans_id_o = x_valid_o ? cvxif_resp_i.x_result.id : '0;
+    x_result_o = x_valid_o ? cvxif_resp_i.x_result.data : '0;
+    x_exception_o.cause = x_valid_o ? {{(riscv::XLEN-6){1'b0}}, cvxif_resp_i.x_result.exccode} : '0;  // exccode zero-extended; assumes a 6-bit exccode, confirm
+    x_exception_o.valid = x_valid_o ? cvxif_resp_i.x_result.exc : '0;
+    x_exception_o.tval = '0;
+    x_we_o = x_valid_o ? cvxif_resp_i.x_result.we : '0;
+    if (illegal_n) begin
+      if (~x_valid_o) begin  // report the illegal instruction only in a cycle without a coprocessor result
+        x_trans_id_o = illegal_id_n;
+        x_result_o = '0;
+        x_valid_o = 1'b1;
+        x_exception_o.cause = riscv::ILLEGAL_INSTR;
+        x_exception_o.valid = 1'b1;
+        x_exception_o.tval = illegal_instr_n;
+        x_we_o = '0;
+        illegal_n = '0; // Clear the pending flag; illegal_id_n and illegal_instr_n are don't-cares once it is cleared, so they are not reset.
+      end
+    end
+  end
+  // State registers for the pending illegal instruction; cleared while rst_ni is low
+  always_ff @(posedge clk_i, negedge rst_ni) begin
+    if (~rst_ni) begin
+      illegal_q <= 1'b0;
+      illegal_id_q <= '0;
+      illegal_instr_q <= '0;
+    end else begin
+      illegal_q <= illegal_n;
+      illegal_id_q <= illegal_id_n;
+      illegal_instr_q <= illegal_instr_n;
+    end
+  end
+
+endmodule
diff --git a/test/type_param/core/decoder.sv b/test/type_param/core/decoder.sv
new file mode 100644
index 00000000..eead0c48
--- /dev/null
+++ b/test/type_param/core/decoder.sv
@@ -0,0 +1,1397 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.
You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// File: issue_read_operands.sv +// Author: Florian Zaruba +// Date: 8.4.2017 +// +// Copyright (C) 2017 ETH Zurich, University of Bologna +// All rights reserved. +// +// Description: Issues instruction from the scoreboard and fetches the operands +// This also includes all the forwarding logic +// + +module decoder + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +) ( + input logic debug_req_i, // external debug request + input logic [riscv::VLEN-1:0] pc_i, // PC from IF + input logic is_compressed_i, // is a compressed instruction + input logic [15:0] compressed_instr_i, // compressed form of instruction + input logic is_illegal_i, // illegal compressed instruction + input logic [31:0] instruction_i, // instruction from IF + input branchpredict_sbe_t branch_predict_i, + input exception_t ex_i, // if an exception occured in if + input logic [1:0] irq_i, // external interrupt + input irq_ctrl_t irq_ctrl_i, // interrupt control and status information from CSRs + // From CSR + input riscv::priv_lvl_t priv_lvl_i, // current privilege level + input logic debug_mode_i, // we are in debug mode + input riscv::xs_t fs_i, // floating point extension status + input logic [2:0] frm_i, // floating-point dynamic rounding mode + input riscv::xs_t vs_i, // vector extension status + input logic tvm_i, // trap virtual memory + input logic tw_i, // timeout wait + input logic tsr_i, // trap sret + output scoreboard_entry_t instruction_o, // scoreboard entry to scoreboard + output logic 
is_control_flow_instr_o // this instruction will change the control flow +); + logic illegal_instr; + logic illegal_instr_bm; + logic illegal_instr_zic; + logic illegal_instr_non_bm; + // this instruction is an environment call (ecall), it is handled like an exception + logic ecall; + // this instruction is a software break-point + logic ebreak; + // this instruction needs floating-point rounding-mode verification + logic check_fprm; + riscv::instruction_t instr; + assign instr = riscv::instruction_t'(instruction_i); + // -------------------- + // Immediate select + // -------------------- + enum logic [3:0] {NOIMM, IIMM, SIMM, SBIMM, UIMM, JIMM, RS3} imm_select; + + riscv::xlen_t imm_i_type; + riscv::xlen_t imm_s_type; + riscv::xlen_t imm_sb_type; + riscv::xlen_t imm_u_type; + riscv::xlen_t imm_uj_type; + riscv::xlen_t imm_bi_type; + + // --------------------------------------- + // Accelerator instructions' first-pass decoder + // --------------------------------------- + logic is_accel; + scoreboard_entry_t acc_instruction; + logic acc_illegal_instr; + logic acc_is_control_flow_instr; + + if (CVA6Cfg.EnableAccelerator) begin : gen_accel_decoder + // This module is responsible for a light-weight decoding of accelerator instructions, + // identifying them, but also whether they read/write scalar registers. + // Accelerators are supposed to define this module. 
+ cva6_accel_first_pass_decoder i_accel_decoder ( + .instruction_i(instruction_i), + .fs_i(fs_i), + .vs_i(vs_i), + .is_accel_o(is_accel), + .instruction_o(acc_instruction), + .illegal_instr_o(acc_illegal_instr), + .is_control_flow_instr_o(acc_is_control_flow_instr) + ); + end : gen_accel_decoder + else begin + assign is_accel = 1'b0; + assign acc_instruction = '0; + assign acc_illegal_instr = 1'b1; // this should never propagate + assign acc_is_control_flow_instr = 1'b0; + end + + always_comb begin : decoder + + imm_select = NOIMM; + is_control_flow_instr_o = 1'b0; + illegal_instr = 1'b0; + illegal_instr_non_bm = 1'b0; + illegal_instr_bm = 1'b0; + illegal_instr_zic = 1'b0; + instruction_o.pc = pc_i; + instruction_o.trans_id = '0; + instruction_o.fu = NONE; + instruction_o.op = ariane_pkg::ADD; + instruction_o.rs1 = '0; + instruction_o.rs2 = '0; + instruction_o.rd = '0; + instruction_o.use_pc = 1'b0; + instruction_o.is_compressed = is_compressed_i; + instruction_o.use_zimm = 1'b0; + instruction_o.bp = branch_predict_i; + instruction_o.vfp = 1'b0; + ecall = 1'b0; + ebreak = 1'b0; + check_fprm = 1'b0; + + if (~ex_i.valid) begin + case (instr.rtype.opcode) + riscv::OpcodeSystem: begin + instruction_o.fu = CSR; + instruction_o.rs1[4:0] = instr.itype.rs1; + instruction_o.rs2[4:0] = instr.rtype.rs2; //TODO: needs to be checked if better way is available + instruction_o.rd[4:0] = instr.itype.rd; + + unique case (instr.itype.funct3) + 3'b000: begin + // check if the RD and and RS1 fields are zero, this may be reset for the SENCE.VMA instruction + if (instr.itype.rs1 != '0 || instr.itype.rd != '0) illegal_instr = 1'b1; + // decode the immiediate field + case (instr.itype.imm) + // ECALL -> inject exception + 12'b0: ecall = 1'b1; + // EBREAK -> inject exception + 12'b1: ebreak = 1'b1; + // SRET + 12'b1_0000_0010: begin + if (CVA6Cfg.RVS) begin + instruction_o.op = ariane_pkg::SRET; + // check privilege level, SRET can only be executed in S and M mode + // we'll just decode an 
illegal instruction if we are in the wrong privilege level + if (CVA6Cfg.RVU && priv_lvl_i == riscv::PRIV_LVL_U) begin + illegal_instr = 1'b1; + // do not change privilege level if this is an illegal instruction + instruction_o.op = ariane_pkg::ADD; + end + // if we are in S-Mode and Trap SRET (tsr) is set -> trap on illegal instruction + if (priv_lvl_i == riscv::PRIV_LVL_S && tsr_i) begin + illegal_instr = 1'b1; + // do not change privilege level if this is an illegal instruction + instruction_o.op = ariane_pkg::ADD; + end + end else begin + illegal_instr = 1'b1; + instruction_o.op = ariane_pkg::ADD; + end + end + // MRET + 12'b11_0000_0010: begin + instruction_o.op = ariane_pkg::MRET; + // check privilege level, MRET can only be executed in M mode + // otherwise we decode an illegal instruction + if ((CVA6Cfg.RVS && priv_lvl_i == riscv::PRIV_LVL_S) || (CVA6Cfg.RVU && priv_lvl_i == riscv::PRIV_LVL_U)) + illegal_instr = 1'b1; + end + // DRET + 12'b111_1011_0010: begin + instruction_o.op = ariane_pkg::DRET; + if (CVA6Cfg.DebugEn) begin + // check that we are in debug mode when executing this instruction + illegal_instr = (!debug_mode_i) ? 
1'b1 : illegal_instr; + end else begin + illegal_instr = 1'b1; + end + end + // WFI + 12'b1_0000_0101: begin + instruction_o.op = ariane_pkg::WFI; + // if timeout wait is set, trap on an illegal instruction in S Mode + // (after 0 cycles timeout) + if (CVA6Cfg.RVS && priv_lvl_i == riscv::PRIV_LVL_S && tw_i) begin + illegal_instr = 1'b1; + instruction_o.op = ariane_pkg::ADD; + end + // we don't support U mode interrupts so WFI is illegal in this context + if (CVA6Cfg.RVU && priv_lvl_i == riscv::PRIV_LVL_U) begin + illegal_instr = 1'b1; + instruction_o.op = ariane_pkg::ADD; + end + end + // SFENCE.VMA + default: begin + if (instr.instr[31:25] == 7'b1001) begin + // check privilege level, SFENCE.VMA can only be executed in M/S mode + // otherwise decode an illegal instruction + illegal_instr = (((CVA6Cfg.RVS && priv_lvl_i == riscv::PRIV_LVL_S) || ((!CVA6Cfg.RVS && !CVA6Cfg.RVU) || priv_lvl_i == riscv::PRIV_LVL_M)) && instr.itype.rd == '0) ? 1'b0 : 1'b1; + instruction_o.op = ariane_pkg::SFENCE_VMA; + // check TVM flag and intercept SFENCE.VMA call if necessary + if (CVA6Cfg.RVS && priv_lvl_i == riscv::PRIV_LVL_S && tvm_i) + illegal_instr = 1'b1; + end else begin + illegal_instr = 1'b1; + end + end + endcase + end + // atomically swaps values in the CSR and integer register + 3'b001: begin // CSRRW + imm_select = IIMM; + instruction_o.op = ariane_pkg::CSR_WRITE; + end + // atomically set values in the CSR and write back to rd + 3'b010: begin // CSRRS + imm_select = IIMM; + // this is just a read + if (instr.itype.rs1 == 5'b0) instruction_o.op = ariane_pkg::CSR_READ; + else instruction_o.op = ariane_pkg::CSR_SET; + end + // atomically clear values in the CSR and write back to rd + 3'b011: begin // CSRRC + imm_select = IIMM; + // this is just a read + if (instr.itype.rs1 == 5'b0) instruction_o.op = ariane_pkg::CSR_READ; + else instruction_o.op = ariane_pkg::CSR_CLEAR; + end + // use zimm and iimm + 3'b101: begin // CSRRWI + instruction_o.rs1[4:0] = instr.itype.rs1; + 
imm_select = IIMM; + instruction_o.use_zimm = 1'b1; + instruction_o.op = ariane_pkg::CSR_WRITE; + end + 3'b110: begin // CSRRSI + instruction_o.rs1[4:0] = instr.itype.rs1; + imm_select = IIMM; + instruction_o.use_zimm = 1'b1; + // this is just a read + if (instr.itype.rs1 == 5'b0) instruction_o.op = ariane_pkg::CSR_READ; + else instruction_o.op = ariane_pkg::CSR_SET; + end + 3'b111: begin // CSRRCI + instruction_o.rs1[4:0] = instr.itype.rs1; + imm_select = IIMM; + instruction_o.use_zimm = 1'b1; + // this is just a read + if (instr.itype.rs1 == 5'b0) instruction_o.op = ariane_pkg::CSR_READ; + else instruction_o.op = ariane_pkg::CSR_CLEAR; + end + default: illegal_instr = 1'b1; + endcase + end + // Memory ordering instructions + riscv::OpcodeMiscMem: begin + instruction_o.fu = CSR; + instruction_o.rs1 = '0; + instruction_o.rs2 = '0; + instruction_o.rd = '0; + + case (instr.stype.funct3) + // FENCE + // Currently implemented as a whole DCache flush boldly ignoring other things + 3'b000: instruction_o.op = ariane_pkg::FENCE; + // FENCE.I + 3'b001: instruction_o.op = ariane_pkg::FENCE_I; + + default: illegal_instr = 1'b1; + endcase + end + + // -------------------------- + // Reg-Reg Operations + // -------------------------- + riscv::OpcodeOp: begin + // -------------------------------------------- + // Vectorial Floating-Point Reg-Reg Operations + // -------------------------------------------- + if (instr.rvftype.funct2 == 2'b10) begin // Prefix 10 for all Xfvec ops + // only generate decoder if FP extensions are enabled (static) + if (CVA6Cfg.FpPresent && CVA6Cfg.XFVec && fs_i != riscv::Off) begin + automatic logic allow_replication; // control honoring of replication flag + + instruction_o.fu = FPU_VEC; // Same unit, but sets 'vectorial' signal + instruction_o.rs1[4:0] = instr.rvftype.rs1; + instruction_o.rs2[4:0] = instr.rvftype.rs2; + instruction_o.rd[4:0] = instr.rvftype.rd; + check_fprm = 1'b1; + allow_replication = 1'b1; + // decode vectorial FP instruction + 
unique case (instr.rvftype.vecfltop) + 5'b00001: begin + instruction_o.op = ariane_pkg::FADD; // vfadd.vfmt - Vectorial FP Addition + instruction_o.rs1 = '0; // Operand A is set to 0 + instruction_o.rs2[4:0] = instr.rvftype.rs1; // Operand B is set to rs1 + imm_select = IIMM; // Operand C is set to rs2 + end + 5'b00010: begin + instruction_o.op = ariane_pkg::FSUB; // vfsub.vfmt - Vectorial FP Subtraction + instruction_o.rs1 = '0; // Operand A is set to 0 + instruction_o.rs2[4:0] = instr.rvftype.rs1; // Operand B is set to rs1 + imm_select = IIMM; // Operand C is set to rs2 + end + 5'b00011: + instruction_o.op = ariane_pkg::FMUL; // vfmul.vfmt - Vectorial FP Multiplication + 5'b00100: + instruction_o.op = ariane_pkg::FDIV; // vfdiv.vfmt - Vectorial FP Division + 5'b00101: begin + instruction_o.op = ariane_pkg::VFMIN; // vfmin.vfmt - Vectorial FP Minimum + check_fprm = 1'b0; // rounding mode irrelevant + end + 5'b00110: begin + instruction_o.op = ariane_pkg::VFMAX; // vfmax.vfmt - Vectorial FP Maximum + check_fprm = 1'b0; // rounding mode irrelevant + end + 5'b00111: begin + instruction_o.op = ariane_pkg::FSQRT; // vfsqrt.vfmt - Vectorial FP Square Root + allow_replication = 1'b0; // only one operand + if (instr.rvftype.rs2 != 5'b00000) illegal_instr = 1'b1; // rs2 must be 0 + end + 5'b01000: begin + instruction_o.op = ariane_pkg::FMADD; // vfmac.vfmt - Vectorial FP Multiply-Accumulate + imm_select = SIMM; // rd into result field (upper bits don't matter) + end + 5'b01001: begin + instruction_o.op = ariane_pkg::FMSUB; // vfmre.vfmt - Vectorial FP Multiply-Reduce + imm_select = SIMM; // rd into result field (upper bits don't matter) + end + 5'b01100: begin + unique case (instr.rvftype.rs2) inside // operation encoded in rs2, `inside` for matching ? 
+ 5'b00000: begin + instruction_o.rs2[4:0] = instr.rvftype.rs1; // set rs2 = rs1 so we can map FMV to SGNJ in the unit + if (instr.rvftype.repl) + instruction_o.op = ariane_pkg::FMV_X2F; // vfmv.vfmt.x - GPR to FPR Move + else instruction_o.op = ariane_pkg::FMV_F2X; // vfmv.x.vfmt - FPR to GPR Move + check_fprm = 1'b0; // no rounding for moves + end + 5'b00001: begin + instruction_o.op = ariane_pkg::FCLASS; // vfclass.vfmt - Vectorial FP Classify + check_fprm = 1'b0; // no rounding for classification + allow_replication = 1'b0; // R must not be set + end + 5'b00010: + instruction_o.op = ariane_pkg::FCVT_F2I; // vfcvt.x.vfmt - Vectorial FP to Int Conversion + 5'b00011: + instruction_o.op = ariane_pkg::FCVT_I2F; // vfcvt.vfmt.x - Vectorial Int to FP Conversion + 5'b001??: begin + instruction_o.op = ariane_pkg::FCVT_F2F; // vfcvt.vfmt.vfmt - Vectorial FP to FP Conversion + instruction_o.rs2[4:0] = instr.rvftype.rd; // set rs2 = rd as target vector for conversion + imm_select = IIMM; // rs2 holds part of the intruction + // TODO CHECK R bit for valid fmt combinations + // determine source format + unique case (instr.rvftype.rs2[21:20]) + // Only process instruction if corresponding extension is active (static) + 2'b00: if (~CVA6Cfg.RVFVec) illegal_instr = 1'b1; + 2'b01: if (~CVA6Cfg.XF16ALTVec) illegal_instr = 1'b1; + 2'b10: if (~CVA6Cfg.XF16Vec) illegal_instr = 1'b1; + 2'b11: if (~CVA6Cfg.XF8Vec) illegal_instr = 1'b1; + default: illegal_instr = 1'b1; + endcase + end + default: illegal_instr = 1'b1; + endcase + end + 5'b01101: begin + check_fprm = 1'b0; // no rounding for sign-injection + instruction_o.op = ariane_pkg::VFSGNJ; // vfsgnj.vfmt - Vectorial FP Sign Injection + end + 5'b01110: begin + check_fprm = 1'b0; // no rounding for sign-injection + instruction_o.op = ariane_pkg::VFSGNJN; // vfsgnjn.vfmt - Vectorial FP Negated Sign Injection + end + 5'b01111: begin + check_fprm = 1'b0; // no rounding for sign-injection + instruction_o.op = ariane_pkg::VFSGNJX; // 
vfsgnjx.vfmt - Vectorial FP XORed Sign Injection + end + 5'b10000: begin + check_fprm = 1'b0; // no rounding for comparisons + instruction_o.op = ariane_pkg::VFEQ; // vfeq.vfmt - Vectorial FP Equality + end + 5'b10001: begin + check_fprm = 1'b0; // no rounding for comparisons + instruction_o.op = ariane_pkg::VFNE; // vfne.vfmt - Vectorial FP Non-Equality + end + 5'b10010: begin + check_fprm = 1'b0; // no rounding for comparisons + instruction_o.op = ariane_pkg::VFLT; // vfle.vfmt - Vectorial FP Less Than + end + 5'b10011: begin + check_fprm = 1'b0; // no rounding for comparisons + instruction_o.op = ariane_pkg::VFGE; // vfge.vfmt - Vectorial FP Greater or Equal + end + 5'b10100: begin + check_fprm = 1'b0; // no rounding for comparisons + instruction_o.op = ariane_pkg::VFLE; // vfle.vfmt - Vectorial FP Less or Equal + end + 5'b10101: begin + check_fprm = 1'b0; // no rounding for comparisons + instruction_o.op = ariane_pkg::VFGT; // vfgt.vfmt - Vectorial FP Greater Than + end + 5'b11000: begin + instruction_o.op = ariane_pkg::VFCPKAB_S; // vfcpka/b.vfmt.s - Vectorial FP Cast-and-Pack from 2x FP32, lowest 4 entries + imm_select = SIMM; // rd into result field (upper bits don't matter) + if (~CVA6Cfg.RVF) + illegal_instr = 1'b1; // if we don't support RVF, we can't cast from FP32 + // check destination format + unique case (instr.rvftype.vfmt) + // Only process instruction if corresponding extension is active and FLEN suffices (static) + 2'b00: begin + if (~CVA6Cfg.RVFVec) + illegal_instr = 1'b1; // destination vector not supported + if (instr.rvftype.repl) + illegal_instr = 1'b1; // no entries 2/3 in vector of 2 fp32 + end + 2'b01: begin + if (~CVA6Cfg.XF16ALTVec) + illegal_instr = 1'b1; // destination vector not supported + end + 2'b10: begin + if (~CVA6Cfg.XF16Vec) + illegal_instr = 1'b1; // destination vector not supported + end + 2'b11: begin + if (~CVA6Cfg.XF8Vec) + illegal_instr = 1'b1; // destination vector not supported + end + default: illegal_instr = 1'b1; + 
endcase + end + 5'b11001: begin + instruction_o.op = ariane_pkg::VFCPKCD_S; // vfcpkc/d.vfmt.s - Vectorial FP Cast-and-Pack from 2x FP32, second 4 entries + imm_select = SIMM; // rd into result field (upper bits don't matter) + if (~CVA6Cfg.RVF) + illegal_instr = 1'b1; // if we don't support RVF, we can't cast from FP32 + // check destination format + unique case (instr.rvftype.vfmt) + // Only process instruction if corresponding extension is active and FLEN suffices (static) + 2'b00: illegal_instr = 1'b1; // no entries 4-7 in vector of 2 FP32 + 2'b01: illegal_instr = 1'b1; // no entries 4-7 in vector of 4 FP16ALT + 2'b10: illegal_instr = 1'b1; // no entries 4-7 in vector of 4 FP16 + 2'b11: begin + if (~CVA6Cfg.XF8Vec) + illegal_instr = 1'b1; // destination vector not supported + end + default: illegal_instr = 1'b1; + endcase + end + 5'b11010: begin + instruction_o.op = ariane_pkg::VFCPKAB_D; // vfcpka/b.vfmt.d - Vectorial FP Cast-and-Pack from 2x FP64, lowest 4 entries + imm_select = SIMM; // rd into result field (upper bits don't matter) + if (~CVA6Cfg.RVD) + illegal_instr = 1'b1; // if we don't support RVD, we can't cast from FP64 + // check destination format + unique case (instr.rvftype.vfmt) + // Only process instruction if corresponding extension is active and FLEN suffices (static) + 2'b00: begin + if (~CVA6Cfg.RVFVec) + illegal_instr = 1'b1; // destination vector not supported + if (instr.rvftype.repl) + illegal_instr = 1'b1; // no entries 2/3 in vector of 2 fp32 + end + 2'b01: begin + if (~CVA6Cfg.XF16ALTVec) + illegal_instr = 1'b1; // destination vector not supported + end + 2'b10: begin + if (~CVA6Cfg.XF16Vec) + illegal_instr = 1'b1; // destination vector not supported + end + 2'b11: begin + if (~CVA6Cfg.XF8Vec) + illegal_instr = 1'b1; // destination vector not supported + end + default: illegal_instr = 1'b1; + endcase + end + 5'b11011: begin + instruction_o.op = ariane_pkg::VFCPKCD_D; // vfcpka/b.vfmt.d - Vectorial FP Cast-and-Pack from 2x FP64, second 
4 entries + imm_select = SIMM; // rd into result field (upper bits don't matter) + if (~CVA6Cfg.RVD) + illegal_instr = 1'b1; // if we don't support RVD, we can't cast from FP64 + // check destination format + unique case (instr.rvftype.vfmt) + // Only process instruction if corresponding extension is active and FLEN suffices (static) + 2'b00: illegal_instr = 1'b1; // no entries 4-7 in vector of 2 FP32 + 2'b01: illegal_instr = 1'b1; // no entries 4-7 in vector of 4 FP16ALT + 2'b10: illegal_instr = 1'b1; // no entries 4-7 in vector of 4 FP16 + 2'b11: begin + if (~CVA6Cfg.XF8Vec) + illegal_instr = 1'b1; // destination vector not supported + end + default: illegal_instr = 1'b1; + endcase + end + default: illegal_instr = 1'b1; + endcase + + // check format + unique case (instr.rvftype.vfmt) + // Only process instruction if corresponding extension is active (static) + 2'b00: if (~CVA6Cfg.RVFVec) illegal_instr = 1'b1; + 2'b01: if (~CVA6Cfg.XF16ALTVec) illegal_instr = 1'b1; + 2'b10: if (~CVA6Cfg.XF16Vec) illegal_instr = 1'b1; + 2'b11: if (~CVA6Cfg.XF8Vec) illegal_instr = 1'b1; + default: illegal_instr = 1'b1; + endcase + + // check disallowed replication + if (~allow_replication & instr.rvftype.repl) illegal_instr = 1'b1; + + // check rounding mode + if (check_fprm) begin + unique case (frm_i) inside // actual rounding mode from frm csr + [3'b000 : 3'b100]: ; //legal rounding modes + default: illegal_instr = 1'b1; + endcase + end + + end else begin // No vectorial FP enabled (static) + illegal_instr = 1'b1; + end + + // --------------------------- + // Integer Reg-Reg Operations + // --------------------------- + end else begin + if (CVA6Cfg.RVB) begin + instruction_o.fu = (instr.rtype.funct7 == 7'b000_0001 || ((instr.rtype.funct7 == 7'b000_0101) && !(instr.rtype.funct3[14]))) ? MULT : ALU; + end else begin + instruction_o.fu = (instr.rtype.funct7 == 7'b000_0001) ? 
MULT : ALU; + end + instruction_o.rs1[4:0] = instr.rtype.rs1; + instruction_o.rs2[4:0] = instr.rtype.rs2; + instruction_o.rd[4:0] = instr.rtype.rd; + + unique case ({ + instr.rtype.funct7, instr.rtype.funct3 + }) + {7'b000_0000, 3'b000} : instruction_o.op = ariane_pkg::ADD; // Add + {7'b010_0000, 3'b000} : instruction_o.op = ariane_pkg::SUB; // Sub + {7'b000_0000, 3'b010} : instruction_o.op = ariane_pkg::SLTS; // Set Lower Than + { + 7'b000_0000, 3'b011 + } : + instruction_o.op = ariane_pkg::SLTU; // Set Lower Than Unsigned + {7'b000_0000, 3'b100} : instruction_o.op = ariane_pkg::XORL; // Xor + {7'b000_0000, 3'b110} : instruction_o.op = ariane_pkg::ORL; // Or + {7'b000_0000, 3'b111} : instruction_o.op = ariane_pkg::ANDL; // And + {7'b000_0000, 3'b001} : instruction_o.op = ariane_pkg::SLL; // Shift Left Logical + {7'b000_0000, 3'b101} : instruction_o.op = ariane_pkg::SRL; // Shift Right Logical + {7'b010_0000, 3'b101} : instruction_o.op = ariane_pkg::SRA; // Shift Right Arithmetic + // Multiplications + {7'b000_0001, 3'b000} : instruction_o.op = ariane_pkg::MUL; + {7'b000_0001, 3'b001} : instruction_o.op = ariane_pkg::MULH; + {7'b000_0001, 3'b010} : instruction_o.op = ariane_pkg::MULHSU; + {7'b000_0001, 3'b011} : instruction_o.op = ariane_pkg::MULHU; + {7'b000_0001, 3'b100} : instruction_o.op = ariane_pkg::DIV; + {7'b000_0001, 3'b101} : instruction_o.op = ariane_pkg::DIVU; + {7'b000_0001, 3'b110} : instruction_o.op = ariane_pkg::REM; + {7'b000_0001, 3'b111} : instruction_o.op = ariane_pkg::REMU; + default: begin + illegal_instr_non_bm = 1'b1; + end + endcase + if (CVA6Cfg.RVB) begin + unique case ({ + instr.rtype.funct7, instr.rtype.funct3 + }) + //Logical with Negate + {7'b010_0000, 3'b111} : instruction_o.op = ariane_pkg::ANDN; // Andn + {7'b010_0000, 3'b110} : instruction_o.op = ariane_pkg::ORN; // Orn + {7'b010_0000, 3'b100} : instruction_o.op = ariane_pkg::XNOR; // Xnor + //Shift and Add (Bitmanip) + {7'b001_0000, 3'b010} : instruction_o.op = 
ariane_pkg::SH1ADD; // Sh1add + {7'b001_0000, 3'b100} : instruction_o.op = ariane_pkg::SH2ADD; // Sh2add + {7'b001_0000, 3'b110} : instruction_o.op = ariane_pkg::SH3ADD; // Sh3add + // Integer maximum/minimum + {7'b000_0101, 3'b110} : instruction_o.op = ariane_pkg::MAX; // max + {7'b000_0101, 3'b111} : instruction_o.op = ariane_pkg::MAXU; // maxu + {7'b000_0101, 3'b100} : instruction_o.op = ariane_pkg::MIN; // min + {7'b000_0101, 3'b101} : instruction_o.op = ariane_pkg::MINU; // minu + // Single bit instructions + {7'b010_0100, 3'b001} : instruction_o.op = ariane_pkg::BCLR; // bclr + {7'b010_0100, 3'b101} : instruction_o.op = ariane_pkg::BEXT; // bext + {7'b011_0100, 3'b001} : instruction_o.op = ariane_pkg::BINV; // binv + {7'b001_0100, 3'b001} : instruction_o.op = ariane_pkg::BSET; // bset + // Carry-Less-Multiplication (clmul, clmulh, clmulr) + {7'b000_0101, 3'b001} : instruction_o.op = ariane_pkg::CLMUL; // clmul + {7'b000_0101, 3'b011} : instruction_o.op = ariane_pkg::CLMULH; // clmulh + {7'b000_0101, 3'b010} : instruction_o.op = ariane_pkg::CLMULR; // clmulr + // Bitwise Shifting + {7'b011_0000, 3'b001} : instruction_o.op = ariane_pkg::ROL; // rol + {7'b011_0000, 3'b101} : instruction_o.op = ariane_pkg::ROR; // ror + // Zero Extend Op + {7'b000_0100, 3'b100} : instruction_o.op = ariane_pkg::ZEXTH; + default: begin + illegal_instr_bm = 1'b1; + end + endcase + end + if (CVA6Cfg.ZiCondExtEn) begin + unique case ({ + instr.rtype.funct7, instr.rtype.funct3 + }) + //Conditional move + {7'b000_0111, 3'b101} : instruction_o.op = ariane_pkg::CZERO_EQZ; // czero.eqz + {7'b000_0111, 3'b111} : instruction_o.op = ariane_pkg::CZERO_NEZ; // czero.nez + default: begin + illegal_instr_zic = 1'b1; + end + endcase + end + //VCS coverage on + unique case ({ + CVA6Cfg.RVB, CVA6Cfg.ZiCondExtEn + }) + 2'b00: illegal_instr = illegal_instr_non_bm; + 2'b01: illegal_instr = illegal_instr_non_bm & illegal_instr_zic; + 2'b10: illegal_instr = illegal_instr_non_bm & illegal_instr_bm; + 
2'b11: illegal_instr = illegal_instr_non_bm & illegal_instr_bm & illegal_instr_zic; + endcase + end + end + + // -------------------------- + // 32bit Reg-Reg Operations + // -------------------------- + riscv::OpcodeOp32: begin + instruction_o.fu = (instr.rtype.funct7 == 7'b000_0001) ? MULT : ALU; + instruction_o.rs1[4:0] = instr.rtype.rs1; + instruction_o.rs2[4:0] = instr.rtype.rs2; + instruction_o.rd[4:0] = instr.rtype.rd; + if (riscv::IS_XLEN64) begin + unique case ({ + instr.rtype.funct7, instr.rtype.funct3 + }) + {7'b000_0000, 3'b000} : instruction_o.op = ariane_pkg::ADDW; // addw + {7'b010_0000, 3'b000} : instruction_o.op = ariane_pkg::SUBW; // subw + {7'b000_0000, 3'b001} : instruction_o.op = ariane_pkg::SLLW; // sllw + {7'b000_0000, 3'b101} : instruction_o.op = ariane_pkg::SRLW; // srlw + {7'b010_0000, 3'b101} : instruction_o.op = ariane_pkg::SRAW; // sraw + // Multiplications + {7'b000_0001, 3'b000} : instruction_o.op = ariane_pkg::MULW; + {7'b000_0001, 3'b100} : instruction_o.op = ariane_pkg::DIVW; + {7'b000_0001, 3'b101} : instruction_o.op = ariane_pkg::DIVUW; + {7'b000_0001, 3'b110} : instruction_o.op = ariane_pkg::REMW; + {7'b000_0001, 3'b111} : instruction_o.op = ariane_pkg::REMUW; + default: illegal_instr_non_bm = 1'b1; + endcase + if (CVA6Cfg.RVB) begin + unique case ({ + instr.rtype.funct7, instr.rtype.funct3 + }) + // Shift with Add (Unsigned Word) + {7'b001_0000, 3'b010}: instruction_o.op = ariane_pkg::SH1ADDUW; // sh1add.uw + {7'b001_0000, 3'b100}: instruction_o.op = ariane_pkg::SH2ADDUW; // sh2add.uw + {7'b001_0000, 3'b110}: instruction_o.op = ariane_pkg::SH3ADDUW; // sh3add.uw + // Unsigned word Op's + {7'b000_0100, 3'b000}: instruction_o.op = ariane_pkg::ADDUW; // add.uw + // Bitwise Shifting + {7'b011_0000, 3'b001}: instruction_o.op = ariane_pkg::ROLW; // rolw + {7'b011_0000, 3'b101}: instruction_o.op = ariane_pkg::RORW; // rorw + default: illegal_instr_bm = 1'b1; + endcase + illegal_instr = illegal_instr_non_bm & illegal_instr_bm; + end 
else begin + illegal_instr = illegal_instr_non_bm; + end + end else illegal_instr = 1'b1; + end + // -------------------------------- + // Reg-Immediate Operations + // -------------------------------- + riscv::OpcodeOpImm: begin + instruction_o.fu = ALU; + imm_select = IIMM; + instruction_o.rs1[4:0] = instr.itype.rs1; + instruction_o.rd[4:0] = instr.itype.rd; + unique case (instr.itype.funct3) + 3'b000: instruction_o.op = ariane_pkg::ADD; // Add Immediate + 3'b010: instruction_o.op = ariane_pkg::SLTS; // Set to one if Lower Than Immediate + 3'b011: + instruction_o.op = ariane_pkg::SLTU; // Set to one if Lower Than Immediate Unsigned + 3'b100: instruction_o.op = ariane_pkg::XORL; // Exclusive Or with Immediate + 3'b110: instruction_o.op = ariane_pkg::ORL; // Or with Immediate + 3'b111: instruction_o.op = ariane_pkg::ANDL; // And with Immediate + + 3'b001: begin + instruction_o.op = ariane_pkg::SLL; // Shift Left Logical by Immediate + if (instr.instr[31:26] != 6'b0) illegal_instr_non_bm = 1'b1; + if (instr.instr[25] != 1'b0 && riscv::XLEN == 32) illegal_instr_non_bm = 1'b1; + end + + 3'b101: begin + if (instr.instr[31:26] == 6'b0) + instruction_o.op = ariane_pkg::SRL; // Shift Right Logical by Immediate + else if (instr.instr[31:26] == 6'b010_000) + instruction_o.op = ariane_pkg::SRA; // Shift Right Arithmetically by Immediate + else illegal_instr_non_bm = 1'b1; + if (instr.instr[25] != 1'b0 && riscv::XLEN == 32) illegal_instr_non_bm = 1'b1; + end + endcase + if (CVA6Cfg.RVB) begin + unique case (instr.itype.funct3) + 3'b001: begin + if (instr.instr[31:25] == 7'b0110000) begin + if (instr.instr[22:20] == 3'b100) instruction_o.op = ariane_pkg::SEXTB; + else if (instr.instr[22:20] == 3'b101) instruction_o.op = ariane_pkg::SEXTH; + else if (instr.instr[22:20] == 3'b010) instruction_o.op = ariane_pkg::CPOP; + else if (instr.instr[22:20] == 3'b000) instruction_o.op = ariane_pkg::CLZ; + else if (instr.instr[22:20] == 3'b001) instruction_o.op = ariane_pkg::CTZ; + end else 
if (instr.instr[31:26] == 6'b010010) instruction_o.op = ariane_pkg::BCLRI; + else if (instr.instr[31:26] == 6'b011010) instruction_o.op = ariane_pkg::BINVI; + else if (instr.instr[31:26] == 6'b001010) instruction_o.op = ariane_pkg::BSETI; + else illegal_instr_bm = 1'b1; + end + 3'b101: begin + if (instr.instr[31:20] == 12'b001010000111) instruction_o.op = ariane_pkg::ORCB; + else if (riscv::IS_XLEN64 && instr.instr[31:20] == 12'b011010111000) + instruction_o.op = ariane_pkg::REV8; + else if (instr.instr[31:20] == 12'b011010011000) + instruction_o.op = ariane_pkg::REV8; + else if (instr.instr[31:26] == 6'b010_010) instruction_o.op = ariane_pkg::BEXTI; + else if (instr.instr[31:26] == 6'b011_000) instruction_o.op = ariane_pkg::RORI; + else illegal_instr_bm = 1'b1; + end + default: illegal_instr_bm = 1'b1; + endcase + illegal_instr = illegal_instr_non_bm & illegal_instr_bm; + end else begin + illegal_instr = illegal_instr_non_bm; + end + end + + // -------------------------------- + // 32 bit Reg-Immediate Operations + // -------------------------------- + riscv::OpcodeOpImm32: begin + instruction_o.fu = ALU; + imm_select = IIMM; + instruction_o.rs1[4:0] = instr.itype.rs1; + instruction_o.rd[4:0] = instr.itype.rd; + if (riscv::IS_XLEN64) begin + unique case (instr.itype.funct3) + 3'b000: instruction_o.op = ariane_pkg::ADDW; // Add Immediate + 3'b001: begin + instruction_o.op = ariane_pkg::SLLW; // Shift Left Logical by Immediate + if (instr.instr[31:25] != 7'b0) illegal_instr_non_bm = 1'b1; + end + 3'b101: begin + if (instr.instr[31:25] == 7'b0) + instruction_o.op = ariane_pkg::SRLW; // Shift Right Logical by Immediate + else if (instr.instr[31:25] == 7'b010_0000) + instruction_o.op = ariane_pkg::SRAW; // Shift Right Arithmetically by Immediate + else illegal_instr_non_bm = 1'b1; + end + default: illegal_instr_non_bm = 1'b1; + endcase + if (CVA6Cfg.RVB) begin + unique case (instr.itype.funct3) + 3'b001: begin + if (instr.instr[31:25] == 7'b0110000) begin + if 
(instr.instr[21:20] == 2'b10) instruction_o.op = ariane_pkg::CPOPW; + else if (instr.instr[21:20] == 2'b00) instruction_o.op = ariane_pkg::CLZW; + else if (instr.instr[21:20] == 2'b01) instruction_o.op = ariane_pkg::CTZW; + else illegal_instr_bm = 1'b1; + end else if (instr.instr[31:26] == 6'b000010) begin + instruction_o.op = ariane_pkg::SLLIUW; // Shift Left Logic by Immediate (Unsigned Word) + end else illegal_instr_bm = 1'b1; + end + 3'b101: begin + if (instr.instr[31:25] == 7'b011_0000) instruction_o.op = ariane_pkg::RORIW; + else illegal_instr_bm = 1'b1; + end + default: illegal_instr_bm = 1'b1; + endcase + illegal_instr = illegal_instr_non_bm & illegal_instr_bm; + end else begin + illegal_instr = illegal_instr_non_bm; + end + + end else illegal_instr = 1'b1; + end + // -------------------------------- + // LSU + // -------------------------------- + riscv::OpcodeStore: begin + instruction_o.fu = STORE; + imm_select = SIMM; + instruction_o.rs1[4:0] = instr.stype.rs1; + instruction_o.rs2[4:0] = instr.stype.rs2; + // determine store size + unique case (instr.stype.funct3) + 3'b000: instruction_o.op = ariane_pkg::SB; + 3'b001: instruction_o.op = ariane_pkg::SH; + 3'b010: instruction_o.op = ariane_pkg::SW; + 3'b011: + if (riscv::XLEN == 64) instruction_o.op = ariane_pkg::SD; + else illegal_instr = 1'b1; + default: illegal_instr = 1'b1; + endcase + end + + riscv::OpcodeLoad: begin + instruction_o.fu = LOAD; + imm_select = IIMM; + instruction_o.rs1[4:0] = instr.itype.rs1; + instruction_o.rd[4:0] = instr.itype.rd; + // determine load size and signed type + unique case (instr.itype.funct3) + 3'b000: instruction_o.op = ariane_pkg::LB; + 3'b001: instruction_o.op = ariane_pkg::LH; + 3'b010: instruction_o.op = ariane_pkg::LW; + 3'b100: instruction_o.op = ariane_pkg::LBU; + 3'b101: instruction_o.op = ariane_pkg::LHU; + 3'b110: + if (riscv::XLEN == 64) instruction_o.op = ariane_pkg::LWU; + else illegal_instr = 1'b1; + 3'b011: + if (riscv::XLEN == 64) instruction_o.op = 
ariane_pkg::LD; + else illegal_instr = 1'b1; + default: illegal_instr = 1'b1; + endcase + end + + // -------------------------------- + // Floating-Point Load/store + // -------------------------------- + riscv::OpcodeStoreFp: begin + if (CVA6Cfg.FpPresent && fs_i != riscv::Off) begin // only generate decoder if FP extensions are enabled (static) + instruction_o.fu = STORE; + imm_select = SIMM; + instruction_o.rs1[4:0] = instr.stype.rs1; + instruction_o.rs2[4:0] = instr.stype.rs2; + // determine store size + unique case (instr.stype.funct3) + // Only process instruction if corresponding extension is active (static) + 3'b000: + if (CVA6Cfg.XF8) instruction_o.op = ariane_pkg::FSB; + else illegal_instr = 1'b1; + 3'b001: + if (CVA6Cfg.XF16 | CVA6Cfg.XF16ALT) instruction_o.op = ariane_pkg::FSH; + else illegal_instr = 1'b1; + 3'b010: + if (CVA6Cfg.RVF) instruction_o.op = ariane_pkg::FSW; + else illegal_instr = 1'b1; + 3'b011: + if (CVA6Cfg.RVD) instruction_o.op = ariane_pkg::FSD; + else illegal_instr = 1'b1; + default: illegal_instr = 1'b1; + endcase + end else illegal_instr = 1'b1; + end + + riscv::OpcodeLoadFp: begin + if (CVA6Cfg.FpPresent && fs_i != riscv::Off) begin // only generate decoder if FP extensions are enabled (static) + instruction_o.fu = LOAD; + imm_select = IIMM; + instruction_o.rs1[4:0] = instr.itype.rs1; + instruction_o.rd[4:0] = instr.itype.rd; + // determine load size + unique case (instr.itype.funct3) + // Only process instruction if corresponding extension is active (static) + 3'b000: + if (CVA6Cfg.XF8) instruction_o.op = ariane_pkg::FLB; + else illegal_instr = 1'b1; + 3'b001: + if (CVA6Cfg.XF16 | CVA6Cfg.XF16ALT) instruction_o.op = ariane_pkg::FLH; + else illegal_instr = 1'b1; + 3'b010: + if (CVA6Cfg.RVF) instruction_o.op = ariane_pkg::FLW; + else illegal_instr = 1'b1; + 3'b011: + if (CVA6Cfg.RVD) instruction_o.op = ariane_pkg::FLD; + else illegal_instr = 1'b1; + default: illegal_instr = 1'b1; + endcase + end else illegal_instr = 1'b1; + end + + 
// ---------------------------------- + // Floating-Point Reg-Reg Operations + // ---------------------------------- + riscv::OpcodeMadd, riscv::OpcodeMsub, riscv::OpcodeNmsub, riscv::OpcodeNmadd: begin + if (CVA6Cfg.FpPresent && fs_i != riscv::Off) begin // only generate decoder if FP extensions are enabled (static) + instruction_o.fu = FPU; + instruction_o.rs1[4:0] = instr.r4type.rs1; + instruction_o.rs2[4:0] = instr.r4type.rs2; + instruction_o.rd[4:0] = instr.r4type.rd; + imm_select = RS3; // rs3 into result field + check_fprm = 1'b1; + // select the correct fused operation + unique case (instr.r4type.opcode) + default: instruction_o.op = ariane_pkg::FMADD; // fmadd.fmt - FP Fused multiply-add + riscv::OpcodeMsub: + instruction_o.op = ariane_pkg::FMSUB; // fmsub.fmt - FP Fused multiply-subtract + riscv::OpcodeNmsub: + instruction_o.op = ariane_pkg::FNMSUB; // fnmsub.fmt - FP Negated fused multiply-subtract + riscv::OpcodeNmadd: + instruction_o.op = ariane_pkg::FNMADD; // fnmadd.fmt - FP Negated fused multiply-add + endcase + + // determine fp format + unique case (instr.r4type.funct2) + // Only process instruction if corresponding extension is active (static) + 2'b00: if (~CVA6Cfg.RVF) illegal_instr = 1'b1; + 2'b01: if (~CVA6Cfg.RVD) illegal_instr = 1'b1; + 2'b10: if (~CVA6Cfg.XF16 & ~CVA6Cfg.XF16ALT) illegal_instr = 1'b1; + 2'b11: if (~CVA6Cfg.XF8) illegal_instr = 1'b1; + default: illegal_instr = 1'b1; + endcase + + // check rounding mode + if (check_fprm) begin + unique case (instr.rftype.rm) inside + [3'b000 : 3'b100]: ; //legal rounding modes + 3'b101: begin // Alternative Half-Precision encoded as fmt=10 and rm=101 + if (~CVA6Cfg.XF16ALT || instr.rftype.fmt != 2'b10) illegal_instr = 1'b1; + unique case (frm_i) inside // actual rounding mode from frm csr + [3'b000 : 3'b100]: ; //legal rounding modes + default: illegal_instr = 1'b1; + endcase + end + 3'b111: begin + // rounding mode from frm csr + unique case (frm_i) inside + [3'b000 : 3'b100]: ; //legal
rounding modes + default: illegal_instr = 1'b1; + endcase + end + default: illegal_instr = 1'b1; + endcase + end + end else begin + illegal_instr = 1'b1; + end + end + + riscv::OpcodeOpFp: begin + if (CVA6Cfg.FpPresent && fs_i != riscv::Off) begin // only generate decoder if FP extensions are enabled (static) + instruction_o.fu = FPU; + instruction_o.rs1[4:0] = instr.rftype.rs1; + instruction_o.rs2[4:0] = instr.rftype.rs2; + instruction_o.rd[4:0] = instr.rftype.rd; + check_fprm = 1'b1; + // decode FP instruction + unique case (instr.rftype.funct5) + 5'b00000: begin + instruction_o.op = ariane_pkg::FADD; // fadd.fmt - FP Addition + instruction_o.rs1 = '0; // Operand A is set to 0 + instruction_o.rs2[4:0] = instr.rftype.rs1; // Operand B is set to rs1 + imm_select = IIMM; // Operand C is set to rs2 + end + 5'b00001: begin + instruction_o.op = ariane_pkg::FSUB; // fsub.fmt - FP Subtraction + instruction_o.rs1 = '0; // Operand A is set to 0 + instruction_o.rs2[4:0] = instr.rftype.rs1; // Operand B is set to rs1 + imm_select = IIMM; // Operand C is set to rs2 + end + 5'b00010: instruction_o.op = ariane_pkg::FMUL; // fmul.fmt - FP Multiplication + 5'b00011: instruction_o.op = ariane_pkg::FDIV; // fdiv.fmt - FP Division + 5'b01011: begin + instruction_o.op = ariane_pkg::FSQRT; // fsqrt.fmt - FP Square Root + // rs2 must be zero + if (instr.rftype.rs2 != 5'b00000) illegal_instr = 1'b1; + end + 5'b00100: begin + instruction_o.op = ariane_pkg::FSGNJ; // fsgn{j[n]/jx}.fmt - FP Sign Injection + check_fprm = 1'b0; // instruction encoded in rm, do the check here + if (CVA6Cfg.XF16ALT) begin // FP16ALT instructions encoded in rm separately (static) + if (!(instr.rftype.rm inside {[3'b000 : 3'b010], [3'b100 : 3'b110]})) + illegal_instr = 1'b1; + end else begin + if (!(instr.rftype.rm inside {[3'b000 : 3'b010]})) illegal_instr = 1'b1; + end + end + 5'b00101: begin + instruction_o.op = ariane_pkg::FMIN_MAX; // fmin/fmax.fmt - FP Minimum / Maximum + check_fprm = 1'b0; // instruction 
encoded in rm, do the check here + if (CVA6Cfg.XF16ALT) begin // FP16ALT instructions encoded in rm separately (static) + if (!(instr.rftype.rm inside {[3'b000 : 3'b001], [3'b100 : 3'b101]})) + illegal_instr = 1'b1; + end else begin + if (!(instr.rftype.rm inside {[3'b000 : 3'b001]})) illegal_instr = 1'b1; + end + end + 5'b01000: begin + instruction_o.op = ariane_pkg::FCVT_F2F; // fcvt.fmt.fmt - FP to FP Conversion + instruction_o.rs2[4:0] = instr.rvftype.rs1; // tie rs2 to rs1 to be safe (vectors use rs2) + imm_select = IIMM; // rs2 holds part of the instruction + if (|instr.rftype.rs2[24:23]) + illegal_instr = 1'b1; // bits [22:20] used, other bits must be 0 + // check source format + unique case (instr.rftype.rs2[22:20]) + // Only process instruction if corresponding extension is active (static) + 3'b000: if (~CVA6Cfg.RVF) illegal_instr = 1'b1; + 3'b001: if (~CVA6Cfg.RVD) illegal_instr = 1'b1; + 3'b010: if (~CVA6Cfg.XF16) illegal_instr = 1'b1; + 3'b110: if (~CVA6Cfg.XF16ALT) illegal_instr = 1'b1; + 3'b011: if (~CVA6Cfg.XF8) illegal_instr = 1'b1; + default: illegal_instr = 1'b1; + endcase + end + 5'b10100: begin + instruction_o.op = ariane_pkg::FCMP; // feq/flt/fle.fmt - FP Comparisons + check_fprm = 1'b0; // instruction encoded in rm, do the check here + if (CVA6Cfg.XF16ALT) begin // FP16ALT instructions encoded in rm separately (static) + if (!(instr.rftype.rm inside {[3'b000 : 3'b010], [3'b100 : 3'b110]})) + illegal_instr = 1'b1; + end else begin + if (!(instr.rftype.rm inside {[3'b000 : 3'b010]})) illegal_instr = 1'b1; + end + end + 5'b11000: begin + instruction_o.op = ariane_pkg::FCVT_F2I; // fcvt.ifmt.fmt - FP to Int Conversion + imm_select = IIMM; // rs2 holds part of the instruction + if (|instr.rftype.rs2[24:22]) + illegal_instr = 1'b1; // bits [21:20] used, other bits must be 0 + end + 5'b11010: begin + instruction_o.op = ariane_pkg::FCVT_I2F; // fcvt.fmt.ifmt - Int to FP Conversion + imm_select = IIMM; // rs2 holds part of the instruction + if
(|instr.rftype.rs2[24:22]) + illegal_instr = 1'b1; // bits [21:20] used, other bits must be 0 + end + 5'b11100: begin + instruction_o.rs2[4:0] = instr.rftype.rs1; // set rs2 = rs1 so we can map FMV to SGNJ in the unit + check_fprm = 1'b0; // instruction encoded in rm, do the check here + if (instr.rftype.rm == 3'b000 || (CVA6Cfg.XF16ALT && instr.rftype.rm == 3'b100)) // FP16ALT has separate encoding + instruction_o.op = ariane_pkg::FMV_F2X; // fmv.ifmt.fmt - FPR to GPR Move + else if (instr.rftype.rm == 3'b001 || (CVA6Cfg.XF16ALT && instr.rftype.rm == 3'b101)) // FP16ALT has separate encoding + instruction_o.op = ariane_pkg::FCLASS; // fclass.fmt - FP Classify + else illegal_instr = 1'b1; + // rs2 must be zero + if (instr.rftype.rs2 != 5'b00000) illegal_instr = 1'b1; + end + 5'b11110: begin + instruction_o.op = ariane_pkg::FMV_X2F; // fmv.fmt.ifmt - GPR to FPR Move + instruction_o.rs2[4:0] = instr.rftype.rs1; // set rs2 = rs1 so we can map FMV to SGNJ in the unit + check_fprm = 1'b0; // instruction encoded in rm, do the check here + if (!(instr.rftype.rm == 3'b000 || (CVA6Cfg.XF16ALT && instr.rftype.rm == 3'b100))) + illegal_instr = 1'b1; + // rs2 must be zero + if (instr.rftype.rs2 != 5'b00000) illegal_instr = 1'b1; + end + default: illegal_instr = 1'b1; + endcase + + // check format + unique case (instr.rftype.fmt) + // Only process instruction if corresponding extension is active (static) + 2'b00: if (~CVA6Cfg.RVF) illegal_instr = 1'b1; + 2'b01: if (~CVA6Cfg.RVD) illegal_instr = 1'b1; + 2'b10: if (~CVA6Cfg.XF16 & ~CVA6Cfg.XF16ALT) illegal_instr = 1'b1; + 2'b11: if (~CVA6Cfg.XF8) illegal_instr = 1'b1; + default: illegal_instr = 1'b1; + endcase + + // check rounding mode + if (check_fprm) begin + unique case (instr.rftype.rm) inside + [3'b000 : 3'b100]: ; //legal rounding modes + 3'b101: begin // Alternative Half-Precision encoded as fmt=10 and rm=101 + if (~CVA6Cfg.XF16ALT || instr.rftype.fmt != 2'b10) illegal_instr = 1'b1; + unique case (frm_i) inside // actual
rounding mode from frm csr + [3'b000 : 3'b100]: ; //legal rounding modes + default: illegal_instr = 1'b1; + endcase + end + 3'b111: begin + // rounding mode from frm csr + unique case (frm_i) inside + [3'b000 : 3'b100]: ; //legal rounding modes + default: illegal_instr = 1'b1; + endcase + end + default: illegal_instr = 1'b1; + endcase + end + end else begin + illegal_instr = 1'b1; + end + end + + // ---------------------------------- + // Atomic Operations + // ---------------------------------- + riscv::OpcodeAmo: begin + // we are going to use the load unit for AMOs + instruction_o.fu = STORE; + instruction_o.rs1[4:0] = instr.atype.rs1; + instruction_o.rs2[4:0] = instr.atype.rs2; + instruction_o.rd[4:0] = instr.atype.rd; + // TODO(zarubaf): Ordering + // words + if (CVA6Cfg.RVA && instr.stype.funct3 == 3'h2) begin + unique case (instr.instr[31:27]) + 5'h0: instruction_o.op = ariane_pkg::AMO_ADDW; + 5'h1: instruction_o.op = ariane_pkg::AMO_SWAPW; + 5'h2: begin + instruction_o.op = ariane_pkg::AMO_LRW; + if (instr.atype.rs2 != 0) illegal_instr = 1'b1; + end + 5'h3: instruction_o.op = ariane_pkg::AMO_SCW; + 5'h4: instruction_o.op = ariane_pkg::AMO_XORW; + 5'h8: instruction_o.op = ariane_pkg::AMO_ORW; + 5'hC: instruction_o.op = ariane_pkg::AMO_ANDW; + 5'h10: instruction_o.op = ariane_pkg::AMO_MINW; + 5'h14: instruction_o.op = ariane_pkg::AMO_MAXW; + 5'h18: instruction_o.op = ariane_pkg::AMO_MINWU; + 5'h1C: instruction_o.op = ariane_pkg::AMO_MAXWU; + default: illegal_instr = 1'b1; + endcase + // double words + end else if (riscv::IS_XLEN64 && CVA6Cfg.RVA && instr.stype.funct3 == 3'h3) begin + unique case (instr.instr[31:27]) + 5'h0: instruction_o.op = ariane_pkg::AMO_ADDD; + 5'h1: instruction_o.op = ariane_pkg::AMO_SWAPD; + 5'h2: begin + instruction_o.op = ariane_pkg::AMO_LRD; + if (instr.atype.rs2 != 0) illegal_instr = 1'b1; + end + 5'h3: instruction_o.op = ariane_pkg::AMO_SCD; + 5'h4: instruction_o.op = ariane_pkg::AMO_XORD; + 5'h8: instruction_o.op = 
ariane_pkg::AMO_ORD; + 5'hC: instruction_o.op = ariane_pkg::AMO_ANDD; + 5'h10: instruction_o.op = ariane_pkg::AMO_MIND; + 5'h14: instruction_o.op = ariane_pkg::AMO_MAXD; + 5'h18: instruction_o.op = ariane_pkg::AMO_MINDU; + 5'h1C: instruction_o.op = ariane_pkg::AMO_MAXDU; + default: illegal_instr = 1'b1; + endcase + end else begin + illegal_instr = 1'b1; + end + end + + // -------------------------------- + // Control Flow Instructions + // -------------------------------- + riscv::OpcodeBranch: begin + imm_select = SBIMM; + instruction_o.fu = CTRL_FLOW; + instruction_o.rs1[4:0] = instr.stype.rs1; + instruction_o.rs2[4:0] = instr.stype.rs2; + + is_control_flow_instr_o = 1'b1; + + case (instr.stype.funct3) + 3'b000: instruction_o.op = ariane_pkg::EQ; + 3'b001: instruction_o.op = ariane_pkg::NE; + 3'b100: instruction_o.op = ariane_pkg::LTS; + 3'b101: instruction_o.op = ariane_pkg::GES; + 3'b110: instruction_o.op = ariane_pkg::LTU; + 3'b111: instruction_o.op = ariane_pkg::GEU; + default: begin + is_control_flow_instr_o = 1'b0; + illegal_instr = 1'b1; + end + endcase + end + // Jump and link register + riscv::OpcodeJalr: begin + instruction_o.fu = CTRL_FLOW; + instruction_o.op = ariane_pkg::JALR; + instruction_o.rs1[4:0] = instr.itype.rs1; + imm_select = IIMM; + instruction_o.rd[4:0] = instr.itype.rd; + is_control_flow_instr_o = 1'b1; + // invalid jump and link register -> reserved for vector encoding + if (instr.itype.funct3 != 3'b0) illegal_instr = 1'b1; + end + // Jump and link + riscv::OpcodeJal: begin + instruction_o.fu = CTRL_FLOW; + imm_select = JIMM; + instruction_o.rd[4:0] = instr.utype.rd; + is_control_flow_instr_o = 1'b1; + end + + riscv::OpcodeAuipc: begin + instruction_o.fu = ALU; + imm_select = UIMM; + instruction_o.use_pc = 1'b1; + instruction_o.rd[4:0] = instr.utype.rd; + end + + riscv::OpcodeLui: begin + imm_select = UIMM; + instruction_o.fu = ALU; + instruction_o.rd[4:0] = instr.utype.rd; + end + + default: illegal_instr = 1'b1; + endcase + end + if 
(CVA6Cfg.CvxifEn) begin + if (is_illegal_i || illegal_instr) begin + instruction_o.fu = CVXIF; + instruction_o.rs1[4:0] = instr.r4type.rs1; + instruction_o.rs2[4:0] = instr.r4type.rs2; + instruction_o.rd[4:0] = instr.r4type.rd; + instruction_o.op = ariane_pkg::OFFLOAD; + imm_select = RS3; + end + end + + // Accelerator instructions. + // These can overwrite the previous decoding entirely. + if (CVA6Cfg.EnableAccelerator) begin // only generate decoder if accelerators are enabled (static) + if (is_accel) begin + instruction_o.fu = acc_instruction.fu; + instruction_o.vfp = acc_instruction.vfp; + instruction_o.rs1 = acc_instruction.rs1; + instruction_o.rs2 = acc_instruction.rs2; + instruction_o.rd = acc_instruction.rd; + instruction_o.op = acc_instruction.op; + illegal_instr = acc_illegal_instr; + is_control_flow_instr_o = acc_is_control_flow_instr; + end + end + end + + // -------------------------------- + // Sign extend immediate + // -------------------------------- + always_comb begin : sign_extend + imm_i_type = {{riscv::XLEN - 12{instruction_i[31]}}, instruction_i[31:20]}; + imm_s_type = {{riscv::XLEN - 12{instruction_i[31]}}, instruction_i[31:25], instruction_i[11:7]}; + imm_sb_type = { + {riscv::XLEN - 13{instruction_i[31]}}, + instruction_i[31], + instruction_i[7], + instruction_i[30:25], + instruction_i[11:8], + 1'b0 + }; + imm_u_type = { + {riscv::XLEN - 32{instruction_i[31]}}, instruction_i[31:12], 12'b0 + }; // JAL, AUIPC, sign extended to 64 bit + imm_uj_type = { + {riscv::XLEN - 20{instruction_i[31]}}, + instruction_i[19:12], + instruction_i[20], + instruction_i[30:21], + 1'b0 + }; + imm_bi_type = {{riscv::XLEN - 5{instruction_i[24]}}, instruction_i[24:20]}; + + // NOIMM, IIMM, SIMM, BIMM, UIMM, JIMM, RS3 + // select immediate + case (imm_select) + IIMM: begin + instruction_o.result = imm_i_type; + instruction_o.use_imm = 1'b1; + end + SIMM: begin + instruction_o.result = imm_s_type; + instruction_o.use_imm = 1'b1; + end + SBIMM: begin + 
instruction_o.result = imm_sb_type; + instruction_o.use_imm = 1'b1; + end + UIMM: begin + instruction_o.result = imm_u_type; + instruction_o.use_imm = 1'b1; + end + JIMM: begin + instruction_o.result = imm_uj_type; + instruction_o.use_imm = 1'b1; + end + RS3: begin + // result holds address of fp operand rs3 + instruction_o.result = {{riscv::XLEN - 5{1'b0}}, instr.r4type.rs3}; + instruction_o.use_imm = 1'b0; + end + default: begin + instruction_o.result = {riscv::XLEN{1'b0}}; + instruction_o.use_imm = 1'b0; + end + endcase + + if (CVA6Cfg.EnableAccelerator) begin + if (is_accel) begin + instruction_o.result = acc_instruction.result; + instruction_o.use_imm = acc_instruction.use_imm; + end + end + end + + // --------------------- + // Exception handling + // --------------------- + riscv::xlen_t interrupt_cause; + + // this instruction has already executed if the exception is valid + assign instruction_o.valid = instruction_o.ex.valid; + + always_comb begin : exception_handling + interrupt_cause = '0; + instruction_o.ex = ex_i; + // look if we didn't already get an exception in any previous + // stage - we should not overwrite it as we retain order regarding the exception + if (~ex_i.valid) begin + // if we didn't already get an exception save the instruction here as we may need it + // in the commit stage if we got a access exception to one of the CSR registers + instruction_o.ex.tval = (is_compressed_i) ? 
{{riscv::XLEN-16{1'b0}}, compressed_instr_i} : {{riscv::XLEN-32{1'b0}}, instruction_i}; + // instructions which will throw an exception are marked as valid + // e.g.: they can be committed anytime and do not need to wait for any functional unit + // check here if we decoded an invalid instruction or if the compressed decoder already decoded + // a invalid instruction + if (illegal_instr || is_illegal_i) begin + if (!CVA6Cfg.CvxifEn) instruction_o.ex.valid = 1'b1; + // we decoded an illegal exception here + instruction_o.ex.cause = riscv::ILLEGAL_INSTR; + // we got an ecall, set the correct cause depending on the current privilege level + end else if (ecall) begin + // this exception is valid + instruction_o.ex.valid = 1'b1; + // depending on the privilege mode, set the appropriate cause + if (priv_lvl_i == riscv::PRIV_LVL_S && CVA6Cfg.RVS) begin + instruction_o.ex.cause = riscv::ENV_CALL_SMODE; + end else if (priv_lvl_i == riscv::PRIV_LVL_U && CVA6Cfg.RVU) begin + instruction_o.ex.cause = riscv::ENV_CALL_UMODE; + end else if (priv_lvl_i == riscv::PRIV_LVL_M) begin + instruction_o.ex.cause = riscv::ENV_CALL_MMODE; + end + end else if (ebreak) begin + // this exception is valid + instruction_o.ex.valid = 1'b1; + // set breakpoint cause + instruction_o.ex.cause = riscv::BREAKPOINT; + end + // ----------------- + // Interrupt Control + // ----------------- + // we decode an interrupt the same as an exception, hence it will be taken if the instruction did not + // throw any previous exception. 
+ // we have three interrupt sources: external interrupts, software interrupts, timer interrupts (order of precedence) + // for two privilege levels: Supervisor and Machine Mode + // Supervisor Timer Interrupt + if (irq_ctrl_i.mie[riscv::IRQ_S_TIMER] && irq_ctrl_i.mip[riscv::IRQ_S_TIMER]) begin + interrupt_cause = riscv::S_TIMER_INTERRUPT; + end + // Supervisor Software Interrupt + if (irq_ctrl_i.mie[riscv::IRQ_S_SOFT] && irq_ctrl_i.mip[riscv::IRQ_S_SOFT]) begin + interrupt_cause = riscv::S_SW_INTERRUPT; + end + // Supervisor External Interrupt + // The logical-OR of the software-writable bit and the signal from the external interrupt controller is + // used to generate external interrupts to the supervisor + if (irq_ctrl_i.mie[riscv::IRQ_S_EXT] && (irq_ctrl_i.mip[riscv::IRQ_S_EXT] | irq_i[ariane_pkg::SupervisorIrq])) begin + interrupt_cause = riscv::S_EXT_INTERRUPT; + end + // Machine Timer Interrupt + if (irq_ctrl_i.mip[riscv::IRQ_M_TIMER] && irq_ctrl_i.mie[riscv::IRQ_M_TIMER]) begin + interrupt_cause = riscv::M_TIMER_INTERRUPT; + end + // Machine Mode Software Interrupt + if (irq_ctrl_i.mip[riscv::IRQ_M_SOFT] && irq_ctrl_i.mie[riscv::IRQ_M_SOFT]) begin + interrupt_cause = riscv::M_SW_INTERRUPT; + end + // Machine Mode External Interrupt + if (irq_ctrl_i.mip[riscv::IRQ_M_EXT] && irq_ctrl_i.mie[riscv::IRQ_M_EXT]) begin + interrupt_cause = riscv::M_EXT_INTERRUPT; + end + + if (interrupt_cause[riscv::XLEN-1] && irq_ctrl_i.global_enable) begin + // However, if bit i in mideleg is set, interrupts are considered to be globally enabled if the hart’s current privilege + // mode equals the delegated privilege mode (S or U) and that mode’s interrupt enable bit + // (SIE or UIE in mstatus) is set, or if the current privilege mode is less than the delegated privilege mode. 
+ if (irq_ctrl_i.mideleg[interrupt_cause[$clog2(riscv::XLEN)-1:0]]) begin + if ((CVA6Cfg.RVS && irq_ctrl_i.sie && priv_lvl_i == riscv::PRIV_LVL_S) || (CVA6Cfg.RVU && priv_lvl_i == riscv::PRIV_LVL_U)) begin + instruction_o.ex.valid = 1'b1; + instruction_o.ex.cause = interrupt_cause; + end + end else begin + instruction_o.ex.valid = 1'b1; + instruction_o.ex.cause = interrupt_cause; + end + end + end + + // a debug request has precedence over everything else + if (CVA6Cfg.DebugEn && debug_req_i && !debug_mode_i) begin + instruction_o.ex.valid = 1'b1; + instruction_o.ex.cause = riscv::DEBUG_REQUEST; + end + end +endmodule diff --git a/test/type_param/core/ex_stage.sv b/test/type_param/core/ex_stage.sv new file mode 100644 index 00000000..978e2a3e --- /dev/null +++ b/test/type_param/core/ex_stage.sv @@ -0,0 +1,413 @@ + +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License.
+// +// Author: Florian Zaruba, ETH Zurich +// Date: 19.04.2017 +// Description: Instantiation of all functional units residing in the execute stage + + +module ex_stage + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int unsigned ASID_WIDTH = 1 +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, + input logic debug_mode_i, + + input logic [riscv::VLEN-1:0] rs1_forwarding_i, + input logic [riscv::VLEN-1:0] rs2_forwarding_i, + input fu_data_t fu_data_i, + input logic [riscv::VLEN-1:0] pc_i, // PC of current instruction + input logic is_compressed_instr_i, // we need to know if this was a compressed instruction + // in order to calculate the next PC on a mis-predict + // Fixed latency unit(s) + output riscv::xlen_t flu_result_o, + output logic [TRANS_ID_BITS-1:0] flu_trans_id_o, // ID of scoreboard entry at which to write back + output exception_t flu_exception_o, + output logic flu_ready_o, // FLU is ready + output logic flu_valid_o, // FLU result is valid + // Branches and Jumps + // ALU 1 + input logic alu_valid_i, // Output is valid + // Branch Unit + input logic branch_valid_i, // we are using the branch unit + input branchpredict_sbe_t branch_predict_i, + output bp_resolve_t resolved_branch_o, // the branch engine uses the write back from the ALU + output logic resolve_branch_o, // to ID signaling that we resolved the branch + // CSR + input logic csr_valid_i, + output logic [11:0] csr_addr_o, + input logic csr_commit_i, + // MULT + input logic mult_valid_i, // Output is valid + // LSU + output logic lsu_ready_o, // FU is ready + input logic lsu_valid_i, // Input is valid + + output logic load_valid_o, + output riscv::xlen_t load_result_o, + output logic [TRANS_ID_BITS-1:0] load_trans_id_o, + output exception_t load_exception_o, + output logic store_valid_o, + output riscv::xlen_t store_result_o, + output logic [TRANS_ID_BITS-1:0] 
store_trans_id_o, + output exception_t store_exception_o, + + input logic lsu_commit_i, + output logic lsu_commit_ready_o, // commit queue is ready to accept another commit request + input logic [TRANS_ID_BITS-1:0] commit_tran_id_i, + input logic stall_st_pending_i, + output logic no_st_pending_o, + input logic amo_valid_commit_i, + // FPU + output logic fpu_ready_o, // FU is ready + input logic fpu_valid_i, // Output is valid + input logic [1:0] fpu_fmt_i, // FP format + input logic [2:0] fpu_rm_i, // FP rm + input logic [2:0] fpu_frm_i, // FP frm csr + input logic [6:0] fpu_prec_i, // FP precision control + output logic [TRANS_ID_BITS-1:0] fpu_trans_id_o, + output riscv::xlen_t fpu_result_o, + output logic fpu_valid_o, + output exception_t fpu_exception_o, + // CoreV-X-Interface + input logic x_valid_i, + output logic x_ready_o, + input logic [31:0] x_off_instr_i, + output logic [TRANS_ID_BITS-1:0] x_trans_id_o, + output exception_t x_exception_o, + output riscv::xlen_t x_result_o, + output logic x_valid_o, + output logic x_we_o, + output cvxif_pkg::cvxif_req_t cvxif_req_o, + input cvxif_pkg::cvxif_resp_t cvxif_resp_i, + input logic acc_valid_i, // Output is valid + // Memory Management + input logic enable_translation_i, + input logic en_ld_st_translation_i, + input logic flush_tlb_i, + + input riscv::priv_lvl_t priv_lvl_i, + input riscv::priv_lvl_t ld_st_priv_lvl_i, + input logic sum_i, + input logic mxr_i, + input logic [riscv::PPNW-1:0] satp_ppn_i, + input logic [ ASID_WIDTH-1:0] asid_i, + // icache translation requests + input icache_arsp_t icache_areq_i, + output icache_areq_t icache_areq_o, + + // interface to dcache + input dcache_req_o_t [2:0] dcache_req_ports_i, + output dcache_req_i_t [2:0] dcache_req_ports_o, + input logic dcache_wbuffer_empty_i, + input logic dcache_wbuffer_not_ni_i, + output amo_req_t amo_req_o, // request to cache subsystem + input amo_resp_t amo_resp_i, // response from cache subsystem + // Performance counters + output logic
itlb_miss_o, + output logic dtlb_miss_o, + // PMPs + input riscv::pmpcfg_t [15:0] pmpcfg_i, + input logic [15:0][riscv::PLEN-3:0] pmpaddr_i, + + // RVFI + output lsu_ctrl_t rvfi_lsu_ctrl_o, + output [riscv::PLEN-1:0] rvfi_mem_paddr_o +); + + // ------------------------- + // Fixed Latency Units + // ------------------------- + // all fixed latency units share a single issue port and a single write + // port into the scoreboard. At the moment those are: + // 1. ALU - all operations are single cycle + // 2. Branch unit: operation is single cycle, the ALU is needed + // for comparison + // 3. CSR: This is a small buffer which saves the address of the CSR. + // The value is then re-fetched once the instruction retires. The buffer + // is only a single entry deep, hence this operation will block all + // other operations once this buffer is full. This should not be a major + // concern though as CSRs are infrequent. + // 4. Multiplier/Divider: The multiplier has a fixed latency of 1 cycle. + // The issue logic will take care of not issuing + // another instruction if it will collide on the + // output port. Divisions are arbitrary in length + // they will simply block the issue of all other + // instructions. + + + logic current_instruction_is_sfence_vma; + // These two registers store the rs1 and rs2 parameters in case of `SFENCE_VMA` + // instruction to be used for TLB flush in the next clock cycle. + logic [ASID_WIDTH-1:0] asid_to_be_flushed; + logic [riscv::VLEN-1:0] vaddr_to_be_flushed; + + // from ALU to branch unit + logic alu_branch_res; // branch comparison result + riscv::xlen_t alu_result, csr_result, mult_result; + logic [riscv::VLEN-1:0] branch_result; + logic csr_ready, mult_ready; + logic [TRANS_ID_BITS-1:0] mult_trans_id; + logic mult_valid; + + // 1. ALU (combinatorial) + // data silence operation + fu_data_t alu_data; + assign alu_data = (alu_valid_i | branch_valid_i) ?
fu_data_i : '0; + + alu #( + .CVA6Cfg(CVA6Cfg) + ) alu_i ( + .clk_i, + .rst_ni, + .fu_data_i (alu_data), + .result_o (alu_result), + .alu_branch_res_o(alu_branch_res) + ); + + // 2. Branch Unit (combinatorial) + // we don't silence the branch unit as this is already critical and we do + // not want to add another layer of logic + branch_unit #( + .CVA6Cfg(CVA6Cfg) + ) branch_unit_i ( + .clk_i, + .rst_ni, + .debug_mode_i, + .fu_data_i, + .pc_i, + .is_compressed_instr_i, + // any functional unit is valid, check that there is no accidental mis-predict + .fu_valid_i ( alu_valid_i || lsu_valid_i || csr_valid_i || mult_valid_i || fpu_valid_i || acc_valid_i ) , + .branch_valid_i, + .branch_comp_res_i(alu_branch_res), + .branch_result_o(branch_result), + .branch_predict_i, + .resolved_branch_o, + .resolve_branch_o, + .branch_exception_o(flu_exception_o) + ); + + // 3. CSR (sequential) + csr_buffer #( + .CVA6Cfg(CVA6Cfg) + ) csr_buffer_i ( + .clk_i, + .rst_ni, + .flush_i, + .fu_data_i, + .csr_valid_i, + .csr_ready_o (csr_ready), + .csr_result_o(csr_result), + .csr_commit_i, + .csr_addr_o + ); + + assign flu_valid_o = alu_valid_i | branch_valid_i | csr_valid_i | mult_valid; + + // result MUX + always_comb begin + // Branch result as default case + flu_result_o = {{riscv::XLEN - riscv::VLEN{1'b0}}, branch_result}; + flu_trans_id_o = fu_data_i.trans_id; + // ALU result + if (alu_valid_i) begin + flu_result_o = alu_result; + // CSR result + end else if (csr_valid_i) begin + flu_result_o = csr_result; + end else if (mult_valid) begin + flu_result_o = mult_result; + flu_trans_id_o = mult_trans_id; + end + end + + // ready flags for FLU + always_comb begin + flu_ready_o = csr_ready & mult_ready; + end + + // 4. Multiplication (Sequential) + fu_data_t mult_data; + // input silencing of multiplier + assign mult_data = mult_valid_i ? 
fu_data_i : '0; + + mult #( + .CVA6Cfg(CVA6Cfg) + ) i_mult ( + .clk_i, + .rst_ni, + .flush_i, + .mult_valid_i, + .fu_data_i (mult_data), + .result_o (mult_result), + .mult_valid_o (mult_valid), + .mult_ready_o (mult_ready), + .mult_trans_id_o(mult_trans_id) + ); + + // ---------------- + // FPU + // ---------------- + generate + if (CVA6Cfg.FpPresent) begin : fpu_gen + fu_data_t fpu_data; + assign fpu_data = fpu_valid_i ? fu_data_i : '0; + + fpu_wrap #( + .CVA6Cfg(CVA6Cfg) + ) fpu_i ( + .clk_i, + .rst_ni, + .flush_i, + .fpu_valid_i, + .fpu_ready_o, + .fu_data_i(fpu_data), + .fpu_fmt_i, + .fpu_rm_i, + .fpu_frm_i, + .fpu_prec_i, + .fpu_trans_id_o, + .result_o (fpu_result_o), + .fpu_valid_o, + .fpu_exception_o + ); + end else begin : no_fpu_gen + assign fpu_ready_o = '0; + assign fpu_trans_id_o = '0; + assign fpu_result_o = '0; + assign fpu_valid_o = '0; + assign fpu_exception_o = '0; + end + endgenerate + + // ---------------- + // Load-Store Unit + // ---------------- + fu_data_t lsu_data; + + assign lsu_data = lsu_valid_i ? 
fu_data_i : '0; + + load_store_unit #( + .CVA6Cfg (CVA6Cfg), + .ASID_WIDTH(ASID_WIDTH) + ) lsu_i ( + .clk_i, + .rst_ni, + .flush_i, + .stall_st_pending_i, + .no_st_pending_o, + .fu_data_i (lsu_data), + .lsu_ready_o, + .lsu_valid_i, + .load_trans_id_o, + .load_result_o, + .load_valid_o, + .load_exception_o, + .store_trans_id_o, + .store_result_o, + .store_valid_o, + .store_exception_o, + .commit_i (lsu_commit_i), + .commit_ready_o (lsu_commit_ready_o), + .commit_tran_id_i, + .enable_translation_i, + .en_ld_st_translation_i, + .icache_areq_i, + .icache_areq_o, + .priv_lvl_i, + .ld_st_priv_lvl_i, + .sum_i, + .mxr_i, + .satp_ppn_i, + .asid_i, + .asid_to_be_flushed_i (asid_to_be_flushed), + .vaddr_to_be_flushed_i(vaddr_to_be_flushed), + .flush_tlb_i, + .itlb_miss_o, + .dtlb_miss_o, + .dcache_req_ports_i, + .dcache_req_ports_o, + .dcache_wbuffer_empty_i, + .dcache_wbuffer_not_ni_i, + .amo_valid_commit_i, + .amo_req_o, + .amo_resp_i, + .pmpcfg_i, + .pmpaddr_i, + .rvfi_lsu_ctrl_o, + .rvfi_mem_paddr_o + ); + + if (CVA6Cfg.CvxifEn) begin : gen_cvxif + fu_data_t cvxif_data; + assign cvxif_data = x_valid_i ? 
fu_data_i : '0; + cvxif_fu #( + .CVA6Cfg(CVA6Cfg) + ) cvxif_fu_i ( + .clk_i, + .rst_ni, + .fu_data_i, + .priv_lvl_i(ld_st_priv_lvl_i), + .x_valid_i, + .x_ready_o, + .x_off_instr_i, + .x_trans_id_o, + .x_exception_o, + .x_result_o, + .x_valid_o, + .x_we_o, + .cvxif_req_o, + .cvxif_resp_i + ); + end else begin : gen_no_cvxif + assign cvxif_req_o = '0; + assign x_trans_id_o = '0; + assign x_exception_o = '0; + assign x_result_o = '0; + assign x_valid_o = '0; + end + + if (CVA6Cfg.RVS) begin + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + current_instruction_is_sfence_vma <= 1'b0; + end else begin + if (flush_i) begin + current_instruction_is_sfence_vma <= 1'b0; + end else if ((fu_data_i.operation == SFENCE_VMA) && csr_valid_i) begin + current_instruction_is_sfence_vma <= 1'b1; + end + end + end + + // This process stores the rs1 and rs2 parameters of a SFENCE_VMA instruction. + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + asid_to_be_flushed <= '0; + vaddr_to_be_flushed <= '0; + // if the current instruction in EX_STAGE is a sfence.vma, in the next cycle no writes will happen + end else if ((~current_instruction_is_sfence_vma) && (~((fu_data_i.operation == SFENCE_VMA) && csr_valid_i))) begin + vaddr_to_be_flushed <= rs1_forwarding_i; + asid_to_be_flushed <= rs2_forwarding_i[ASID_WIDTH-1:0]; + end + end + end else begin + assign current_instruction_is_sfence_vma = 1'b0; + assign asid_to_be_flushed = '0; + assign vaddr_to_be_flushed = '0; + end + +endmodule diff --git a/test/type_param/core/fpu_wrap.sv b/test/type_param/core/fpu_wrap.sv new file mode 100644 index 00000000..9219029d --- /dev/null +++ b/test/type_param/core/fpu_wrap.sv @@ -0,0 +1,568 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. 
You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Stefan Mach, ETH Zurich +// Date: 12.04.2018 +// Description: Wrapper for the floating-point unit + + +module fpu_wrap + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +) ( + input logic clk_i, + input logic rst_ni, + input logic flush_i, + input logic fpu_valid_i, + output logic fpu_ready_o, + input fu_data_t fu_data_i, + + input logic [ 1:0] fpu_fmt_i, + input logic [ 2:0] fpu_rm_i, + input logic [ 2:0] fpu_frm_i, + input logic [ 6:0] fpu_prec_i, + output logic [TRANS_ID_BITS-1:0] fpu_trans_id_o, + output logic [ CVA6Cfg.FLen-1:0] result_o, + output logic fpu_valid_o, + output exception_t fpu_exception_o +); + + // this is a workaround + // otherwise compilation might issue an error if FLEN=0 + enum logic { + READY, + STALL + } + state_q, state_d; + if (CVA6Cfg.FpPresent) begin : fpu_gen + logic [CVA6Cfg.FLen-1:0] operand_a_i; + logic [CVA6Cfg.FLen-1:0] operand_b_i; + logic [CVA6Cfg.FLen-1:0] operand_c_i; + assign operand_a_i = fu_data_i.operand_a[CVA6Cfg.FLen-1:0]; + assign operand_b_i = fu_data_i.operand_b[CVA6Cfg.FLen-1:0]; + assign operand_c_i = fu_data_i.imm[CVA6Cfg.FLen-1:0]; + + //----------------------------------- + // FPnew config from FPnew package + //----------------------------------- + localparam OPBITS = fpnew_pkg::OP_BITS; + localparam FMTBITS = $clog2(fpnew_pkg::NUM_FP_FORMATS); + localparam IFMTBITS = $clog2(fpnew_pkg::NUM_INT_FORMATS); + + // Features (enabled formats, vectors etc.) 
+ localparam fpnew_pkg::fpu_features_t FPU_FEATURES = '{ + Width: unsigned'(riscv::XLEN), // parameterized using XLEN + EnableVectors: CVA6Cfg.XFVec, + EnableNanBox: 1'b1, + FpFmtMask: {CVA6Cfg.RVF, CVA6Cfg.RVD, CVA6Cfg.XF16, CVA6Cfg.XF8, CVA6Cfg.XF16ALT}, + IntFmtMask: { + CVA6Cfg.XFVec && CVA6Cfg.XF8, + CVA6Cfg.XFVec && (CVA6Cfg.XF16 || CVA6Cfg.XF16ALT), + 1'b1, + 1'b1 + } + }; + + // Implementation (number of registers etc) + localparam fpnew_pkg::fpu_implementation_t FPU_IMPLEMENTATION = '{ + PipeRegs: '{ // FP32, FP64, FP16, FP8, FP16alt + '{ + unsigned'(LAT_COMP_FP32), + unsigned'(LAT_COMP_FP64), + unsigned'(LAT_COMP_FP16), + unsigned'(LAT_COMP_FP8), + unsigned'(LAT_COMP_FP16ALT) + }, // ADDMUL + '{default: unsigned'(LAT_DIVSQRT)}, // DIVSQRT + '{default: unsigned'(LAT_NONCOMP)}, // NONCOMP + '{default: unsigned'(LAT_CONV)} + }, // CONV + UnitTypes: '{ + '{default: fpnew_pkg::PARALLEL}, // ADDMUL + '{default: fpnew_pkg::MERGED}, // DIVSQRT + '{default: fpnew_pkg::PARALLEL}, // NONCOMP + '{default: fpnew_pkg::MERGED} + }, // CONV + PipeConfig: fpnew_pkg::DISTRIBUTED + }; + + //------------------------------------------------- + // Inputs to the FPU and protocol inversion buffer + //------------------------------------------------- + logic [CVA6Cfg.FLen-1:0] operand_a_d, operand_a_q, operand_a; + logic [CVA6Cfg.FLen-1:0] operand_b_d, operand_b_q, operand_b; + logic [CVA6Cfg.FLen-1:0] operand_c_d, operand_c_q, operand_c; + logic [OPBITS-1:0] fpu_op_d, fpu_op_q, fpu_op; + logic fpu_op_mod_d, fpu_op_mod_q, fpu_op_mod; + logic [FMTBITS-1:0] fpu_srcfmt_d, fpu_srcfmt_q, fpu_srcfmt; + logic [FMTBITS-1:0] fpu_dstfmt_d, fpu_dstfmt_q, fpu_dstfmt; + logic [IFMTBITS-1:0] fpu_ifmt_d, fpu_ifmt_q, fpu_ifmt; + logic [2:0] fpu_rm_d, fpu_rm_q, fpu_rm; + logic fpu_vec_op_d, fpu_vec_op_q, fpu_vec_op; + + logic [TRANS_ID_BITS-1:0] fpu_tag_d, fpu_tag_q, fpu_tag; + + logic fpu_in_ready, fpu_in_valid; + logic fpu_out_ready, fpu_out_valid; + + logic [4:0] fpu_status; + + // FSM to 
handle protocol inversion + logic hold_inputs; + logic use_hold; + + //----------------------------- + // Translate inputs + //----------------------------- + + always_comb begin : input_translation + + automatic logic vec_replication; // control honoring of replication flag + automatic logic replicate_c; // replicate operand C instead of B (for ADD/SUB) + automatic logic check_ah; // Decide for AH from RM field encoding + + // Default Values + operand_a_d = operand_a_i; + operand_b_d = operand_b_i; // immediates come through this port unless used as operand + operand_c_d = operand_c_i; // immediates come through this port unless used as operand + fpu_op_d = fpnew_pkg::SGNJ; // sign injection by default + fpu_op_mod_d = 1'b0; + fpu_dstfmt_d = fpnew_pkg::FP32; + fpu_ifmt_d = fpnew_pkg::INT32; + fpu_rm_d = fpu_rm_i; + fpu_vec_op_d = fu_data_i.fu == FPU_VEC; + fpu_tag_d = fu_data_i.trans_id; + vec_replication = fpu_rm_i[0]; // replication bit is sent via rm field + replicate_c = 1'b0; + check_ah = 1'b0; // whether set scalar AH encoding from MSB of rm_i + + // Scalar Rounding Modes - some ops encode inside RM but use smaller range + if (!(fpu_rm_i inside {[3'b000 : 3'b100]})) fpu_rm_d = fpu_frm_i; + + // Vectorial ops always consult FRM + if (fpu_vec_op_d) fpu_rm_d = fpu_frm_i; + + // Formats + unique case (fpu_fmt_i) + // FP32 + 2'b00: fpu_dstfmt_d = fpnew_pkg::FP32; + // FP64 or FP16ALT (vectorial) + 2'b01: fpu_dstfmt_d = fpu_vec_op_d ? fpnew_pkg::FP16ALT : fpnew_pkg::FP64; + // FP16 or FP16ALT (scalar) + 2'b10: begin + if (!fpu_vec_op_d && fpu_rm_i == 3'b101) fpu_dstfmt_d = fpnew_pkg::FP16ALT; + else fpu_dstfmt_d = fpnew_pkg::FP16; + end + // FP8 + default: fpu_dstfmt_d = fpnew_pkg::FP8; + endcase + + // By default, set src=dst + fpu_srcfmt_d = fpu_dstfmt_d; + + // Operations (this can modify the rounding mode field and format!) 
+ unique case (fu_data_i.operation) + // Addition + FADD: begin + fpu_op_d = fpnew_pkg::ADD; + replicate_c = 1'b1; // second operand is in C + end + // Subtraction is modified ADD + FSUB: begin + fpu_op_d = fpnew_pkg::ADD; + fpu_op_mod_d = 1'b1; + replicate_c = 1'b1; // second operand is in C + end + // Multiplication + FMUL: fpu_op_d = fpnew_pkg::MUL; + // Division + FDIV: fpu_op_d = fpnew_pkg::DIV; + // Min/Max - OP is encoded in rm (000-001) + FMIN_MAX: begin + fpu_op_d = fpnew_pkg::MINMAX; + fpu_rm_d = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit + check_ah = 1'b1; // AH has RM MSB encoding + end + // Square Root + FSQRT: fpu_op_d = fpnew_pkg::SQRT; + // Fused Multiply Add + FMADD: fpu_op_d = fpnew_pkg::FMADD; + // Fused Multiply Subtract is modified FMADD + FMSUB: begin + fpu_op_d = fpnew_pkg::FMADD; + fpu_op_mod_d = 1'b1; + end + // Fused Negated Multiply Subtract + FNMSUB: fpu_op_d = fpnew_pkg::FNMSUB; + // Fused Negated Multiply Add is modified FNMSUB + FNMADD: begin + fpu_op_d = fpnew_pkg::FNMSUB; + fpu_op_mod_d = 1'b1; + end + // Float to Int Cast - Op encoded in lowest two imm bits or rm + FCVT_F2I: begin + fpu_op_d = fpnew_pkg::F2I; + // Vectorial Ops encoded in R bit + if (fpu_vec_op_d) begin + fpu_op_mod_d = fpu_rm_i[0]; + vec_replication = 1'b0; // no replication, R bit used for op + unique case (fpu_fmt_i) + 2'b00: fpu_ifmt_d = fpnew_pkg::INT32; + 2'b01, 2'b10: fpu_ifmt_d = fpnew_pkg::INT16; + 2'b11: fpu_ifmt_d = fpnew_pkg::INT8; + endcase + // Scalar casts encoded in imm + end else begin + fpu_op_mod_d = operand_c_i[0]; + if (operand_c_i[1]) fpu_ifmt_d = fpnew_pkg::INT64; + else fpu_ifmt_d = fpnew_pkg::INT32; + end + end + // Int to Float Cast - Op encoded in lowest two imm bits or rm + FCVT_I2F: begin + fpu_op_d = fpnew_pkg::I2F; + // Vectorial Ops encoded in R bit + if (fpu_vec_op_d) begin + fpu_op_mod_d = fpu_rm_i[0]; + vec_replication = 1'b0; // no replication, R bit used for op + unique case (fpu_fmt_i) + 2'b00: fpu_ifmt_d = 
fpnew_pkg::INT32; + 2'b01, 2'b10: fpu_ifmt_d = fpnew_pkg::INT16; + 2'b11: fpu_ifmt_d = fpnew_pkg::INT8; + endcase + // Scalar casts encoded in imm + end else begin + fpu_op_mod_d = operand_c_i[0]; + if (operand_c_i[1]) fpu_ifmt_d = fpnew_pkg::INT64; + else fpu_ifmt_d = fpnew_pkg::INT32; + end + end + // Float to Float Cast - Source format encoded in lowest two/three imm bits + FCVT_F2F: begin + fpu_op_d = fpnew_pkg::F2F; + // Vectorial ops encoded in lowest two imm bits + if (fpu_vec_op_d) begin + vec_replication = 1'b0; // no replication for casts (not needed) + unique case (operand_c_i[1:0]) + 2'b00: fpu_srcfmt_d = fpnew_pkg::FP32; + 2'b01: fpu_srcfmt_d = fpnew_pkg::FP16ALT; + 2'b10: fpu_srcfmt_d = fpnew_pkg::FP16; + 2'b11: fpu_srcfmt_d = fpnew_pkg::FP8; + endcase + // Scalar ops encoded in lowest three imm bits + end else begin + unique case (operand_c_i[2:0]) + 3'b000: fpu_srcfmt_d = fpnew_pkg::FP32; + 3'b001: fpu_srcfmt_d = fpnew_pkg::FP64; + 3'b010: fpu_srcfmt_d = fpnew_pkg::FP16; + 3'b110: fpu_srcfmt_d = fpnew_pkg::FP16ALT; + 3'b011: fpu_srcfmt_d = fpnew_pkg::FP8; + default: ; // Do nothing + endcase + end + end + // Scalar Sign Injection - op encoded in rm (000-010) + FSGNJ: begin + fpu_op_d = fpnew_pkg::SGNJ; + fpu_rm_d = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit + check_ah = 1'b1; // AH has RM MSB encoding + end + // Move from FPR to GPR - mapped to SGNJ-passthrough since no recoding + FMV_F2X: begin + fpu_op_d = fpnew_pkg::SGNJ; + fpu_rm_d = 3'b011; // passthrough without checking nan-box + fpu_op_mod_d = 1'b1; // no NaN-Boxing + check_ah = 1'b1; // AH has RM MSB encoding + vec_replication = 1'b0; // no replication, we set second operand + end + // Move from GPR to FPR - mapped to NOP since no recoding + FMV_X2F: begin + fpu_op_d = fpnew_pkg::SGNJ; + fpu_rm_d = 3'b011; // passthrough without checking nan-box + check_ah = 1'b1; // AH has RM MSB encoding + vec_replication = 1'b0; // no replication, we set second operand + end + // Scalar 
Comparisons - op encoded in rm (000-010) + FCMP: begin + fpu_op_d = fpnew_pkg::CMP; + fpu_rm_d = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit + check_ah = 1'b1; // AH has RM MSB encoding + end + // Classification + FCLASS: begin + fpu_op_d = fpnew_pkg::CLASSIFY; + fpu_rm_d = { + 1'b0, fpu_rm_i[1:0] + }; // mask out AH encoding bit - CLASS doesn't care anyways + check_ah = 1'b1; // AH has RM MSB encoding + end + // Vectorial Minimum - set up scalar encoding in rm + VFMIN: begin + fpu_op_d = fpnew_pkg::MINMAX; + fpu_rm_d = 3'b000; // min + end + // Vectorial Maximum - set up scalar encoding in rm + VFMAX: begin + fpu_op_d = fpnew_pkg::MINMAX; + fpu_rm_d = 3'b001; // max + end + // Vectorial Sign Injection - set up scalar encoding in rm + VFSGNJ: begin + fpu_op_d = fpnew_pkg::SGNJ; + fpu_rm_d = 3'b000; // sgnj + end + // Vectorial Negated Sign Injection - set up scalar encoding in rm + VFSGNJN: begin + fpu_op_d = fpnew_pkg::SGNJ; + fpu_rm_d = 3'b001; // sgnjn + end + // Vectorial Xored Sign Injection - set up scalar encoding in rm + VFSGNJX: begin + fpu_op_d = fpnew_pkg::SGNJ; + fpu_rm_d = 3'b010; // sgnjx + end + // Vectorial Equals - set up scalar encoding in rm + VFEQ: begin + fpu_op_d = fpnew_pkg::CMP; + fpu_rm_d = 3'b010; // eq + end + // Vectorial Not Equals - set up scalar encoding in rm + VFNE: begin + fpu_op_d = fpnew_pkg::CMP; + fpu_op_mod_d = 1'b1; // invert output + fpu_rm_d = 3'b010; // eq + end + // Vectorial Less Than - set up scalar encoding in rm + VFLT: begin + fpu_op_d = fpnew_pkg::CMP; + fpu_rm_d = 3'b001; // lt + end + // Vectorial Greater or Equal - set up scalar encoding in rm + VFGE: begin + fpu_op_d = fpnew_pkg::CMP; + fpu_op_mod_d = 1'b1; // invert output + fpu_rm_d = 3'b001; // lt + end + // Vectorial Less or Equal - set up scalar encoding in rm + VFLE: begin + fpu_op_d = fpnew_pkg::CMP; + fpu_rm_d = 3'b000; // le + end + // Vectorial Greater Than - set up scalar encoding in rm + VFGT: begin + fpu_op_d = fpnew_pkg::CMP; + fpu_op_mod_d 
= 1'b1; // invert output + fpu_rm_d = 3'b000; // le + end + // Vectorial Convert-and-Pack from FP32, lower 4 entries + VFCPKAB_S: begin + fpu_op_d = fpnew_pkg::CPKAB; + fpu_op_mod_d = fpu_rm_i[0]; // A/B selection from R bit + vec_replication = 1'b0; // no replication, R bit used for op + fpu_srcfmt_d = fpnew_pkg::FP32; // Cast from FP32 + end + // Vectorial Convert-and-Pack from FP32, upper 4 entries + VFCPKCD_S: begin + fpu_op_d = fpnew_pkg::CPKCD; + fpu_op_mod_d = fpu_rm_i[0]; // C/D selection from R bit + vec_replication = 1'b0; // no replication, R bit used for op + fpu_srcfmt_d = fpnew_pkg::FP32; // Cast from FP32 + end + // Vectorial Convert-and-Pack from FP64, lower 4 entries + VFCPKAB_D: begin + fpu_op_d = fpnew_pkg::CPKAB; + fpu_op_mod_d = fpu_rm_i[0]; // A/B selection from R bit + vec_replication = 1'b0; // no replication, R bit used for op + fpu_srcfmt_d = fpnew_pkg::FP64; // Cast from FP64 + end + // Vectorial Convert-and-Pack from FP64, upper 4 entries + VFCPKCD_D: begin + fpu_op_d = fpnew_pkg::CPKCD; + fpu_op_mod_d = fpu_rm_i[0]; // C/D selection from R bit + vec_replication = 1'b0; // no replication, R bit used for op + fpu_srcfmt_d = fpnew_pkg::FP64; // Cast from FP64 + end + // No changes per default + default: ; //nothing + endcase + + // Scalar AH encoding fixing + if (!fpu_vec_op_d && check_ah) if (fpu_rm_i[2]) fpu_dstfmt_d = fpnew_pkg::FP16ALT; + + // Replication + if (fpu_vec_op_d && vec_replication) begin + if (replicate_c) begin + unique case (fpu_dstfmt_d) + fpnew_pkg::FP32: operand_c_d = CVA6Cfg.RVD ? {2{operand_c_i[31:0]}} : operand_c_i; + fpnew_pkg::FP16, fpnew_pkg::FP16ALT: + operand_c_d = CVA6Cfg.RVD ? {4{operand_c_i[15:0]}} : {2{operand_c_i[15:0]}}; + fpnew_pkg::FP8: + operand_c_d = CVA6Cfg.RVD ? {8{operand_c_i[7:0]}} : {4{operand_c_i[7:0]}}; + default: ; // Do nothing + endcase // fpu_dstfmt_d + end else begin + unique case (fpu_dstfmt_d) + fpnew_pkg::FP32: operand_b_d = CVA6Cfg.RVD ? 
{2{operand_b_i[31:0]}} : operand_b_i; + fpnew_pkg::FP16, fpnew_pkg::FP16ALT: + operand_b_d = CVA6Cfg.RVD ? {4{operand_b_i[15:0]}} : {2{operand_b_i[15:0]}}; + fpnew_pkg::FP8: + operand_b_d = CVA6Cfg.RVD ? {8{operand_b_i[7:0]}} : {4{operand_b_i[7:0]}}; + default: ; // Do nothing + endcase // fpu_dstfmt_d + end + end + end + + + //--------------------------------------------------------- + // Upstream protocol inversion: InValid depends on InReady + //--------------------------------------------------------- + + always_comb begin : p_inputFSM + // Default Values + fpu_ready_o = 1'b0; + fpu_in_valid = 1'b0; + hold_inputs = 1'b0; // hold register disabled + use_hold = 1'b0; // inputs go directly to unit + state_d = state_q; // stay in the same state + + // FSM + unique case (state_q) + // Default state, ready for instructions + READY: begin + fpu_ready_o = 1'b1; // Act as if FPU ready + fpu_in_valid = fpu_valid_i; // Forward input valid to FPU + // There is a transaction but the FPU can't handle it + if (fpu_valid_i & ~fpu_in_ready) begin + fpu_ready_o = 1'b0; // No token given to Issue + hold_inputs = 1'b1; // save inputs to the holding register + state_d = STALL; // stall future incoming requests + end + end + // We're stalling the upstream (ready=0) + STALL: begin + fpu_in_valid = 1'b1; // we have data for the FPU + use_hold = 1'b1; // the data comes from the hold reg + // Wait until it's consumed + if (fpu_in_ready) begin + fpu_ready_o = 1'b1; // Give a token to issue + state_d = READY; // accept future requests + end + end + // Default: emit default values + default: ; + endcase + + // Flushing will override issue and go back to idle + if (flush_i) begin + state_d = READY; + end + + end + + // Buffer register and FSM state holding + always_ff @(posedge clk_i or negedge rst_ni) begin : fp_hold_reg + if (~rst_ni) begin + state_q <= READY; + operand_a_q <= '0; + operand_b_q <= '0; + operand_c_q <= '0; + fpu_op_q <= '0; + fpu_op_mod_q <= '0; + fpu_srcfmt_q <= '0; + 
fpu_dstfmt_q <= '0; + fpu_ifmt_q <= '0; + fpu_rm_q <= '0; + fpu_vec_op_q <= '0; + fpu_tag_q <= '0; + end else begin + state_q <= state_d; + // Hold register is [TRIGGERED] by FSM + if (hold_inputs) begin + operand_a_q <= operand_a_d; + operand_b_q <= operand_b_d; + operand_c_q <= operand_c_d; + fpu_op_q <= fpu_op_d; + fpu_op_mod_q <= fpu_op_mod_d; + fpu_srcfmt_q <= fpu_srcfmt_d; + fpu_dstfmt_q <= fpu_dstfmt_d; + fpu_ifmt_q <= fpu_ifmt_d; + fpu_rm_q <= fpu_rm_d; + fpu_vec_op_q <= fpu_vec_op_d; + fpu_tag_q <= fpu_tag_d; + end + end + end + + // Select FPU input data: from register if valid data in register, else directly from input + assign operand_a = use_hold ? operand_a_q : operand_a_d; + assign operand_b = use_hold ? operand_b_q : operand_b_d; + assign operand_c = use_hold ? operand_c_q : operand_c_d; + assign fpu_op = use_hold ? fpu_op_q : fpu_op_d; + assign fpu_op_mod = use_hold ? fpu_op_mod_q : fpu_op_mod_d; + assign fpu_srcfmt = use_hold ? fpu_srcfmt_q : fpu_srcfmt_d; + assign fpu_dstfmt = use_hold ? fpu_dstfmt_q : fpu_dstfmt_d; + assign fpu_ifmt = use_hold ? fpu_ifmt_q : fpu_ifmt_d; + assign fpu_rm = use_hold ? fpu_rm_q : fpu_rm_d; + assign fpu_vec_op = use_hold ? fpu_vec_op_q : fpu_vec_op_d; + assign fpu_tag = use_hold ? 
fpu_tag_q : fpu_tag_d; + + // Consolidate operands + logic [2:0][CVA6Cfg.FLen-1:0] fpu_operands; + + assign fpu_operands[0] = operand_a; + assign fpu_operands[1] = operand_b; + assign fpu_operands[2] = operand_c; + + //--------------- + // FPU instance + //--------------- + + fpnew_top #( + .Features (FPU_FEATURES), + .Implementation(FPU_IMPLEMENTATION), + .TagType (logic [TRANS_ID_BITS-1:0]) + ) i_fpnew_bulk ( + .clk_i, + .rst_ni, + .operands_i (fpu_operands), + .rnd_mode_i (fpnew_pkg::roundmode_e'(fpu_rm)), + .op_i (fpnew_pkg::operation_e'(fpu_op)), + .op_mod_i (fpu_op_mod), + .src_fmt_i (fpnew_pkg::fp_format_e'(fpu_srcfmt)), + .dst_fmt_i (fpnew_pkg::fp_format_e'(fpu_dstfmt)), + .int_fmt_i (fpnew_pkg::int_format_e'(fpu_ifmt)), + .vectorial_op_i(fpu_vec_op), + .tag_i (fpu_tag), + .simd_mask_i (1'b1), + .in_valid_i (fpu_in_valid), + .in_ready_o (fpu_in_ready), + .flush_i, + .result_o, + .status_o (fpu_status), + .tag_o (fpu_trans_id_o), + .out_valid_o (fpu_out_valid), + .out_ready_i (fpu_out_ready), + .busy_o ( /* unused */) + ); + + // Pack status flag into exception cause, tval ignored in wb, exception is always invalid - NOTE(review): 59'h0 plus the 5-bit fpu_status is a fixed 64-bit concatenation; confirm it matches the cause field width + assign fpu_exception_o.cause = {59'h0, fpu_status}; + assign fpu_exception_o.valid = 1'b0; + + // Downstream write port is dedicated to FPU and always ready + assign fpu_out_ready = 1'b1; + + // Downstream valid from unit + assign fpu_valid_o = fpu_out_valid; + + end +endmodule diff --git a/test/type_param/core/frontend/bht.sv b/test/type_param/core/frontend/bht.sv new file mode 100644 index 00000000..bcfb78c7 --- /dev/null +++ b/test/type_param/core/frontend/bht.sv @@ -0,0 +1,215 @@ +// Copyright 2018 - 2019 ETH Zurich and University of Bologna. +// Copyright 2023 - Thales for additionnal conribution. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 2.0 (the "License"); you may not use this file except in +// compliance with the License. 
You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-2.0. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 08.02.2018 +// Migrated: Luis Vitorio Cargnini, IEEE +// Date: 09.06.2018 +// FPGA optimization: Sebastien Jacq, Thales +// Date: 2023-01-30 + +// branch history table - 2 bit saturation counter + +module bht #( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int unsigned NR_ENTRIES = 1024 +) ( + input logic clk_i, + input logic rst_ni, + input logic flush_i, + input logic debug_mode_i, + input logic [ riscv::VLEN-1:0] vpc_i, + input ariane_pkg::bht_update_t bht_update_i, + // we potentially need INSTR_PER_FETCH predictions/cycle + output ariane_pkg::bht_prediction_t [ariane_pkg::INSTR_PER_FETCH-1:0] bht_prediction_o +); + // the last bit is always zero, we don't need it for indexing + localparam OFFSET = CVA6Cfg.RVC == 1'b1 ? 1 : 2; + // re-shape the branch history table + localparam NR_ROWS = NR_ENTRIES / ariane_pkg::INSTR_PER_FETCH; + // number of bits needed to index the row + localparam ROW_ADDR_BITS = $clog2(ariane_pkg::INSTR_PER_FETCH); + localparam ROW_INDEX_BITS = CVA6Cfg.RVC == 1'b1 ? 
$clog2(ariane_pkg::INSTR_PER_FETCH) : 1; + // number of bits we should use for prediction + localparam PREDICTION_BITS = $clog2(NR_ROWS) + OFFSET + ROW_ADDR_BITS; + // we are not interested in all bits of the address + unread i_unread (.d_i(|vpc_i)); + + struct packed { + logic valid; + logic [1:0] saturation_counter; + } + bht_d[NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0], + bht_q[NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0]; + + logic [$clog2(NR_ROWS)-1:0] index, update_pc; + logic [ROW_INDEX_BITS-1:0] update_row_index; + + assign index = vpc_i[PREDICTION_BITS-1:ROW_ADDR_BITS+OFFSET]; + assign update_pc = bht_update_i.pc[PREDICTION_BITS-1:ROW_ADDR_BITS+OFFSET]; + if (CVA6Cfg.RVC) begin : gen_update_row_index + assign update_row_index = bht_update_i.pc[ROW_ADDR_BITS+OFFSET-1:OFFSET]; + end else begin + assign update_row_index = '0; + end + + if (!ariane_pkg::FPGA_EN) begin : gen_asic_bht // ASIC TARGET + + logic [1:0] saturation_counter; + // prediction assignment + for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_bht_output + assign bht_prediction_o[i].valid = bht_q[index][i].valid; + assign bht_prediction_o[i].taken = bht_q[index][i].saturation_counter[1] == 1'b1; + end + + always_comb begin : update_bht + bht_d = bht_q; + saturation_counter = bht_q[update_pc][update_row_index].saturation_counter; + + if ((bht_update_i.valid && CVA6Cfg.DebugEn && !debug_mode_i) || (bht_update_i.valid && !CVA6Cfg.DebugEn)) begin + bht_d[update_pc][update_row_index].valid = 1'b1; + + if (saturation_counter == 2'b11) begin + // we can safely decrease it + if (!bht_update_i.taken) + bht_d[update_pc][update_row_index].saturation_counter = saturation_counter - 1; + // then check if it saturated in the negative regime e.g.: branch not taken + end else if (saturation_counter == 2'b00) begin + // we can safely increase it + if (bht_update_i.taken) + bht_d[update_pc][update_row_index].saturation_counter = saturation_counter + 1; + end else begin // otherwise we are 
not in any boundaries and can decrease or increase it + if (bht_update_i.taken) + bht_d[update_pc][update_row_index].saturation_counter = saturation_counter + 1; + else bht_d[update_pc][update_row_index].saturation_counter = saturation_counter - 1; + end + end + end + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + for (int unsigned i = 0; i < NR_ROWS; i++) begin + for (int j = 0; j < ariane_pkg::INSTR_PER_FETCH; j++) begin + bht_q[i][j] <= '0; + end + end + end else begin + // evict all entries + if (flush_i) begin + for (int i = 0; i < NR_ROWS; i++) begin + for (int j = 0; j < ariane_pkg::INSTR_PER_FETCH; j++) begin + bht_q[i][j].valid <= 1'b0; + bht_q[i][j].saturation_counter <= 2'b10; + end + end + end else begin + bht_q <= bht_d; + end + end + end + + end else begin : gen_fpga_bht //FPGA TARGETS + + // number of bits per word in the bram + localparam BRAM_WORD_BITS = $bits(ariane_pkg::bht_t); + logic [ ROW_INDEX_BITS-1:0] row_index; + logic [ ariane_pkg::INSTR_PER_FETCH-1:0] bht_ram_we; + logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] bht_ram_read_address_0; + logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] bht_ram_read_address_1; + logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] bht_ram_write_address; + logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] bht_ram_wdata; + logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] bht_ram_rdata_0; + logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] bht_ram_rdata_1; + + ariane_pkg::bht_t [ ariane_pkg::INSTR_PER_FETCH-1:0] bht; + ariane_pkg::bht_t [ ariane_pkg::INSTR_PER_FETCH-1:0] bht_updated; + + if (CVA6Cfg.RVC) begin : gen_row_index + assign row_index = vpc_i[ROW_ADDR_BITS+OFFSET-1:OFFSET]; + end else begin + assign row_index = '0; + end + + // ------------------------- + // prediction assignment & update Branch History Table - NOTE(review): bht_prediction_o has no default in this block and is only written for the slot matching row_index; confirm the remaining slots are meant to hold their value + // ------------------------- + always_comb begin : prediction_update_bht + bht_ram_we = '0; + bht_ram_read_address_0 = '0; + 
bht_ram_read_address_1 = '0; + bht_ram_write_address = '0; + bht_ram_wdata = '0; + bht_updated = '0; + bht = '0; + + for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin + if (row_index == i) begin + bht_ram_read_address_0[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = index; + bht_prediction_o[i].valid = bht_ram_rdata_0[i*BRAM_WORD_BITS+2]; + bht_prediction_o[i].taken = bht_ram_rdata_0[i*BRAM_WORD_BITS+1]; + end + end + + if (bht_update_i.valid && !debug_mode_i) begin + for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin + if (update_row_index == i) begin + bht_ram_read_address_1[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = update_pc; + bht[i].saturation_counter = bht_ram_rdata_1[i*BRAM_WORD_BITS+:2]; + + if (bht[i].saturation_counter == 2'b11) begin + // we can safely decrease it + if (!bht_update_i.taken) + bht_updated[i].saturation_counter = bht[i].saturation_counter - 1; + else bht_updated[i].saturation_counter = 2'b11; + // then check if it saturated in the negative regime e.g.: branch not taken + end else if (bht[i].saturation_counter == 2'b00) begin + // we can safely increase it + if (bht_update_i.taken) + bht_updated[i].saturation_counter = bht[i].saturation_counter + 1; + else bht_updated[i].saturation_counter = 2'b00; + end else begin // otherwise we are not in any boundaries and can decrease or increase it + if (bht_update_i.taken) + bht_updated[i].saturation_counter = bht[i].saturation_counter + 1; + else bht_updated[i].saturation_counter = bht[i].saturation_counter - 1; + end + + bht_updated[i].valid = 1'b1; + bht_ram_we[i] = 1'b1; + bht_ram_write_address[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = update_pc; + //bht_ram_wdata[(i+1)*BRAM_WORD_BITS-1] = 1'b1; //valid + bht_ram_wdata[i*BRAM_WORD_BITS+:BRAM_WORD_BITS] = { + bht_updated[i].valid, bht_updated[i].saturation_counter + }; + + end + end + end + end + + for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_bht_ram + AsyncThreePortRam #( + .ADDR_WIDTH($clog2(NR_ROWS)), + 
.DATA_DEPTH(NR_ROWS), + .DATA_WIDTH(BRAM_WORD_BITS) + ) i_bht_ram ( + .Clk_CI (clk_i), + .WrEn_SI (bht_ram_we[i]), + .WrAddr_DI (bht_ram_write_address[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)]), + .WrData_DI (bht_ram_wdata[i*BRAM_WORD_BITS+:BRAM_WORD_BITS]), + .RdAddr_DI_0(bht_ram_read_address_0[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)]), + .RdAddr_DI_1(bht_ram_read_address_1[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)]), + .RdData_DO_0(bht_ram_rdata_0[i*BRAM_WORD_BITS+:BRAM_WORD_BITS]), + .RdData_DO_1(bht_ram_rdata_1[i*BRAM_WORD_BITS+:BRAM_WORD_BITS]) + ); + end + + end +endmodule diff --git a/test/type_param/core/frontend/btb.sv b/test/type_param/core/frontend/btb.sv new file mode 100644 index 00000000..9500f373 --- /dev/null +++ b/test/type_param/core/frontend/btb.sv @@ -0,0 +1,185 @@ +// Copyright 2018 - 2019 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 2.0 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-2.0. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 08.02.2018 +// Migrated: Luis Vitorio Cargnini, IEEE +// Date: 09.06.2018 +// +// Additional contributions by: +// Sebastien Jacq, Thales - sjthales on github.com +// Date: 2022-12-01 +// +// Description: This module is an adaptation of the BTB (Branch Target Buffer) +// module both FPGA and ASIC targets. +// Prediction target address is stored in BRAM on FPGA while for +// original module, target address is stored in D flip-flop. 
+// For FPGA flushing is not supported because the frontend module +// flushing signal is not connected. +// +// branch target buffer +module btb #( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int NR_ENTRIES = 8 +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, // flush the btb + input logic debug_mode_i, + + input logic [riscv::VLEN-1:0] vpc_i, // virtual PC from IF stage + input ariane_pkg::btb_update_t btb_update_i, // update btb with this information + output ariane_pkg::btb_prediction_t [ariane_pkg::INSTR_PER_FETCH-1:0] btb_prediction_o // prediction from btb +); + // the last bit is always zero, we don't need it for indexing + localparam OFFSET = CVA6Cfg.RVC == 1'b1 ? 1 : 2; + // re-shape the branch history table + localparam NR_ROWS = NR_ENTRIES / ariane_pkg::INSTR_PER_FETCH; + // number of bits needed to index the row + localparam ROW_ADDR_BITS = $clog2(ariane_pkg::INSTR_PER_FETCH); + localparam ROW_INDEX_BITS = CVA6Cfg.RVC == 1'b1 ? 
$clog2(ariane_pkg::INSTR_PER_FETCH) : 1; + // number of bits we should use for prediction + localparam PREDICTION_BITS = $clog2(NR_ROWS) + OFFSET + ROW_ADDR_BITS; + // prevent aliasing to degrade performance + localparam ANTIALIAS_BITS = 8; + // number of bits par word in the bram + localparam BRAM_WORD_BITS = $bits(ariane_pkg::btb_prediction_t); + // we are not interested in all bits of the address + unread i_unread (.d_i(|vpc_i)); + + + logic [$clog2(NR_ROWS)-1:0] index, update_pc; + logic [ROW_INDEX_BITS-1:0] update_row_index; + + assign index = vpc_i[PREDICTION_BITS-1:ROW_ADDR_BITS+OFFSET]; + assign update_pc = btb_update_i.pc[PREDICTION_BITS-1:ROW_ADDR_BITS+OFFSET]; + if (CVA6Cfg.RVC) begin : gen_update_row_index + assign update_row_index = btb_update_i.pc[ROW_ADDR_BITS+OFFSET-1:OFFSET]; + end else begin + assign update_row_index = '0; + end + + if (ariane_pkg::FPGA_EN) begin : gen_fpga_btb //FPGA TARGETS + logic [ ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_csel_prediction; + logic [ ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_we_prediction; + logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] btb_ram_addr_prediction; + logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_wdata_prediction; + logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_rdata_prediction; + + logic [ ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_csel_update; + logic [ ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_we_update; + logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] btb_ram_addr_update; + logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_wdata_update; + + // output matching prediction + for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_output + assign btb_ram_csel_prediction[i] = 1'b1; + assign btb_ram_we_prediction[i] = 1'b0; + assign btb_ram_wdata_prediction = '0; + assign btb_ram_addr_prediction[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = index; + assign btb_prediction_o[i] = 
btb_ram_rdata_prediction[i*BRAM_WORD_BITS+:BRAM_WORD_BITS]; + end + + // ------------------------- + // Update Branch Prediction + // ------------------------- + // update on a mis-predict + always_comb begin : update_branch_predict + btb_ram_csel_update = '0; + btb_ram_we_update = '0; + btb_ram_addr_update = '0; + btb_ram_wdata_update = '0; + + if (btb_update_i.valid && !debug_mode_i) begin + for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin + if (update_row_index == i) begin + btb_ram_csel_update[i] = 1'b1; + btb_ram_we_update[i] = 1'b1; + btb_ram_addr_update[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = update_pc; + btb_ram_wdata_update[i*BRAM_WORD_BITS+:BRAM_WORD_BITS] = { + 1'b1, btb_update_i.target_address + }; + end + end + end + end + + for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_ram + SyncDpRam #( + .ADDR_WIDTH($clog2(NR_ROWS)), + .DATA_DEPTH(NR_ROWS), + .DATA_WIDTH(BRAM_WORD_BITS), + .OUT_REGS (0), + .SIM_INIT (1) + ) i_btb_ram ( + .Clk_CI (clk_i), + .Rst_RBI (rst_ni), + //---------------------------- + .CSelA_SI (btb_ram_csel_update[i]), + .WrEnA_SI (btb_ram_we_update[i]), + .AddrA_DI (btb_ram_addr_update[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)]), + .WrDataA_DI(btb_ram_wdata_update[i*BRAM_WORD_BITS+:BRAM_WORD_BITS]), + .RdDataA_DO(), + //----------------------------- + .CSelB_SI (btb_ram_csel_prediction[i]), + .WrEnB_SI (btb_ram_we_prediction[i]), + .AddrB_DI (btb_ram_addr_prediction[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)]), + .WrDataB_DI(btb_ram_wdata_prediction[i*BRAM_WORD_BITS+:BRAM_WORD_BITS]), + .RdDataB_DO(btb_ram_rdata_prediction[i*BRAM_WORD_BITS+:BRAM_WORD_BITS]) + ); + end + + end else begin : gen_asic_btb // ASIC TARGET + + // typedef for all branch target entries + // we may want to try to put a tag field that fills the rest of the PC in-order to mitigate aliasing effects + ariane_pkg::btb_prediction_t + btb_d[NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0], + btb_q[NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0]; + + // 
output matching prediction + for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_output + assign btb_prediction_o[i] = btb_q[index][i]; // workaround + end + + // ------------------------- + // Update Branch Prediction + // ------------------------- + // update on a mis-predict + always_comb begin : update_branch_predict + btb_d = btb_q; + + if (btb_update_i.valid && !debug_mode_i) begin + btb_d[update_pc][update_row_index].valid = 1'b1; + // the target address is simply updated + btb_d[update_pc][update_row_index].target_address = btb_update_i.target_address; + end + end + + // sequential process + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + // Bias the branches to be taken upon first arrival + for (int i = 0; i < NR_ROWS; i++) btb_q[i] <= '{default: 0}; + end else begin + // evict all entries + if (flush_i) begin + for (int i = 0; i < NR_ROWS; i++) begin + for (int j = 0; j < ariane_pkg::INSTR_PER_FETCH; j++) begin + btb_q[i][j].valid <= 1'b0; + end + end + end else begin + btb_q <= btb_d; + end + end + end + end +endmodule diff --git a/test/type_param/core/frontend/frontend.sv b/test/type_param/core/frontend/frontend.sv new file mode 100644 index 00000000..8f2f50a0 --- /dev/null +++ b/test/type_param/core/frontend/frontend.sv @@ -0,0 +1,516 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// Author: Florian Zaruba, ETH Zurich +// Date: 08.02.2018 +// Description: Ariane Instruction Fetch Frontend +// +// This module interfaces with the instruction cache, handles control +// change request from the back-end and does branch prediction. + +module frontend + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, // flush request for PCGEN + input logic flush_bp_i, // flush branch prediction + input logic halt_i, // halt commit stage + input logic debug_mode_i, + // global input + input logic [riscv::VLEN-1:0] boot_addr_i, + // Set a new PC + // mispredict + input bp_resolve_t resolved_branch_i, // from controller signaling a branch_predict -> update BTB + // from commit, when flushing the whole pipeline + input logic set_pc_commit_i, // Take the PC from commit stage + input logic [riscv::VLEN-1:0] pc_commit_i, // PC of instruction in commit stage + // CSR input + input logic [riscv::VLEN-1:0] epc_i, // exception PC which we need to return to + input logic eret_i, // return from exception + input logic [riscv::VLEN-1:0] trap_vector_base_i, // base of trap vector + input logic ex_valid_i, // exception is valid - from commit + input logic set_debug_pc_i, // jump to debug address + // Instruction Fetch + output icache_dreq_t icache_dreq_o, + input icache_drsp_t icache_dreq_i, + // instruction output port -> to processor back-end + output fetch_entry_t fetch_entry_o, // fetch entry containing all relevant data for the ID stage + output logic fetch_entry_valid_o, // instruction in IF is valid + input logic fetch_entry_ready_i // ID acknowledged this instruction +); + // Instruction Cache Registers, from I$ + logic [ FETCH_WIDTH-1:0] icache_data_q; + logic icache_valid_q; + ariane_pkg::frontend_exception_t icache_ex_valid_q; + logic [ riscv::VLEN-1:0] icache_vaddr_q; + logic instr_queue_ready; + logic 
[ariane_pkg::INSTR_PER_FETCH-1:0] instr_queue_consumed; + // upper-most branch-prediction from last cycle + btb_prediction_t btb_q; + bht_prediction_t bht_q; + // instruction fetch is ready + logic if_ready; + logic [riscv::VLEN-1:0] npc_d, npc_q; // next PC + + // indicates whether we come out of reset (then we need to load boot_addr_i) + logic npc_rst_load_q; + + logic replay; + logic [ riscv::VLEN-1:0] replay_addr; + + // shift amount + logic [$clog2(ariane_pkg::INSTR_PER_FETCH)-1:0] shamt; + // address will always be 16 bit aligned, make this explicit here + if (CVA6Cfg.RVC) begin : gen_shamt + assign shamt = icache_dreq_i.vaddr[$clog2(ariane_pkg::INSTR_PER_FETCH):1]; + end else begin + assign shamt = 1'b0; + end + + // ----------------------- + // Ctrl Flow Speculation + // ----------------------- + // RVI ctrl flow prediction + logic [INSTR_PER_FETCH-1:0] rvi_return, rvi_call, rvi_branch, rvi_jalr, rvi_jump; + logic [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] rvi_imm; + // RVC branching + logic [INSTR_PER_FETCH-1:0] rvc_branch, rvc_jump, rvc_jr, rvc_return, rvc_jalr, rvc_call; + logic [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] rvc_imm; + // re-aligned instruction and address (coming from cache - combinationally) + logic [INSTR_PER_FETCH-1:0][ 31:0] instr; + logic [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] addr; + logic [INSTR_PER_FETCH-1:0] instruction_valid; + // BHT, BTB and RAS prediction + bht_prediction_t [INSTR_PER_FETCH-1:0] bht_prediction; + btb_prediction_t [INSTR_PER_FETCH-1:0] btb_prediction; + bht_prediction_t [INSTR_PER_FETCH-1:0] bht_prediction_shifted; + btb_prediction_t [INSTR_PER_FETCH-1:0] btb_prediction_shifted; + ras_t ras_predict; + logic [ riscv::VLEN-1:0] vpc_btb; + + // branch-predict update + logic is_mispredict; + logic ras_push, ras_pop; + logic [ riscv::VLEN-1:0] ras_update; + + // Instruction FIFO + logic [ riscv::VLEN-1:0] predict_address; + cf_t [ariane_pkg::INSTR_PER_FETCH-1:0] cf_type; + logic [ariane_pkg::INSTR_PER_FETCH-1:0] 
taken_rvi_cf; + logic [ariane_pkg::INSTR_PER_FETCH-1:0] taken_rvc_cf; + + logic serving_unaligned; + // Re-align instructions + instr_realign #( + .CVA6Cfg(CVA6Cfg) + ) i_instr_realign ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (icache_dreq_o.kill_s2), + .valid_i (icache_valid_q), + .serving_unaligned_o(serving_unaligned), + .address_i (icache_vaddr_q), + .data_i (icache_data_q), + .valid_o (instruction_valid), + .addr_o (addr), + .instr_o (instr) + ); + + // -------------------- + // Branch Prediction + // -------------------- + // select the right branch prediction result + // in case we are serving an unaligned instruction in instr[0] we need to take + // the prediction we saved from the previous fetch + if (CVA6Cfg.RVC) begin : gen_btb_prediction_shifted + assign bht_prediction_shifted[0] = (serving_unaligned) ? bht_q : bht_prediction[addr[0][$clog2( + INSTR_PER_FETCH + ):1]]; + assign btb_prediction_shifted[0] = (serving_unaligned) ? btb_q : btb_prediction[addr[0][$clog2( + INSTR_PER_FETCH + ):1]]; + + // for all other predictions we can use the generated address to index + // into the branch prediction data structures + for (genvar i = 1; i < INSTR_PER_FETCH; i++) begin : gen_prediction_address + assign bht_prediction_shifted[i] = bht_prediction[addr[i][$clog2(INSTR_PER_FETCH):1]]; + assign btb_prediction_shifted[i] = btb_prediction[addr[i][$clog2(INSTR_PER_FETCH):1]]; + end + end else begin + assign bht_prediction_shifted[0] = (serving_unaligned) ? bht_q : bht_prediction[addr[0][1]]; + assign btb_prediction_shifted[0] = (serving_unaligned) ? 
btb_q : btb_prediction[addr[0][1]]; + end + ; + + // for the return address stack it doens't matter as we have the + // address of the call/return already + logic bp_valid; + + logic [INSTR_PER_FETCH-1:0] is_branch; + logic [INSTR_PER_FETCH-1:0] is_call; + logic [INSTR_PER_FETCH-1:0] is_jump; + logic [INSTR_PER_FETCH-1:0] is_return; + logic [INSTR_PER_FETCH-1:0] is_jalr; + + for (genvar i = 0; i < INSTR_PER_FETCH; i++) begin + // branch history table -> BHT + assign is_branch[i] = instruction_valid[i] & (rvi_branch[i] | rvc_branch[i]); + // function calls -> RAS + assign is_call[i] = instruction_valid[i] & (rvi_call[i] | rvc_call[i]); + // function return -> RAS + assign is_return[i] = instruction_valid[i] & (rvi_return[i] | rvc_return[i]); + // unconditional jumps with known target -> immediately resolved + assign is_jump[i] = instruction_valid[i] & (rvi_jump[i] | rvc_jump[i]); + // unconditional jumps with unknown target -> BTB + assign is_jalr[i] = instruction_valid[i] & ~is_return[i] & (rvi_jalr[i] | rvc_jalr[i] | rvc_jr[i]); + end + + // taken/not taken + always_comb begin + taken_rvi_cf = '0; + taken_rvc_cf = '0; + predict_address = '0; + + for (int i = 0; i < INSTR_PER_FETCH; i++) cf_type[i] = ariane_pkg::NoCF; + + ras_push = 1'b0; + ras_pop = 1'b0; + ras_update = '0; + + // lower most prediction gets precedence + for (int i = INSTR_PER_FETCH - 1; i >= 0; i--) begin + unique case ({ + is_branch[i], is_return[i], is_jump[i], is_jalr[i] + }) + 4'b0000: ; // regular instruction e.g.: no branch + // unconditional jump to register, we need the BTB to resolve this + 4'b0001: begin + ras_pop = 1'b0; + ras_push = 1'b0; + if (CVA6Cfg.BTBEntries && btb_prediction_shifted[i].valid) begin + predict_address = btb_prediction_shifted[i].target_address; + cf_type[i] = ariane_pkg::JumpR; + end + end + // its an unconditional jump to an immediate + 4'b0010: begin + ras_pop = 1'b0; + ras_push = 1'b0; + taken_rvi_cf[i] = rvi_jump[i]; + taken_rvc_cf[i] = rvc_jump[i]; + 
cf_type[i] = ariane_pkg::Jump; + end + // return + 4'b0100: begin + // make sure to only alter the RAS if we actually consumed the instruction + ras_pop = ras_predict.valid & instr_queue_consumed[i]; + ras_push = 1'b0; + predict_address = ras_predict.ra; + cf_type[i] = ariane_pkg::Return; + end + // branch prediction + 4'b1000: begin + ras_pop = 1'b0; + ras_push = 1'b0; + // if we have a valid dynamic prediction use it + if (bht_prediction_shifted[i].valid) begin + taken_rvi_cf[i] = rvi_branch[i] & bht_prediction_shifted[i].taken; + taken_rvc_cf[i] = rvc_branch[i] & bht_prediction_shifted[i].taken; + // otherwise default to static prediction + end else begin + // set if immediate is negative - static prediction + taken_rvi_cf[i] = rvi_branch[i] & rvi_imm[i][riscv::VLEN-1]; + taken_rvc_cf[i] = rvc_branch[i] & rvc_imm[i][riscv::VLEN-1]; + end + if (taken_rvi_cf[i] || taken_rvc_cf[i]) begin + cf_type[i] = ariane_pkg::Branch; + end + end + default: ; + // default: $error("Decoded more than one control flow"); + endcase + // if this instruction, in addition, is a call, save the resulting address + // but only if we actually consumed the address + if (is_call[i]) begin + ras_push = instr_queue_consumed[i]; + ras_update = addr[i] + (rvc_call[i] ? 2 : 4); + end + // calculate the jump target address + if (taken_rvc_cf[i] || taken_rvi_cf[i]) begin + predict_address = addr[i] + (taken_rvc_cf[i] ? rvc_imm[i] : rvi_imm[i]); + end + end + end + // or reduce struct + always_comb begin + bp_valid = 1'b0; + // BP cannot be valid if we have a return instruction and the RAS is not giving a valid address + // Check that we encountered a control flow and that for a return the RAS + // contains a valid prediction. 
+ for (int i = 0; i < INSTR_PER_FETCH; i++) + bp_valid |= ((cf_type[i] != NoCF & cf_type[i] != Return) | ((cf_type[i] == Return) & ras_predict.valid)); + end + assign is_mispredict = resolved_branch_i.valid & resolved_branch_i.is_mispredict; + + // Cache interface + assign icache_dreq_o.req = instr_queue_ready; + assign if_ready = icache_dreq_i.ready & instr_queue_ready; + // We need to flush the cache pipeline if: + // 1. We mispredicted + // 2. Want to flush the whole processor front-end + // 3. Need to replay an instruction because the fetch-fifo was full + assign icache_dreq_o.kill_s1 = is_mispredict | flush_i | replay; + // if we have a valid branch-prediction we need to only kill the last cache request + // also if we killed the first stage we also need to kill the second stage (inclusive flush) + assign icache_dreq_o.kill_s2 = icache_dreq_o.kill_s1 | bp_valid; + + // Update Control Flow Predictions + bht_update_t bht_update; + btb_update_t btb_update; + + // assert on branch, deassert when resolved + logic speculative_q, speculative_d; + assign speculative_d = (speculative_q && !resolved_branch_i.valid || |is_branch || |is_return || |is_jalr) && !flush_i; + assign icache_dreq_o.spec = speculative_d; + + assign bht_update.valid = resolved_branch_i.valid + & (resolved_branch_i.cf_type == ariane_pkg::Branch); + assign bht_update.pc = resolved_branch_i.pc; + assign bht_update.taken = resolved_branch_i.is_taken; + // only update mispredicted branches e.g. no returns from the RAS + assign btb_update.valid = resolved_branch_i.valid + & resolved_branch_i.is_mispredict + & (resolved_branch_i.cf_type == ariane_pkg::JumpR); + assign btb_update.pc = resolved_branch_i.pc; + assign btb_update.target_address = resolved_branch_i.target_address; + + // ------------------- + // Next PC + // ------------------- + // next PC (NPC) can come from (in order of precedence): + // 0. Default assignment/replay instruction + // 1. Branch Predict taken + // 2. 
Control flow change request (misprediction) + // 3. Return from environment call + // 4. Exception/Interrupt + // 5. Pipeline Flush because of CSR side effects + // Mis-predict handling is a little bit different + // select PC a.k.a PC Gen + always_comb begin : npc_select + automatic logic [riscv::VLEN-1:0] fetch_address; + // check whether we come out of reset + // this is a workaround. some tools have issues + // having boot_addr_i in the asynchronous + // reset assignment to npc_q, even though + // boot_addr_i will be assigned a constant + // on the top-level. + if (npc_rst_load_q) begin + npc_d = boot_addr_i; + fetch_address = boot_addr_i; + end else begin + fetch_address = npc_q; + // keep stable by default + npc_d = npc_q; + end + // 0. Branch Prediction + if (bp_valid) begin + fetch_address = predict_address; + npc_d = predict_address; + end + // 1. Default assignment + if (if_ready) begin + npc_d = {fetch_address[riscv::VLEN-1:2], 2'b0} + 'h4; + end + // 2. Replay instruction fetch + if (replay) begin + npc_d = replay_addr; + end + // 3. Control flow change request + if (is_mispredict) begin + npc_d = resolved_branch_i.target_address; + end + // 4. Return from environment call + if (eret_i) begin + npc_d = epc_i; + end + // 5. Exception/Interrupt + if (ex_valid_i) begin + npc_d = trap_vector_base_i; + end + // 6. Pipeline Flush because of CSR side effects + // On a pipeline flush start fetching from the next address + // of the instruction in the commit stage + // we either came here from a flush request of a CSR instruction or AMO, + // so as CSR or AMO instructions do not exist in a compressed form + // we can unconditionally do PC + 4 here + // or if the commit stage is halted, just take the current pc of the + // instruction in the commit stage + // TODO(zarubaf) This adder can at least be merged with the one in the csr_regfile stage + if (set_pc_commit_i) begin + npc_d = pc_commit_i + (halt_i ? '0 : {{riscv::VLEN - 3{1'b0}}, 3'b100}); + end + // 7. 
Debug + // enter debug on a hard-coded base-address + if (CVA6Cfg.DebugEn && set_debug_pc_i) + npc_d = CVA6Cfg.DmBaseAddress[riscv::VLEN-1:0] + CVA6Cfg.HaltAddress[riscv::VLEN-1:0]; + icache_dreq_o.vaddr = fetch_address; + end + + logic [FETCH_WIDTH-1:0] icache_data; + // re-align the cache line + assign icache_data = icache_dreq_i.data >> {shamt, 4'b0}; + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + npc_rst_load_q <= 1'b1; + npc_q <= '0; + speculative_q <= '0; + icache_data_q <= '0; + icache_valid_q <= 1'b0; + icache_vaddr_q <= 'b0; + icache_ex_valid_q <= ariane_pkg::FE_NONE; + btb_q <= '0; + bht_q <= '0; + end else begin + npc_rst_load_q <= 1'b0; + npc_q <= npc_d; + speculative_q <= speculative_d; + icache_valid_q <= icache_dreq_i.valid; + if (icache_dreq_i.valid) begin + icache_data_q <= icache_data; + icache_vaddr_q <= icache_dreq_i.vaddr; + // Map the only three exceptions which can occur in the frontend to a two bit enum + if (ariane_pkg::MMU_PRESENT && icache_dreq_i.ex.cause == riscv::INSTR_PAGE_FAULT) begin + icache_ex_valid_q <= ariane_pkg::FE_INSTR_PAGE_FAULT; + end else if (icache_dreq_i.ex.cause == riscv::INSTR_ACCESS_FAULT) begin + icache_ex_valid_q <= ariane_pkg::FE_INSTR_ACCESS_FAULT; + end else begin + icache_ex_valid_q <= ariane_pkg::FE_NONE; + end + // save the uppermost prediction + btb_q <= btb_prediction[INSTR_PER_FETCH-1]; + bht_q <= bht_prediction[INSTR_PER_FETCH-1]; + end + end + end + + if (CVA6Cfg.RASDepth == 0) begin + assign ras_predict = '0; + end else begin : ras_gen + ras #( + .CVA6Cfg(CVA6Cfg), + .DEPTH (CVA6Cfg.RASDepth) + ) i_ras ( + .clk_i, + .rst_ni, + .flush_i(flush_bp_i), + .push_i (ras_push), + .pop_i (ras_pop), + .data_i (ras_update), + .data_o (ras_predict) + ); + end + + //For FPGA, BTB is implemented in read synchronous BRAM + //while for ASIC, BTB is implemented in D flip-flop + //and can be read at the same cycle. + assign vpc_btb = (ariane_pkg::FPGA_EN) ? 
icache_dreq_i.vaddr : icache_vaddr_q; + + if (CVA6Cfg.BTBEntries == 0) begin + assign btb_prediction = '0; + end else begin : btb_gen + btb #( + .CVA6Cfg (CVA6Cfg), + .NR_ENTRIES(CVA6Cfg.BTBEntries) + ) i_btb ( + .clk_i, + .rst_ni, + .flush_i (flush_bp_i), + .debug_mode_i, + .vpc_i (vpc_btb), + .btb_update_i (btb_update), + .btb_prediction_o(btb_prediction) + ); + end + + if (CVA6Cfg.BHTEntries == 0) begin + assign bht_prediction = '0; + end else begin : bht_gen + bht #( + .CVA6Cfg (CVA6Cfg), + .NR_ENTRIES(CVA6Cfg.BHTEntries) + ) i_bht ( + .clk_i, + .rst_ni, + .flush_i (flush_bp_i), + .debug_mode_i, + .vpc_i (icache_vaddr_q), + .bht_update_i (bht_update), + .bht_prediction_o(bht_prediction) + ); + end + + // we need to inspect up to INSTR_PER_FETCH instructions for branches + // and jumps + for (genvar i = 0; i < INSTR_PER_FETCH; i++) begin : gen_instr_scan + instr_scan #( + .CVA6Cfg(CVA6Cfg) + ) i_instr_scan ( + .instr_i (instr[i]), + .rvi_return_o(rvi_return[i]), + .rvi_call_o (rvi_call[i]), + .rvi_branch_o(rvi_branch[i]), + .rvi_jalr_o (rvi_jalr[i]), + .rvi_jump_o (rvi_jump[i]), + .rvi_imm_o (rvi_imm[i]), + .rvc_branch_o(rvc_branch[i]), + .rvc_jump_o (rvc_jump[i]), + .rvc_jr_o (rvc_jr[i]), + .rvc_return_o(rvc_return[i]), + .rvc_jalr_o (rvc_jalr[i]), + .rvc_call_o (rvc_call[i]), + .rvc_imm_o (rvc_imm[i]) + ); + end + + instr_queue #( + .CVA6Cfg(CVA6Cfg) + ) i_instr_queue ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (flush_i), + .instr_i (instr), // from re-aligner + .addr_i (addr), // from re-aligner + .exception_i (icache_ex_valid_q), // from I$ + .exception_addr_i (icache_vaddr_q), + .predict_address_i (predict_address), + .cf_type_i (cf_type), + .valid_i (instruction_valid), // from re-aligner + .consumed_o (instr_queue_consumed), + .ready_o (instr_queue_ready), + .replay_o (replay), + .replay_addr_o (replay_addr), + .fetch_entry_o (fetch_entry_o), // to back-end + .fetch_entry_valid_o(fetch_entry_valid_o), // to back-end + 
.fetch_entry_ready_i(fetch_entry_ready_i) // to back-end + ); + + // pragma translate_off +`ifndef VERILATOR + initial begin + assert (FETCH_WIDTH == 32 || FETCH_WIDTH == 64) + else $fatal(1, "[frontend] fetch width != not supported"); + end +`endif + // pragma translate_on +endmodule diff --git a/test/type_param/core/frontend/instr_queue.sv b/test/type_param/core/frontend/instr_queue.sv new file mode 100644 index 00000000..3f955937 --- /dev/null +++ b/test/type_param/core/frontend/instr_queue.sv @@ -0,0 +1,459 @@ +// Copyright 2018 - 2019 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 26.10.2018sim:/ariane_tb/dut/i_ariane/i_frontend/icache_ex_valid_q + +// Description: Instruction Queue, separates instruction front-end from processor +// back-end. +// +// This is an optimized instruction queue which supports the handling of +// compressed instructions (16 bit instructions). Internally it is organized as +// FETCH_ENTRY x 32 bit queues which are filled in a consecutive manner. Two pointers +// point into (`idx_is_q` and `idx_ds_q`) the fill port and the read port. The read port +// is designed so that it will easily allow for multiple issue implementation. +// The input supports arbitrary power of two instruction fetch widths. 
+// +// The queue supports handling of branch prediction and will take care of +// only saving a valid instruction stream. +// +// Furthermore it contains a replay interface in case the instruction queue +// is already full. As instructions are in general easily replayed this should +// increase the efficiency as I$ misses are potentially hidden. This stands in +// contrast to pessimistic actions (early stalling) or credit based approaches. +// Credit based systems might be difficult to implement with the current system +// as we do not exactly know how much space we are going to need in the fifos +// as each instruction can take either one or two slots. +// +// So the consumed/valid interface degenerates to an `information` interface. If the +// upstream circuit keeps pushing, the queue will discard the information +// and start replaying from the point where it could last manage to accept instructions. +// +// The instruction front-end will stop issuing instructions as soon as the +// fifo is full. This will gate the logic if the processor is e.g.: halted +// +// TODO(zarubaf): The instruction queues can be reduced to 16 bit. Potentially +// the replay mechanism gets more complicated as it can be that a 32 bit instruction +// cannot be pushed at once. 
+ +module instr_queue + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +) ( + input logic clk_i, + input logic rst_ni, + input logic flush_i, + input logic [ariane_pkg::INSTR_PER_FETCH-1:0][31:0] instr_i, + input logic [ariane_pkg::INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] addr_i, + input logic [ariane_pkg::INSTR_PER_FETCH-1:0] valid_i, + output logic ready_o, + output logic [ariane_pkg::INSTR_PER_FETCH-1:0] consumed_o, + // we've encountered an exception, at this point the only possible exceptions are page-table faults + input ariane_pkg::frontend_exception_t exception_i, + input logic [riscv::VLEN-1:0] exception_addr_i, + // branch predict + input logic [riscv::VLEN-1:0] predict_address_i, + input ariane_pkg::cf_t [ariane_pkg::INSTR_PER_FETCH-1:0] cf_type_i, + // replay instruction because one of the FIFO was already full + output logic replay_o, + output logic [riscv::VLEN-1:0] replay_addr_o, // address at which to replay this instruction + // to processor backend + output ariane_pkg::fetch_entry_t fetch_entry_o, + output logic fetch_entry_valid_o, + input logic fetch_entry_ready_i +); + + typedef struct packed { + logic [31:0] instr; // instruction word + ariane_pkg::cf_t cf; // branch was taken + ariane_pkg::frontend_exception_t ex; // exception happened + logic [riscv::VLEN-1:0] ex_vaddr; // lower VLEN bits of tval for exception + } instr_data_t; + + logic [ariane_pkg::LOG2_INSTR_PER_FETCH-1:0] branch_index; + // instruction queues + logic [ariane_pkg::INSTR_PER_FETCH-1:0][$clog2( +ariane_pkg::FETCH_FIFO_DEPTH +)-1:0] instr_queue_usage; + instr_data_t [ariane_pkg::INSTR_PER_FETCH-1:0] instr_data_in, instr_data_out; + logic [ariane_pkg::INSTR_PER_FETCH-1:0] push_instr, push_instr_fifo; + logic [ ariane_pkg::INSTR_PER_FETCH-1:0] pop_instr; + logic [ ariane_pkg::INSTR_PER_FETCH-1:0] instr_queue_full; + logic [ ariane_pkg::INSTR_PER_FETCH-1:0] instr_queue_empty; + logic instr_overflow; + // address queue + logic 
[$clog2(ariane_pkg::FETCH_FIFO_DEPTH)-1:0] address_queue_usage; + logic [ riscv::VLEN-1:0] address_out; + logic pop_address; + logic push_address; + logic full_address; + logic empty_address; + logic address_overflow; + // input stream counter + logic [ariane_pkg::LOG2_INSTR_PER_FETCH-1:0] idx_is_d, idx_is_q; + // Registers + // output FIFO select, one-hot + logic [ariane_pkg::INSTR_PER_FETCH-1:0] idx_ds_d, idx_ds_q; + logic [riscv::VLEN-1:0] pc_d, pc_q; // current PC + logic reset_address_d, reset_address_q; // we need to re-set the address because of a flush + + logic [ariane_pkg::INSTR_PER_FETCH*2-2:0] branch_mask_extended; + logic [ariane_pkg::INSTR_PER_FETCH-1:0] branch_mask; + logic branch_empty; + logic [ariane_pkg::INSTR_PER_FETCH-1:0] taken; + // shift amount, e.g.: instructions we want to retire + logic [ariane_pkg::LOG2_INSTR_PER_FETCH:0] popcount; + logic [ariane_pkg::LOG2_INSTR_PER_FETCH-1:0] shamt; + logic [ariane_pkg::INSTR_PER_FETCH-1:0] valid; + logic [ariane_pkg::INSTR_PER_FETCH*2-1:0] consumed_extended; + // FIFO mask + logic [ariane_pkg::INSTR_PER_FETCH*2-1:0] fifo_pos_extended; + logic [ariane_pkg::INSTR_PER_FETCH-1:0] fifo_pos; + logic [ariane_pkg::INSTR_PER_FETCH*2-1:0][31:0] instr; + ariane_pkg::cf_t [ariane_pkg::INSTR_PER_FETCH*2-1:0] cf; + // replay interface + logic [ariane_pkg::INSTR_PER_FETCH-1:0] instr_overflow_fifo; + + assign ready_o = ~(|instr_queue_full) & ~full_address; + + if (ariane_pkg::RVC) begin : gen_multiple_instr_per_fetch_with_C + + for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_unpack_taken + assign taken[i] = cf_type_i[i] != ariane_pkg::NoCF; + end + + // calculate a branch mask, e.g.: get the first taken branch + lzc #( + .WIDTH(ariane_pkg::INSTR_PER_FETCH), + .MODE (0) // count trailing zeros + ) i_lzc_branch_index ( + .in_i (taken), // we want to count trailing zeros + .cnt_o (branch_index), // first branch on branch_index + .empty_o(branch_empty) + ); + + + // the first index is for sure valid 
+ // for example (64 bit fetch): + // taken mask: 0 1 1 0 + // leading zero count = 1 + // 0 0 0 1, 1 1 1 << 1 = 0 0 1 1, 1 1 0 + // take the upper 4 bits: 0 0 1 1 + assign branch_mask_extended = {{{ariane_pkg::INSTR_PER_FETCH-1}{1'b0}}, {{ariane_pkg::INSTR_PER_FETCH}{1'b1}}} << branch_index; + assign branch_mask = branch_mask_extended[ariane_pkg::INSTR_PER_FETCH * 2 - 2:ariane_pkg::INSTR_PER_FETCH - 1]; + + // mask with taken branches to get the actual amount of instructions we want to push + assign valid = valid_i & branch_mask; + // rotate right again + assign consumed_extended = {push_instr_fifo, push_instr_fifo} >> idx_is_q; + assign consumed_o = consumed_extended[ariane_pkg::INSTR_PER_FETCH-1:0]; + // count the numbers of valid instructions we've pushed from this package + popcount #( + .INPUT_WIDTH(ariane_pkg::INSTR_PER_FETCH) + ) i_popcount ( + .data_i (push_instr_fifo), + .popcount_o(popcount) + ); + assign shamt = popcount[$bits(shamt)-1:0]; + + // save the shift amount for next cycle + assign idx_is_d = idx_is_q + shamt; + + // ---------------------- + // Input interface + // ---------------------- + // rotate left by the current position + assign fifo_pos_extended = {valid, valid} << idx_is_q; + // we just care about the upper bits + assign fifo_pos = fifo_pos_extended[ariane_pkg::INSTR_PER_FETCH*2-1:ariane_pkg::INSTR_PER_FETCH]; + // the fifo_position signal can directly be used to guide the push signal of each FIFO + // make sure it is not full + assign push_instr = fifo_pos & ~instr_queue_full; + + // duplicate the entries for easier selection e.g.: 3 2 1 0 3 2 1 0 + for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_duplicate_instr_input + assign instr[i] = instr_i[i]; + assign instr[i+ariane_pkg::INSTR_PER_FETCH] = instr_i[i]; + assign cf[i] = cf_type_i[i]; + assign cf[i+ariane_pkg::INSTR_PER_FETCH] = cf_type_i[i]; + end + + // shift the inputs + for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : 
gen_fifo_input_select + /* verilator lint_off WIDTH */ + assign instr_data_in[i].instr = instr[i+idx_is_q]; + assign instr_data_in[i].cf = cf[i+idx_is_q]; + assign instr_data_in[i].ex = exception_i; // exceptions hold for the whole fetch packet + assign instr_data_in[i].ex_vaddr = exception_addr_i; + /* verilator lint_on WIDTH */ + end + end else begin : gen_multiple_instr_per_fetch_without_C + + assign taken = '0; + assign branch_empty = '0; + assign branch_index = '0; + assign branch_mask_extended = '0; + assign branch_mask = '0; + assign consumed_extended = '0; + assign fifo_pos_extended = '0; + assign fifo_pos = '0; + assign instr = '0; + assign popcount = '0; + assign shamt = '0; + assign valid = '0; + + + assign consumed_o = push_instr_fifo[0]; + // ---------------------- + // Input interface + // ---------------------- + assign push_instr = valid_i & ~instr_queue_full; + + /* verilator lint_off WIDTH */ + assign instr_data_in[0].instr = instr_i[0]; + assign instr_data_in[0].cf = cf_type_i[0]; + assign instr_data_in[0].ex = exception_i; // exceptions hold for the whole fetch packet + assign instr_data_in[0].ex_vaddr = exception_addr_i; + /* verilator lint_on WIDTH */ + end + + // ---------------------- + // Replay Logic + // ---------------------- + // We need to replay a instruction fetch iff: + // 1. One of the instruction data FIFOs was full and we needed it + // (e.g.: we pushed and it was full) + // 2. 
The address/branch predict FIFO was full + // if one of the FIFOs was full we need to replay the faulting instruction + if (ariane_pkg::RVC == 1'b1) begin : gen_instr_overflow_fifo_with_C + assign instr_overflow_fifo = instr_queue_full & fifo_pos; + end else begin : gen_instr_overflow_fifo_without_C + assign instr_overflow_fifo = instr_queue_full & valid_i; + end + assign instr_overflow = |instr_overflow_fifo; // at least one instruction overflowed + assign address_overflow = full_address & push_address; + assign replay_o = instr_overflow | address_overflow; + + if (ariane_pkg::RVC) begin : gen_replay_addr_o_with_c + // select the address, in the case of an address fifo overflow just + // use the base of this package + // if we successfully pushed some instructions we can output the next instruction + // which we didn't manage to push + assign replay_addr_o = (address_overflow) ? addr_i[0] : addr_i[shamt]; + end else begin : gen_replay_addr_o_without_C + assign replay_addr_o = addr_i[0]; + end + + // ---------------------- + // Downstream interface + // ---------------------- + // as long as there is at least one queue which can take the value we have a valid instruction + assign fetch_entry_valid_o = ~(&instr_queue_empty); + + if (ariane_pkg::RVC) begin : gen_downstream_itf_with_c + always_comb begin + idx_ds_d = idx_ds_q; + + pop_instr = '0; + // assemble fetch entry + fetch_entry_o.instruction = '0; + fetch_entry_o.address = pc_q; + fetch_entry_o.ex.valid = 1'b0; + fetch_entry_o.ex.cause = '0; + + fetch_entry_o.ex.tval = '0; + fetch_entry_o.branch_predict.predict_address = address_out; + fetch_entry_o.branch_predict.cf = ariane_pkg::NoCF; + // output mux select + for (int unsigned i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin + if (idx_ds_q[i]) begin + if (instr_data_out[i].ex == ariane_pkg::FE_INSTR_ACCESS_FAULT) begin + fetch_entry_o.ex.cause = riscv::INSTR_ACCESS_FAULT; + end else begin + fetch_entry_o.ex.cause = riscv::INSTR_PAGE_FAULT; + end + 
fetch_entry_o.instruction = instr_data_out[i].instr; + fetch_entry_o.ex.valid = instr_data_out[i].ex != ariane_pkg::FE_NONE; + fetch_entry_o.ex.tval = { + {(riscv::XLEN - riscv::VLEN) {1'b0}}, instr_data_out[i].ex_vaddr + }; + fetch_entry_o.branch_predict.cf = instr_data_out[i].cf; + pop_instr[i] = fetch_entry_valid_o & fetch_entry_ready_i; + end + end + // rotate the pointer left + if (fetch_entry_ready_i) begin + idx_ds_d = { + idx_ds_q[ariane_pkg::INSTR_PER_FETCH-2:0], idx_ds_q[ariane_pkg::INSTR_PER_FETCH-1] + }; + end + end + end else begin : gen_downstream_itf_without_c + always_comb begin + idx_ds_d = '0; + idx_is_d = '0; + fetch_entry_o.instruction = instr_data_out[0].instr; + fetch_entry_o.address = pc_q; + + fetch_entry_o.ex.valid = instr_data_out[0].ex != ariane_pkg::FE_NONE; + if (instr_data_out[0].ex == ariane_pkg::FE_INSTR_ACCESS_FAULT) begin + fetch_entry_o.ex.cause = riscv::INSTR_ACCESS_FAULT; + end else begin + fetch_entry_o.ex.cause = riscv::INSTR_PAGE_FAULT; + end + fetch_entry_o.ex.tval = {{64 - riscv::VLEN{1'b0}}, instr_data_out[0].ex_vaddr}; + + fetch_entry_o.branch_predict.predict_address = address_out; + fetch_entry_o.branch_predict.cf = instr_data_out[0].cf; + + pop_instr[0] = fetch_entry_valid_o & fetch_entry_ready_i; + end + end + + // TODO(zarubaf): This needs to change for dual-issue + // if the handshaking is successful and we had a prediction pop one address entry + assign pop_address = ((fetch_entry_o.branch_predict.cf != ariane_pkg::NoCF) & |pop_instr); + + // ---------------------- + // Calculate (Next) PC + // ---------------------- + always_comb begin + pc_d = pc_q; + reset_address_d = flush_i ? 1'b1 : reset_address_q; + + if (fetch_entry_ready_i) begin + // TODO(zarubaf): This needs to change for a dual issue implementation + // advance the PC + if (ariane_pkg::RVC == 1'b1) begin : gen_pc_with_c_extension + pc_d = pc_q + ((fetch_entry_o.instruction[1:0] != 2'b11) ? 
'd2 : 'd4); + end else begin : gen_pc_without_c_extension + pc_d = pc_q + 'd4; + end + end + + if (pop_address) pc_d = address_out; + + // we previously flushed so we need to reset the address + if (valid_i[0] && reset_address_q) begin + // this is the base of the first instruction + pc_d = addr_i[0]; + reset_address_d = 1'b0; + end + end + + // FIFOs + for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_instr_fifo + // Make sure we don't save any instructions if we couldn't save the address + assign push_instr_fifo[i] = push_instr[i] & ~address_overflow; + fifo_v3 #( + .DEPTH(ariane_pkg::FETCH_FIFO_DEPTH), + .dtype(instr_data_t) + ) i_fifo_instr_data ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (flush_i), + .testmode_i(1'b0), + .full_o (instr_queue_full[i]), + .empty_o (instr_queue_empty[i]), + .usage_o (instr_queue_usage[i]), + .data_i (instr_data_in[i]), + .push_i (push_instr_fifo[i]), + .data_o (instr_data_out[i]), + .pop_i (pop_instr[i]) + ); + end + // or reduce and check whether we are retiring a taken branch (might be that the corresponding) + // fifo is full. 
+ always_comb begin + push_address = 1'b0; + // check if we are pushing a ctrl flow change, if so save the address + for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin + push_address |= push_instr[i] & (instr_data_in[i].cf != ariane_pkg::NoCF); + end + end + + fifo_v3 #( + .DEPTH (ariane_pkg::FETCH_FIFO_DEPTH), // TODO(zarubaf): Fork out to separate param + .DATA_WIDTH(riscv::VLEN) + ) i_fifo_address ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (flush_i), + .testmode_i(1'b0), + .full_o (full_address), + .empty_o (empty_address), + .usage_o (address_queue_usage), + .data_i (predict_address_i), + .push_i (push_address & ~full_address), + .data_o (address_out), + .pop_i (pop_address) + ); + + unread i_unread_address_fifo (.d_i(|{empty_address, address_queue_usage})); + unread i_unread_branch_mask (.d_i(|branch_mask_extended)); + unread i_unread_lzc (.d_i(|{branch_empty})); + unread i_unread_fifo_pos (.d_i(|fifo_pos_extended)); // we don't care about the lower signals + unread i_unread_instr_fifo (.d_i(|instr_queue_usage)); + + if (ariane_pkg::RVC) begin : gen_pc_q_with_c + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + idx_ds_q <= 'b1; + idx_is_q <= '0; + pc_q <= '0; + reset_address_q <= 1'b1; + end else begin + pc_q <= pc_d; + reset_address_q <= reset_address_d; + if (flush_i) begin + // one-hot encoded + idx_ds_q <= 'b1; + // binary encoded + idx_is_q <= '0; + reset_address_q <= 1'b1; + end else begin + idx_ds_q <= idx_ds_d; + idx_is_q <= idx_is_d; + end + end + end + end else begin : gen_pc_q_without_C + assign idx_ds_q = '0; + assign idx_is_q = '0; + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + pc_q <= '0; + reset_address_q <= 1'b1; + end else begin + pc_q <= pc_d; + reset_address_q <= reset_address_d; + if (flush_i) begin + reset_address_q <= 1'b1; + end + end + end + end + + // pragma translate_off +`ifndef VERILATOR + replay_address_fifo : + assert property (@(posedge clk_i) disable iff 
(!rst_ni) replay_o |-> !i_fifo_address.push_i) + else $fatal(1, "[instr_queue] Pushing address although replay asserted"); + + output_select_onehot : + assert property (@(posedge clk_i) $onehot0(idx_ds_q)) + else begin + $error("Output select should be one-hot encoded"); + $stop(); + end +`endif + // pragma translate_on +endmodule diff --git a/test/type_param/core/frontend/instr_scan.sv b/test/type_param/core/frontend/instr_scan.sv new file mode 100644 index 00000000..592d5d34 --- /dev/null +++ b/test/type_param/core/frontend/instr_scan.sv @@ -0,0 +1,83 @@ +// Copyright 2018 - 2019 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 2.0 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-2.0. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// Author: Florian Zaruba, ETH Zurich +// Date: 08.02.2018 +// Migrated: Luis Vitorio Cargnini, IEEE +// Date: 09.06.2018 + +// ------------------------------ +// Instruction Scanner: purely combinational classification of instr_i into control-flow flags and immediates +// ------------------------------ +module instr_scan #( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +) ( + input logic [ 31:0] instr_i, // expect aligned instruction, compressed or not + output logic rvi_return_o, + output logic rvi_call_o, + output logic rvi_branch_o, + output logic rvi_jalr_o, + output logic rvi_jump_o, + output logic [riscv::VLEN-1:0] rvi_imm_o, + output logic rvc_branch_o, + output logic rvc_jump_o, + output logic rvc_jr_o, + output logic rvc_return_o, + output logic rvc_jalr_o, + output logic rvc_call_o, + output logic [riscv::VLEN-1:0] rvc_imm_o +); + logic is_rvc; + assign is_rvc = (instr_i[1:0] != 2'b11); + + logic rv32_rvc_jal; + assign rv32_rvc_jal = (riscv::XLEN == 32) & ((instr_i[15:13] == riscv::OpcodeC1Jal) & is_rvc & (instr_i[1:0] == riscv::OpcodeC1)); + + logic is_xret; + assign is_xret = logic'(instr_i[31:30] == 2'b00) & logic'(instr_i[28:0] == 29'b10000001000000000000001110011); + + // check that rs1 is either x1 or x5 and that rd is not rs1 + assign rvi_return_o = rvi_jalr_o & ((instr_i[19:15] == 5'd1) | instr_i[19:15] == 5'd5) + & (instr_i[19:15] != instr_i[11:7]); + // Opcode is JAL[R] and destination register is either x1 or x5 + assign rvi_call_o = (rvi_jalr_o | rvi_jump_o) & ((instr_i[11:7] == 5'd1) | instr_i[11:7] == 5'd5); + // differentiates between JAL and BRANCH opcode (selected by instr_i[3]; forced to zero for is_xret), JALR comes from BHT + assign rvi_imm_o = is_xret ? '0 : (instr_i[3]) ? 
ariane_pkg::uj_imm( + instr_i + ) : ariane_pkg::sb_imm( + instr_i + ); + assign rvi_branch_o = (instr_i[6:0] == riscv::OpcodeBranch); + assign rvi_jalr_o = (instr_i[6:0] == riscv::OpcodeJalr); + assign rvi_jump_o = logic'(instr_i[6:0] == riscv::OpcodeJal) | is_xret; + + // opcode JAL + assign rvc_jump_o = ((instr_i[15:13] == riscv::OpcodeC1J) & is_rvc & (instr_i[1:0] == riscv::OpcodeC1)) | rv32_rvc_jal; + + // shared decode for rvc_jr_o and rvc_jalr_o (told apart by instr_i[12]); rvc_jr_o always links to register 0 + logic is_jal_r; + assign is_jal_r = (instr_i[15:13] == riscv::OpcodeC2JalrMvAdd) + & (instr_i[6:2] == 5'b00000) + & (instr_i[1:0] == riscv::OpcodeC2) + & is_rvc; + assign rvc_jr_o = is_jal_r & ~instr_i[12]; + // always links to register 1 e.g.: it is a jump + assign rvc_jalr_o = is_jal_r & instr_i[12]; + assign rvc_call_o = rvc_jalr_o | rv32_rvc_jal; + + assign rvc_branch_o = ((instr_i[15:13] == riscv::OpcodeC1Beqz) | (instr_i[15:13] == riscv::OpcodeC1Bnez)) + & (instr_i[1:0] == riscv::OpcodeC1) + & is_rvc; + // check that rs1 is x1 or x5 + assign rvc_return_o = ((instr_i[11:7] == 5'd1) | (instr_i[11:7] == 5'd5)) & rvc_jr_o; + + // differentiates between JAL and BRANCH opcode (selected by instr_i[14]), JALR comes from BHT + assign rvc_imm_o = (instr_i[14]) ? {{56+riscv::VLEN-64{instr_i[12]}}, instr_i[6:5], instr_i[2], instr_i[11:10], instr_i[4:3], 1'b0} + : {{53+riscv::VLEN-64{instr_i[12]}}, instr_i[8], instr_i[10:9], instr_i[6], instr_i[7], instr_i[2], instr_i[11], instr_i[5:3], 1'b0}; +endmodule diff --git a/test/type_param/core/frontend/ras.sv b/test/type_param/core/frontend/ras.sv new file mode 100644 index 00000000..f092b500 --- /dev/null +++ b/test/type_param/core/frontend/ras.sv @@ -0,0 +1,71 @@ +//Copyright (C) 2018 to present, +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 2.0 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-2.0. 
Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 08.02.2018 +// Migrated: Luis Vitorio Cargnini, IEEE +// Date: 09.06.2018 + +// return address stack: DEPTH entries, the top entry (stack_q[0]) drives data_o; flush_i clears all entries +module ras #( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int unsigned DEPTH = 2 +) ( + input logic clk_i, + input logic rst_ni, + input logic flush_i, + input logic push_i, + input logic pop_i, + input logic [riscv::VLEN-1:0] data_i, + output ariane_pkg::ras_t data_o +); + + ariane_pkg::ras_t [DEPTH-1:0] stack_d, stack_q; + + assign data_o = stack_q[0]; + + always_comb begin + stack_d = stack_q; + + // push on the stack: new entry goes to index 0, older entries shift towards DEPTH-1 + if (push_i) begin + stack_d[0].ra = data_i; + // mark the new return address as valid + stack_d[0].valid = 1'b1; + stack_d[DEPTH-1:1] = stack_q[DEPTH-2:0]; + end + + if (pop_i) begin + stack_d[DEPTH-2:0] = stack_q[DEPTH-1:1]; + // we popped the value so invalidate the end of the stack + stack_d[DEPTH-1].valid = 1'b0; + stack_d[DEPTH-1].ra = 'b0; + end + // simultaneous pop and push: leave everything untouched and just write the latest value to the + // top of the stack + if (pop_i && push_i) begin + stack_d = stack_q; + stack_d[0].ra = data_i; + stack_d[0].valid = 1'b1; + end + + if (flush_i) begin + stack_d = '0; + end + end + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + stack_q <= '0; + end else begin + stack_q <= stack_d; + end + end +endmodule diff --git a/test/type_param/core/id_stage.sv b/test/type_param/core/id_stage.sv new file mode 100644 index 00000000..81d16402 --- /dev/null +++ b/test/type_param/core/id_stage.sv @@ -0,0 +1,143 @@ +// Copyright 2018 ETH Zurich and University of Bologna. 
+// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 15.04.2017 +// Description: Instruction decode: optional compressed-instruction expansion (CVA6Cfg.RVC), +// the decoder and the ID -> issue pipeline register. + +module id_stage #( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +) ( + input logic clk_i, + input logic rst_ni, + + input logic flush_i, + input logic debug_req_i, + // from IF + input ariane_pkg::fetch_entry_t fetch_entry_i, + input logic fetch_entry_valid_i, + output logic fetch_entry_ready_o, // acknowledge the instruction (fetch entry) + // to issue stage + output ariane_pkg::scoreboard_entry_t issue_entry_o, // a decoded instruction + output logic issue_entry_valid_o, // issue entry is valid + output logic is_ctrl_flow_o, // the instruction we issue is a ctrl flow instruction + input logic issue_instr_ack_i, // issue stage acknowledged sampling of instructions + output logic rvfi_is_compressed_o, + // from CSR file + input riscv::priv_lvl_t priv_lvl_i, // current privilege level + input riscv::xs_t fs_i, // floating point extension status + input logic [2:0] frm_i, // floating-point dynamic rounding mode + input riscv::xs_t vs_i, // vector extension status + input logic [1:0] irq_i, + input ariane_pkg::irq_ctrl_t irq_ctrl_i, + input logic debug_mode_i, // we are in debug mode + input logic tvm_i, + input logic tw_i, + input logic tsr_i +); + // ID/ISSUE register stage + 
typedef struct packed { + logic valid; + ariane_pkg::scoreboard_entry_t sbe; + logic is_ctrl_flow; + } issue_struct_t; + issue_struct_t issue_n, issue_q; + + logic is_control_flow_instr; + ariane_pkg::scoreboard_entry_t decoded_instruction; + + logic is_illegal; + logic [31:0] instruction; + logic is_compressed; + + if (CVA6Cfg.RVC) begin + // --------------------------------------------------------- + // 1. Check if the fetched instruction is compressed and expand it if so + // --------------------------------------------------------- + compressed_decoder #( + .CVA6Cfg(CVA6Cfg) + ) compressed_decoder_i ( + .instr_i (fetch_entry_i.instruction), + .instr_o (instruction), + .illegal_instr_o(is_illegal), + .is_compressed_o(is_compressed) + ); + end else begin + assign instruction = fetch_entry_i.instruction; + assign is_illegal = '0; + assign is_compressed = '0; + end + + assign rvfi_is_compressed_o = is_compressed; + // --------------------------------------------------------- + // 2. Decode and emit instruction to issue stage + // --------------------------------------------------------- + decoder #( + .CVA6Cfg(CVA6Cfg) + ) decoder_i ( + .debug_req_i, + .irq_ctrl_i, + .irq_i, + .pc_i (fetch_entry_i.address), + .is_compressed_i (is_compressed), + .is_illegal_i (is_illegal), + .instruction_i (instruction), + .compressed_instr_i (fetch_entry_i.instruction[15:0]), + .branch_predict_i (fetch_entry_i.branch_predict), + .ex_i (fetch_entry_i.ex), + .priv_lvl_i (priv_lvl_i), + .debug_mode_i (debug_mode_i), + .fs_i, + .frm_i, + .vs_i, + .tvm_i, + .tw_i, + .tsr_i, + .instruction_o (decoded_instruction), + .is_control_flow_instr_o(is_control_flow_instr) + ); + + // ------------------ + // Pipeline Register + // ------------------ + assign issue_entry_o = issue_q.sbe; + assign issue_entry_valid_o = issue_q.valid; + assign is_ctrl_flow_o = issue_q.is_ctrl_flow; + + always_comb begin + issue_n = issue_q; + fetch_entry_ready_o = 1'b0; + + // Clear the valid flag if issue has acknowledged the 
instruction + if (issue_instr_ack_i) issue_n.valid = 1'b0; + + // if we have a space in the register and the fetch is valid, go get it + // or the issue stage is currently acknowledging an instruction, which means that we will have space + // for a new instruction + if ((!issue_q.valid || issue_instr_ack_i) && fetch_entry_valid_i) begin + fetch_entry_ready_o = 1'b1; + issue_n = '{1'b1, decoded_instruction, is_control_flow_instr}; + end + + // invalidate the pipeline register on a flush + if (flush_i) issue_n.valid = 1'b0; + end + // ------------------------- + // Registers (ID <-> Issue) + // ------------------------- + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + issue_q <= '0; + end else begin + issue_q <= issue_n; + end + end +endmodule diff --git a/test/type_param/core/include/acc_pkg.sv b/test/type_param/core/include/acc_pkg.sv new file mode 100644 index 00000000..bcd3c70a --- /dev/null +++ b/test/type_param/core/include/acc_pkg.sv @@ -0,0 +1,47 @@ +// Copyright 2023 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 + +// Authors: Matheus Cavalcante +// Nils Wistoff + +// Package defining the accelerator interface as used by Ara + CVA6: the packed structs accelerator_req_t and accelerator_resp_t + +package acc_pkg; + + // ---------------------- + // Accelerator Interface + // ---------------------- + + typedef struct packed { + logic req_valid; + logic resp_ready; + riscv::instruction_t insn; + riscv::xlen_t rs1; + riscv::xlen_t rs2; + fpnew_pkg::roundmode_e frm; + logic [ariane_pkg::TRANS_ID_BITS-1:0] trans_id; + logic store_pending; + // Invalidation interface + logic acc_cons_en; + logic inval_ready; + } accelerator_req_t; + + typedef struct packed { + logic req_ready; + logic resp_valid; + riscv::xlen_t result; + logic [ariane_pkg::TRANS_ID_BITS-1:0] trans_id; + logic error; + // Metadata + logic store_pending; + logic store_complete; + logic load_complete; + logic [4:0] fflags; + logic fflags_valid; + // Invalidation interface + logic inval_valid; + logic [63:0] inval_addr; + } accelerator_resp_t; + +endpackage diff --git a/test/type_param/core/include/ariane_pkg.sv b/test/type_param/core/include/ariane_pkg.sv new file mode 100644 index 00000000..1616fafa --- /dev/null +++ b/test/type_param/core/include/ariane_pkg.sv @@ -0,0 +1,994 @@ +/* Copyright 2018 ETH Zurich and University of Bologna. + * Copyright and related rights are licensed under the Solderpad Hardware + * License, Version 0.51 (the “License”); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law + * or agreed to in writing, software, hardware and materials distributed under + * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
+ * + * File: ariane_pkg.sv + * Author: Florian Zaruba + * Date: 8.4.2017 + * + * Description: Contains all the necessary defines for Ariane + * in one package. + */ + +// this is needed to propagate the +// configuration in case Ariane is +// instantiated in OpenPiton +`ifdef PITON_ARIANE +`include "l15.tmp.h" +`endif + +/// This package contains `functions` and global defines for CVA6. +/// *Note*: There are some parameters here as well which will eventually be +/// moved out to favour a fully parameterizable core. +package ariane_pkg; + + // TODO: Slowly move those parameters to the new system. + localparam NR_SB_ENTRIES = cva6_config_pkg::CVA6ConfigNrScoreboardEntries; // number of scoreboard entries + localparam TRANS_ID_BITS = $clog2( + NR_SB_ENTRIES + ); // depending on the number of scoreboard entries we need that many bits + // to uniquely identify the entry in the scoreboard + localparam ASID_WIDTH = (riscv::XLEN == 64) ? 16 : 1; + localparam BITS_SATURATION_COUNTER = 2; + + localparam ISSUE_WIDTH = 1; + + // depth of store-buffers, this needs to be a power of two + localparam logic [2:0] DEPTH_SPEC = 'd4; + + localparam int unsigned DCACHE_TYPE = int'(cva6_config_pkg::CVA6ConfigDcacheType); + // if DCACHE_TYPE = cva6_config_pkg::WT + // we can use a small commit queue since we have a write buffer in the dcache + // we could in principle do without the commit queue in this case, but the timing degrades if we do that due + // to longer paths into the commit stage + // if DCACHE_TYPE = cva6_config_pkg::WB + // allocate more space for the commit buffer to be on the save side, this needs to be a power of two + localparam logic [2:0] DEPTH_COMMIT = 'd4; + + localparam bit FPGA_EN = cva6_config_pkg::CVA6ConfigFPGAEn; // Is FPGA optimization of CV32A6 + + localparam bit RVC = cva6_config_pkg::CVA6ConfigCExtEn; // Is C extension configuration + + // Transprecision float unit + localparam int unsigned LAT_COMP_FP32 = 'd2; + localparam int unsigned LAT_COMP_FP64 = 
'd3; + localparam int unsigned LAT_COMP_FP16 = 'd1; + localparam int unsigned LAT_COMP_FP16ALT = 'd1; + localparam int unsigned LAT_COMP_FP8 = 'd1; + localparam int unsigned LAT_DIVSQRT = 'd2; + localparam int unsigned LAT_NONCOMP = 'd1; + localparam int unsigned LAT_CONV = 'd2; + + localparam riscv::xlen_t OPENHWGROUP_MVENDORID = {{riscv::XLEN - 32{1'b0}}, 32'h0602}; + localparam riscv::xlen_t ARIANE_MARCHID = {{riscv::XLEN - 32{1'b0}}, 32'd3}; + + // 32 registers + localparam REG_ADDR_SIZE = 5; + + // Read ports for general purpose register files + localparam NR_RGPR_PORTS = 2; + + // static debug hartinfo + // debug causes + localparam logic [2:0] CauseBreakpoint = 3'h1; + localparam logic [2:0] CauseTrigger = 3'h2; + localparam logic [2:0] CauseRequest = 3'h3; + localparam logic [2:0] CauseSingleStep = 3'h4; + // amount of data count registers implemented + localparam logic [3:0] DataCount = 4'h2; + + // address where data0-15 is shadowed or if shadowed in a CSR + // address of the first CSR used for shadowing the data + localparam logic [11:0] DataAddr = 12'h380; // we are aligned with Rocket here + typedef struct packed { + logic [31:24] zero1; + logic [23:20] nscratch; + logic [19:17] zero0; + logic dataaccess; + logic [15:12] datasize; + logic [11:0] dataaddr; + } hartinfo_t; + + localparam hartinfo_t DebugHartInfo = '{ + zero1: '0, + nscratch: 2, // Debug module needs at least two scratch regs + zero0: '0, + dataaccess: 1'b1, // data registers are memory mapped in the debugger + datasize: DataCount, + dataaddr: DataAddr + }; + + // enables a commit log which matches spikes commit log format for easier trace comparison + localparam bit ENABLE_SPIKE_COMMIT_LOG = 1'b1; + + // ------------- Dangerous ------------- + // if set to zero a flush will not invalidate the cache-lines, in a single core environment + // where coherence is not necessary this can improve performance. 
This needs to be switched on + // when more than one core is in a system + localparam logic INVALIDATE_ON_FLUSH = 1'b1; + +`ifdef SPIKE_TANDEM + // Spike still places 0 in TVAL for ENV_CALL_* exceptions. + // This may eventually go away when Spike starts to handle TVAL for *all* exceptions. + localparam bit ZERO_TVAL = 1'b1; +`else + localparam bit ZERO_TVAL = 1'b0; +`endif + // read mask for SSTATUS over MMSTATUS + localparam logic [63:0] SMODE_STATUS_READ_MASK = riscv::SSTATUS_UIE + | riscv::SSTATUS_SIE + | riscv::SSTATUS_SPIE + | riscv::SSTATUS_SPP + | riscv::SSTATUS_FS + | riscv::SSTATUS_XS + | riscv::SSTATUS_SUM + | riscv::SSTATUS_MXR + | riscv::SSTATUS_UPIE + | riscv::SSTATUS_SPIE + | riscv::SSTATUS_UXL + | riscv::SSTATUS_SD; + + localparam logic [63:0] SMODE_STATUS_WRITE_MASK = riscv::SSTATUS_SIE + | riscv::SSTATUS_SPIE + | riscv::SSTATUS_SPP + | riscv::SSTATUS_FS + | riscv::SSTATUS_SUM + | riscv::SSTATUS_MXR; + // --------------- + // AXI + // --------------- + + localparam FETCH_USER_WIDTH = cva6_config_pkg::CVA6ConfigFetchUserWidth; + localparam DATA_USER_WIDTH = cva6_config_pkg::CVA6ConfigDataUserWidth; + localparam AXI_USER_EN = cva6_config_pkg::CVA6ConfigDataUserEn | cva6_config_pkg::CVA6ConfigFetchUserEn; + localparam AXI_USER_WIDTH = cva6_config_pkg::CVA6ConfigDataUserWidth; + localparam DATA_USER_EN = cva6_config_pkg::CVA6ConfigDataUserEn; + localparam FETCH_USER_EN = cva6_config_pkg::CVA6ConfigFetchUserEn; + + typedef enum logic { + SINGLE_REQ, + CACHE_LINE_REQ + } ad_req_t; + + // --------------- + // Fetch Stage + // --------------- + + // leave as is (fails with >8 entries and wider fetch width) + localparam int unsigned FETCH_FIFO_DEPTH = 4; + localparam int unsigned FETCH_WIDTH = 32; + // maximum instructions we can fetch on one request (we support compressed instructions) + localparam int unsigned INSTR_PER_FETCH = RVC == 1'b1 ? (FETCH_WIDTH / 16) : 1; + localparam int unsigned LOG2_INSTR_PER_FETCH = RVC == 1'b1 ? 
$clog2(INSTR_PER_FETCH) : 1; + + // Only use struct when signals have same direction + // exception + typedef struct packed { + riscv::xlen_t cause; // cause of exception + riscv::xlen_t tval; // additional information of causing exception (e.g.: instruction causing it), + // address of LD/ST fault + logic valid; + } exception_t; + + typedef enum logic [2:0] { + NoCF, // No control flow prediction + Branch, // Branch + Jump, // Jump to address from immediate + JumpR, // Jump to address from registers + Return // Return Address Prediction + } cf_t; + + // branch-predict + // this is the struct we get back from ex stage and we will use it to update + // all the necessary data structures + // bp_resolve_t + typedef struct packed { + logic valid; // prediction with all its values is valid + logic [riscv::VLEN-1:0] pc; // PC of predict or mis-predict + logic [riscv::VLEN-1:0] target_address; // target address at which to jump, or not + logic is_mispredict; // set if this was a mis-predict + logic is_taken; // branch is taken + cf_t cf_type; // Type of control flow change + } bp_resolve_t; + + // branchpredict scoreboard entry + // this is the struct which we will inject into the pipeline to guide the various + // units towards the correct branch decision and resolve + typedef struct packed { + cf_t cf; // type of control flow prediction + logic [riscv::VLEN-1:0] predict_address; // target address at which to jump, or not + } branchpredict_sbe_t; + + typedef struct packed { + logic valid; + logic [riscv::VLEN-1:0] pc; // update at PC + logic [riscv::VLEN-1:0] target_address; + } btb_update_t; + + typedef struct packed { + logic valid; + logic [riscv::VLEN-1:0] target_address; + } btb_prediction_t; + + typedef struct packed { + logic valid; + logic [riscv::VLEN-1:0] ra; + } ras_t; + + typedef struct packed { + logic valid; + logic [riscv::VLEN-1:0] pc; // update at PC + logic taken; + } bht_update_t; + + typedef struct packed { + logic valid; + logic taken; + } 
bht_prediction_t; + + typedef struct packed { + logic valid; + logic [1:0] saturation_counter; + } bht_t; + + typedef enum logic [3:0] { + NONE, // 0 + LOAD, // 1 + STORE, // 2 + ALU, // 3 + CTRL_FLOW, // 4 + MULT, // 5 + CSR, // 6 + FPU, // 7 + FPU_VEC, // 8 + CVXIF, // 9 + ACCEL // 10 + } fu_t; + + localparam EXC_OFF_RST = 8'h80; + + localparam SupervisorIrq = 1; + localparam MachineIrq = 0; + + // All information needed to determine whether we need to associate an interrupt + // with the corresponding instruction or not. + typedef struct packed { + riscv::xlen_t mie; + riscv::xlen_t mip; + riscv::xlen_t mideleg; + logic sie; + logic global_enable; + } irq_ctrl_t; + + // --------------- + // Cache config + // --------------- + + // for usage in OpenPiton we have to propagate the openpiton L15 configuration from l15.h +`ifdef PITON_ARIANE + +`ifndef CONFIG_L1I_CACHELINE_WIDTH + `define CONFIG_L1I_CACHELINE_WIDTH 128 +`endif + +`ifndef CONFIG_L1I_ASSOCIATIVITY + `define CONFIG_L1I_ASSOCIATIVITY 4 +`endif + +`ifndef CONFIG_L1I_SIZE + `define CONFIG_L1I_SIZE 16*1024 +`endif + +`ifndef CONFIG_L1D_CACHELINE_WIDTH + `define CONFIG_L1D_CACHELINE_WIDTH 128 +`endif + +`ifndef CONFIG_L1D_ASSOCIATIVITY + `define CONFIG_L1D_ASSOCIATIVITY 8 +`endif + +`ifndef CONFIG_L1D_SIZE + `define CONFIG_L1D_SIZE 32*1024 +`endif + +`ifndef L15_THREADID_WIDTH + `define L15_THREADID_WIDTH 3 +`endif + + // I$ + localparam int unsigned ICACHE_LINE_WIDTH = `CONFIG_L1I_CACHELINE_WIDTH; + localparam int unsigned ICACHE_SET_ASSOC = `CONFIG_L1I_ASSOCIATIVITY; + localparam int unsigned ICACHE_INDEX_WIDTH = $clog2(`CONFIG_L1I_SIZE / ICACHE_SET_ASSOC); + localparam int unsigned ICACHE_TAG_WIDTH = riscv::PLEN - ICACHE_INDEX_WIDTH; + localparam int unsigned ICACHE_USER_LINE_WIDTH = (AXI_USER_WIDTH == 1) ? 
4 : 128; // in bit + // D$ + localparam int unsigned DCACHE_LINE_WIDTH = `CONFIG_L1D_CACHELINE_WIDTH; + localparam int unsigned DCACHE_SET_ASSOC = `CONFIG_L1D_ASSOCIATIVITY; + localparam int unsigned DCACHE_INDEX_WIDTH = $clog2(`CONFIG_L1D_SIZE / DCACHE_SET_ASSOC); + localparam int unsigned DCACHE_TAG_WIDTH = riscv::PLEN - DCACHE_INDEX_WIDTH; + localparam int unsigned DCACHE_USER_LINE_WIDTH = (AXI_USER_WIDTH == 1) ? 4 : 128; // in bit + localparam int unsigned DCACHE_USER_WIDTH = DATA_USER_WIDTH; + + localparam int unsigned MEM_TID_WIDTH = `L15_THREADID_WIDTH; +`else + // I$ + localparam int unsigned CONFIG_L1I_SIZE = cva6_config_pkg::CVA6ConfigIcacheByteSize; // in byte + localparam int unsigned ICACHE_SET_ASSOC = cva6_config_pkg::CVA6ConfigIcacheSetAssoc; // number of ways + localparam int unsigned ICACHE_INDEX_WIDTH = $clog2( + CONFIG_L1I_SIZE / ICACHE_SET_ASSOC + ); // in bit, contains also offset width + localparam int unsigned ICACHE_TAG_WIDTH = riscv::PLEN - ICACHE_INDEX_WIDTH; // in bit + localparam int unsigned ICACHE_LINE_WIDTH = cva6_config_pkg::CVA6ConfigIcacheLineWidth; // in bit + localparam int unsigned ICACHE_USER_LINE_WIDTH = (AXI_USER_WIDTH == 1) ? 4 : cva6_config_pkg::CVA6ConfigIcacheLineWidth; // in bit + // D$ + localparam int unsigned CONFIG_L1D_SIZE = cva6_config_pkg::CVA6ConfigDcacheByteSize; // in byte + localparam int unsigned DCACHE_SET_ASSOC = cva6_config_pkg::CVA6ConfigDcacheSetAssoc; // number of ways + localparam int unsigned DCACHE_INDEX_WIDTH = $clog2( + CONFIG_L1D_SIZE / DCACHE_SET_ASSOC + ); // in bit, contains also offset width + localparam int unsigned DCACHE_TAG_WIDTH = riscv::PLEN - DCACHE_INDEX_WIDTH; // in bit + localparam int unsigned DCACHE_LINE_WIDTH = cva6_config_pkg::CVA6ConfigDcacheLineWidth; // in bit + localparam int unsigned DCACHE_USER_LINE_WIDTH = (AXI_USER_WIDTH == 1) ? 
4 : cva6_config_pkg::CVA6ConfigDcacheLineWidth; // in bit + localparam int unsigned DCACHE_USER_WIDTH = DATA_USER_WIDTH; + + localparam int unsigned MEM_TID_WIDTH = cva6_config_pkg::CVA6ConfigMemTidWidth; +`endif + + localparam int unsigned DCACHE_TID_WIDTH = cva6_config_pkg::CVA6ConfigDcacheIdWidth; + + localparam int unsigned WT_DCACHE_WBUF_DEPTH = cva6_config_pkg::CVA6ConfigWtDcacheWbufDepth; + + // --------------- + // EX Stage + // --------------- + + typedef enum logic [7:0] { // basic ALU op + ADD, + SUB, + ADDW, + SUBW, + // logic operations + XORL, + ORL, + ANDL, + // shifts + SRA, + SRL, + SLL, + SRLW, + SLLW, + SRAW, + // comparisons + LTS, + LTU, + GES, + GEU, + EQ, + NE, + // jumps + JALR, + BRANCH, + // set lower than operations + SLTS, + SLTU, + // CSR functions + MRET, + SRET, + DRET, + ECALL, + WFI, + FENCE, + FENCE_I, + SFENCE_VMA, + CSR_WRITE, + CSR_READ, + CSR_SET, + CSR_CLEAR, + // LSU functions + LD, + SD, + LW, + LWU, + SW, + LH, + LHU, + SH, + LB, + SB, + LBU, + // Atomic Memory Operations + AMO_LRW, + AMO_LRD, + AMO_SCW, + AMO_SCD, + AMO_SWAPW, + AMO_ADDW, + AMO_ANDW, + AMO_ORW, + AMO_XORW, + AMO_MAXW, + AMO_MAXWU, + AMO_MINW, + AMO_MINWU, + AMO_SWAPD, + AMO_ADDD, + AMO_ANDD, + AMO_ORD, + AMO_XORD, + AMO_MAXD, + AMO_MAXDU, + AMO_MIND, + AMO_MINDU, + // Multiplications + MUL, + MULH, + MULHU, + MULHSU, + MULW, + // Divisions + DIV, + DIVU, + DIVW, + DIVUW, + REM, + REMU, + REMW, + REMUW, + // Floating-Point Load and Store Instructions + FLD, + FLW, + FLH, + FLB, + FSD, + FSW, + FSH, + FSB, + // Floating-Point Computational Instructions + FADD, + FSUB, + FMUL, + FDIV, + FMIN_MAX, + FSQRT, + FMADD, + FMSUB, + FNMSUB, + FNMADD, + // Floating-Point Conversion and Move Instructions + FCVT_F2I, + FCVT_I2F, + FCVT_F2F, + FSGNJ, + FMV_F2X, + FMV_X2F, + // Floating-Point Compare Instructions + FCMP, + // Floating-Point Classify Instruction + FCLASS, + // Vectorial Floating-Point Instructions that don't directly map onto the scalar ones + VFMIN, + 
VFMAX, + VFSGNJ, + VFSGNJN, + VFSGNJX, + VFEQ, + VFNE, + VFLT, + VFGE, + VFLE, + VFGT, + VFCPKAB_S, + VFCPKCD_S, + VFCPKAB_D, + VFCPKCD_D, + // Offload Instructions to be directed into cv_x_if + OFFLOAD, + // Or-Combine and REV8 + ORCB, + REV8, + // Bitwise Rotation + ROL, + ROLW, + ROR, + RORI, + RORIW, + RORW, + // Sign and Zero Extend + SEXTB, + SEXTH, + ZEXTH, + // Count population + CPOP, + CPOPW, + // Count Leading/Training Zeros + CLZ, + CLZW, + CTZ, + CTZW, + // Carry less multiplication Op's + CLMUL, + CLMULH, + CLMULR, + // Single bit instructions Op's + BCLR, + BCLRI, + BEXT, + BEXTI, + BINV, + BINVI, + BSET, + BSETI, + // Integer minimum/maximum + MAX, + MAXU, + MIN, + MINU, + // Shift with Add Unsigned Word and Unsigned Word Op's (Bitmanip) + SH1ADDUW, + SH2ADDUW, + SH3ADDUW, + ADDUW, + SLLIUW, + // Shift with Add (Bitmanip) + SH1ADD, + SH2ADD, + SH3ADD, + // Bitmanip Logical with negate op (Bitmanip) + ANDN, + ORN, + XNOR, + // Accelerator operations + ACCEL_OP, + ACCEL_OP_FS1, + ACCEL_OP_FD, + ACCEL_OP_LOAD, + ACCEL_OP_STORE, + // Zicond instruction + CZERO_EQZ, + CZERO_NEZ + } fu_op; + + typedef struct packed { + fu_t fu; + fu_op operation; + riscv::xlen_t operand_a; + riscv::xlen_t operand_b; + riscv::xlen_t imm; + logic [TRANS_ID_BITS-1:0] trans_id; + } fu_data_t; + + function automatic logic op_is_branch(input fu_op op); + unique case (op) inside + EQ, NE, LTS, GES, LTU, GEU: return 1'b1; + default: return 1'b0; // all other ops + endcase + endfunction + + // ------------------------------- + // Extract Src/Dst FP Reg from Op + // ------------------------------- + // function used in instr_trace svh + // is_rs1_fpr function is kept to allow cva6 compilation with instr_trace feature + function automatic logic is_rs1_fpr(input fu_op op); + unique case (op) inside + [FMUL : FNMADD], // Computational Operations (except ADD/SUB) + FCVT_F2I, // Float-Int Casts + FCVT_F2F, // Float-Float Casts + FSGNJ, // Sign Injections + FMV_F2X, // FPR-GPR Moves + 
FCMP, // Comparisons + FCLASS, // Classifications + [VFMIN : VFCPKCD_D], // Additional Vectorial FP ops + ACCEL_OP_FS1: + return 1'b1; // Accelerator instructions + default: return 1'b0; // all other ops + endcase + endfunction + + // function used in instr_trace svh + // is_rs2_fpr function is kept to allow cva6 compilation with instr_trace feature + function automatic logic is_rs2_fpr(input fu_op op); + unique case (op) inside + [FSD : FSB], // FP Stores + [FADD : FMIN_MAX], // Computational Operations (no sqrt) + [FMADD : FNMADD], // Fused Computational Operations + FCVT_F2F, // Vectorial F2F Conversions requrie target + [FSGNJ : FMV_F2X], // Sign Injections and moves mapped to SGNJ + FCMP, // Comparisons + [VFMIN : VFCPKCD_D]: + return 1'b1; // Additional Vectorial FP ops + default: return 1'b0; // all other ops + endcase + endfunction + + // function used in instr_trace svh + // is_imm_fpr function is kept to allow cva6 compilation with instr_trace feature + // ternary operations encode the rs3 address in the imm field, also add/sub + function automatic logic is_imm_fpr(input fu_op op); + unique case (op) inside + [FADD : FSUB], // ADD/SUB need inputs as Operand B/C + [FMADD : FNMADD], // Fused Computational Operations + [VFCPKAB_S : VFCPKCD_D]: + return 1'b1; // Vectorial FP cast and pack ops + default: return 1'b0; // all other ops + endcase + endfunction + + // function used in instr_trace svh + // is_rd_fpr function is kept to allow cva6 compilation with instr_trace feature + function automatic logic is_rd_fpr(input fu_op op); + unique case (op) inside + [FLD : FLB], // FP Loads + [FADD : FNMADD], // Computational Operations + FCVT_I2F, // Int-Float Casts + FCVT_F2F, // Float-Float Casts + FSGNJ, // Sign Injections + FMV_X2F, // GPR-FPR Moves + [VFMIN : VFSGNJX], // Vectorial MIN/MAX and SGNJ + [VFCPKAB_S : VFCPKCD_D], // Vectorial FP cast and pack ops + ACCEL_OP_FD: + return 1'b1; // Accelerator instructions + default: return 1'b0; // all other ops + 
endcase + endfunction + + function automatic logic is_amo(fu_op op); + case (op) inside + [AMO_LRW : AMO_MINDU]: begin + return 1'b1; + end + default: return 1'b0; + endcase + endfunction + + typedef struct packed { + logic valid; + logic [riscv::VLEN-1:0] vaddr; + logic overflow; + riscv::xlen_t data; + logic [(riscv::XLEN/8)-1:0] be; + fu_t fu; + fu_op operation; + logic [TRANS_ID_BITS-1:0] trans_id; + } lsu_ctrl_t; + + // --------------- + // IF/ID Stage + // --------------- + // store the decompressed instruction + typedef struct packed { + logic [riscv::VLEN-1:0] address; // the address of the instructions from below + logic [31:0] instruction; // instruction word + branchpredict_sbe_t branch_predict; // this field contains branch prediction information regarding the forward branch path + exception_t ex; // this field contains exceptions which might have happened earlier, e.g.: fetch exceptions + } fetch_entry_t; + + // --------------- + // ID/EX/WB Stage + // --------------- + + localparam RVFI = cva6_config_pkg::CVA6ConfigRvfiTrace; + + typedef struct packed { + logic [riscv::VLEN-1:0] pc; // PC of instruction + logic [TRANS_ID_BITS-1:0] trans_id; // this can potentially be simplified, we could index the scoreboard entry + // with the transaction id in any case make the width more generic + fu_t fu; // functional unit to use + fu_op op; // operation to perform in each functional unit + logic [REG_ADDR_SIZE-1:0] rs1; // register source address 1 + logic [REG_ADDR_SIZE-1:0] rs2; // register source address 2 + logic [REG_ADDR_SIZE-1:0] rd; // register destination address + riscv::xlen_t result; // for unfinished instructions this field also holds the immediate, + // for unfinished floating-point that are partly encoded in rs2, this field also holds rs2 + // for unfinished floating-point fused operations (FMADD, FMSUB, FNMADD, FNMSUB) + // this field holds the address of the third operand from the floating-point register file + logic valid; // is the result 
valid + logic use_imm; // should we use the immediate as operand b? + logic use_zimm; // use zimm as operand a + logic use_pc; // set if we need to use the PC as operand a, PC from exception + exception_t ex; // exception has occurred + branchpredict_sbe_t bp; // branch predict scoreboard data structure + logic is_compressed; // signals a compressed instructions, we need this information at the commit stage if + // we want jump accordingly e.g.: +4, +2 + logic vfp; // is this a vector floating-point instruction? + } scoreboard_entry_t; + + // --------------- + // MMU instanciation + // --------------- + localparam bit MMU_PRESENT = cva6_config_pkg::CVA6ConfigMmuPresent; + + localparam int unsigned INSTR_TLB_ENTRIES = cva6_config_pkg::CVA6ConfigInstrTlbEntries; + localparam int unsigned DATA_TLB_ENTRIES = cva6_config_pkg::CVA6ConfigDataTlbEntries; + + // ------------------- + // Performance counter + // ------------------- + localparam bit PERF_COUNTER_EN = cva6_config_pkg::CVA6ConfigPerfCounterEn; + localparam int unsigned MHPMCounterNum = 6; + + // -------------------- + // Atomics + // -------------------- + typedef enum logic [3:0] { + AMO_NONE = 4'b0000, + AMO_LR = 4'b0001, + AMO_SC = 4'b0010, + AMO_SWAP = 4'b0011, + AMO_ADD = 4'b0100, + AMO_AND = 4'b0101, + AMO_OR = 4'b0110, + AMO_XOR = 4'b0111, + AMO_MAX = 4'b1000, + AMO_MAXU = 4'b1001, + AMO_MIN = 4'b1010, + AMO_MINU = 4'b1011, + AMO_CAS1 = 4'b1100, // unused, not part of riscv spec, but provided in OpenPiton + AMO_CAS2 = 4'b1101 // unused, not part of riscv spec, but provided in OpenPiton + } amo_t; + + typedef struct packed { + logic valid; // valid flag + logic is_2M; // + logic is_1G; // + logic [27-1:0] vpn; // VPN (39bits) = 27bits + 12bits offset + logic [ASID_WIDTH-1:0] asid; + riscv::pte_t content; + } tlb_update_t; + + // Bits required for representation of physical address space as 4K pages + // (e.g. 27*4K == 39bit address space). 
+ localparam PPN4K_WIDTH = 38; + + typedef struct packed { + logic valid; // valid flag + logic is_4M; // + logic [20-1:0] vpn; //VPN (32bits) = 20bits + 12bits offset + logic [9-1:0] asid; //ASID length = 9 for Sv32 mmu + riscv::pte_sv32_t content; + } tlb_update_sv32_t; + + typedef enum logic [1:0] { + FE_NONE, + FE_INSTR_ACCESS_FAULT, + FE_INSTR_PAGE_FAULT + } frontend_exception_t; + + // ---------------------- + // cache request ports + // ---------------------- + // I$ address translation requests + typedef struct packed { + logic fetch_valid; // address translation valid + logic [riscv::PLEN-1:0] fetch_paddr; // physical address in + exception_t fetch_exception; // exception occurred during fetch + } icache_areq_t; + + typedef struct packed { + logic fetch_req; // address translation request + logic [riscv::VLEN-1:0] fetch_vaddr; // virtual address out + } icache_arsp_t; + + // I$ data requests + typedef struct packed { + logic req; // we request a new word + logic kill_s1; // kill the current request + logic kill_s2; // kill the last request + logic spec; // request is speculative + logic [riscv::VLEN-1:0] vaddr; // 1st cycle: 12 bit index is taken for lookup + } icache_dreq_t; + + typedef struct packed { + logic ready; // icache is ready + logic valid; // signals a valid read + logic [FETCH_WIDTH-1:0] data; // 2+ cycle out: tag + logic [FETCH_USER_WIDTH-1:0] user; // User bits + logic [riscv::VLEN-1:0] vaddr; // virtual address out + exception_t ex; // we've encountered an exception + } icache_drsp_t; + + // AMO request going to cache. this request is unconditionally valid as soon + // as request goes high. + // Furthermore, those signals are kept stable until the response indicates + // completion by asserting ack. 
+ typedef struct packed { + logic req; // this request is valid + amo_t amo_op; // atomic memory operation to perform + logic [1:0] size; // 2'b10 --> word operation, 2'b11 --> double word operation + logic [63:0] operand_a; // address + logic [63:0] operand_b; // data as layouted in the register + } amo_req_t; + + // AMO response coming from cache. + typedef struct packed { + logic ack; // response is valid + logic [63:0] result; // sign-extended, result + } amo_resp_t; + + // D$ data requests + typedef struct packed { + logic [DCACHE_INDEX_WIDTH-1:0] address_index; + logic [DCACHE_TAG_WIDTH-1:0] address_tag; + riscv::xlen_t data_wdata; + logic [DCACHE_USER_WIDTH-1:0] data_wuser; + logic data_req; + logic data_we; + logic [(riscv::XLEN/8)-1:0] data_be; + logic [1:0] data_size; + logic [DCACHE_TID_WIDTH-1:0] data_id; + logic kill_req; + logic tag_valid; + } dcache_req_i_t; + + typedef struct packed { + logic data_gnt; + logic data_rvalid; + logic [DCACHE_TID_WIDTH-1:0] data_rid; + riscv::xlen_t data_rdata; + logic [DCACHE_USER_WIDTH-1:0] data_ruser; + } dcache_req_o_t; + + // ---------------------- + // Arithmetic Functions + // ---------------------- + function automatic riscv::xlen_t sext32(logic [31:0] operand); + return {{riscv::XLEN - 32{operand[31]}}, operand[31:0]}; + endfunction + + // ---------------------- + // Immediate functions + // ---------------------- + function automatic logic [riscv::VLEN-1:0] uj_imm(logic [31:0] instruction_i); + return { + {44 + riscv::VLEN - 64{instruction_i[31]}}, + instruction_i[19:12], + instruction_i[20], + instruction_i[30:21], + 1'b0 + }; + endfunction + + function automatic logic [riscv::VLEN-1:0] i_imm(logic [31:0] instruction_i); + return {{52 + riscv::VLEN - 64{instruction_i[31]}}, instruction_i[31:20]}; + endfunction + + function automatic logic [riscv::VLEN-1:0] sb_imm(logic [31:0] instruction_i); + return { + {51 + riscv::VLEN - 64{instruction_i[31]}}, + instruction_i[31], + instruction_i[7], + 
instruction_i[30:25], + instruction_i[11:8], + 1'b0 + }; + endfunction + + // ---------------------- + // LSU Functions + // ---------------------- + // align data to address e.g.: shift data to be naturally 64 + function automatic riscv::xlen_t data_align(logic [2:0] addr, logic [63:0] data); + // Set addr[2] to 1'b0 when 32bits + logic [ 2:0] addr_tmp = {(addr[2] && riscv::IS_XLEN64), addr[1:0]}; + logic [63:0] data_tmp = {64{1'b0}}; + case (addr_tmp) + 3'b000: data_tmp[riscv::XLEN-1:0] = {data[riscv::XLEN-1:0]}; + 3'b001: + data_tmp[riscv::XLEN-1:0] = {data[riscv::XLEN-9:0], data[riscv::XLEN-1:riscv::XLEN-8]}; + 3'b010: + data_tmp[riscv::XLEN-1:0] = {data[riscv::XLEN-17:0], data[riscv::XLEN-1:riscv::XLEN-16]}; + 3'b011: + data_tmp[riscv::XLEN-1:0] = {data[riscv::XLEN-25:0], data[riscv::XLEN-1:riscv::XLEN-24]}; + 3'b100: data_tmp = {data[31:0], data[63:32]}; + 3'b101: data_tmp = {data[23:0], data[63:24]}; + 3'b110: data_tmp = {data[15:0], data[63:16]}; + 3'b111: data_tmp = {data[7:0], data[63:8]}; + endcase + return data_tmp[riscv::XLEN-1:0]; + endfunction + + // generate byte enable mask + function automatic logic [7:0] be_gen(logic [2:0] addr, logic [1:0] size); + case (size) + 2'b11: begin + return 8'b1111_1111; + end + 2'b10: begin + case (addr[2:0]) + 3'b000: return 8'b0000_1111; + 3'b001: return 8'b0001_1110; + 3'b010: return 8'b0011_1100; + 3'b011: return 8'b0111_1000; + 3'b100: return 8'b1111_0000; + default: ; // Do nothing + endcase + end + 2'b01: begin + case (addr[2:0]) + 3'b000: return 8'b0000_0011; + 3'b001: return 8'b0000_0110; + 3'b010: return 8'b0000_1100; + 3'b011: return 8'b0001_1000; + 3'b100: return 8'b0011_0000; + 3'b101: return 8'b0110_0000; + 3'b110: return 8'b1100_0000; + default: ; // Do nothing + endcase + end + 2'b00: begin + case (addr[2:0]) + 3'b000: return 8'b0000_0001; + 3'b001: return 8'b0000_0010; + 3'b010: return 8'b0000_0100; + 3'b011: return 8'b0000_1000; + 3'b100: return 8'b0001_0000; + 3'b101: return 8'b0010_0000; + 3'b110: 
return 8'b0100_0000; + 3'b111: return 8'b1000_0000; + endcase + end + endcase + return 8'b0; + endfunction + + function automatic logic [3:0] be_gen_32(logic [1:0] addr, logic [1:0] size); + case (size) + 2'b10: begin + return 4'b1111; + end + 2'b01: begin + case (addr[1:0]) + 2'b00: return 4'b0011; + 2'b01: return 4'b0110; + 2'b10: return 4'b1100; + default: ; // Do nothing + endcase + end + 2'b00: begin + case (addr[1:0]) + 2'b00: return 4'b0001; + 2'b01: return 4'b0010; + 2'b10: return 4'b0100; + 2'b11: return 4'b1000; + endcase + end + default: return 4'b0; + endcase + return 4'b0; + endfunction + + // ---------------------- + // Extract Bytes from Op + // ---------------------- + function automatic logic [1:0] extract_transfer_size(fu_op op); + case (op) + LD, SD, FLD, FSD, + AMO_LRD, AMO_SCD, + AMO_SWAPD, AMO_ADDD, + AMO_ANDD, AMO_ORD, + AMO_XORD, AMO_MAXD, + AMO_MAXDU, AMO_MIND, + AMO_MINDU: begin + return 2'b11; + end + LW, LWU, SW, FLW, FSW, + AMO_LRW, AMO_SCW, + AMO_SWAPW, AMO_ADDW, + AMO_ANDW, AMO_ORW, + AMO_XORW, AMO_MAXW, + AMO_MAXWU, AMO_MINW, + AMO_MINWU: begin + return 2'b10; + end + LH, LHU, SH, FLH, FSH: return 2'b01; + LB, LBU, SB, FLB, FSB: return 2'b00; + default: return 2'b11; + endcase + endfunction +endpackage diff --git a/test/type_param/core/include/config_pkg.sv b/test/type_param/core/include/config_pkg.sv new file mode 100644 index 00000000..90d6bfe1 --- /dev/null +++ b/test/type_param/core/include/config_pkg.sv @@ -0,0 +1,181 @@ +// Copyright 2023 Thales DIS France SAS +// +// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 +// You may obtain a copy of the License at https://solderpad.org/licenses/ +// +// Original Author: Jean-Roch COULON - Thales + +package config_pkg; + + // --------------- + // Global Config + // --------------- + localparam int unsigned ILEN = 32; + localparam int unsigned NRET = 1; + + /// The NoC type is a top-level parameter, hence we need a bit more + /// information on what protocol those type parameters are supporting. + /// Currently two values are supported" + typedef enum { + /// The "classic" AXI4 protocol. + NOC_TYPE_AXI4_ATOP, + /// In the OpenPiton setting the WT cache is connected to the L15. + NOC_TYPE_L15_BIG_ENDIAN, + NOC_TYPE_L15_LITTLE_ENDIAN + } noc_type_e; + + /// Cache type parameter + typedef enum logic [1:0] { + WB = 0, + WT = 1, + HPDCACHE = 2 + } cache_type_t; + + localparam NrMaxRules = 16; + + typedef struct packed { + /// Number of commit ports, i.e., maximum number of instructions that the + /// core can retire per cycle. It can be beneficial to have more commit + /// ports than issue ports, for the scoreboard to empty out in case one + /// instruction stalls a little longer. + int unsigned NrCommitPorts; + /// AXI parameters. 
+ int unsigned AxiAddrWidth; + int unsigned AxiDataWidth; + int unsigned AxiIdWidth; + int unsigned AxiUserWidth; + int unsigned NrLoadBufEntries; + bit FpuEn; + bit XF16; + bit XF16ALT; + bit XF8; + bit RVA; + bit RVB; + bit RVV; + bit RVC; + bit RVZCB; + bit XFVec; + bit CvxifEn; + bit ZiCondExtEn; + // Calculated + bit RVF; + bit RVD; + bit FpPresent; + bit NSX; + int unsigned FLen; + bit RVFVec; + bit XF16Vec; + bit XF16ALTVec; + bit XF8Vec; + int unsigned NrRgprPorts; + int unsigned NrWbPorts; + bit EnableAccelerator; + bit RVS; //Supervisor mode + bit RVU; //User mode + // Debug Module + // address to which a hart should jump when it was requested to halt + logic [63:0] HaltAddress; + logic [63:0] ExceptionAddress; + /// Return address stack depth, good values are around 2 to 4. + int unsigned RASDepth; + /// Branch target buffer entries. + int unsigned BTBEntries; + /// Branch history (2-bit saturation counter) size, to keep track of + /// branch otucomes. + int unsigned BHTEntries; + /// Offset of the debug module. + logic [63:0] DmBaseAddress; + /// Number of PMP entries. + int unsigned NrPMPEntries; + /// Physical Memory Protection (PMP) CSR reset values and read-only bits + logic [15:0][63:0] PMPCfgRstVal; + logic [15:0][63:0] PMPAddrRstVal; + bit [15:0] PMPEntryReadOnly; + /// Set to the bus type in use. + noc_type_e NOCType; + /// Physical Memory Attributes (PMAs) + /// Number of non idempotent rules. + int unsigned NrNonIdempotentRules; + /// Base which needs to match. + logic [NrMaxRules-1:0][63:0] NonIdempotentAddrBase; + /// Bit mask which bits to consider when matching the rule. + logic [NrMaxRules-1:0][63:0] NonIdempotentLength; + /// Number of regions which have execute property. + int unsigned NrExecuteRegionRules; + /// Base which needs to match. + logic [NrMaxRules-1:0][63:0] ExecuteRegionAddrBase; + /// Bit mask which bits to consider when matching the rule. 
+ logic [NrMaxRules-1:0][63:0] ExecuteRegionLength; + /// Number of regions which have cached property. + int unsigned NrCachedRegionRules; + /// Base which needs to match. + logic [NrMaxRules-1:0][63:0] CachedRegionAddrBase; + /// Bit mask which bits to consider when matching the rule. + logic [NrMaxRules-1:0][63:0] CachedRegionLength; + /// Maximum number of outstanding stores. + int unsigned MaxOutstandingStores; + bit DebugEn; + bit NonIdemPotenceEn; + bit AxiBurstWriteEn; + } cva6_cfg_t; + + + /// Empty configuration to sanity check proper parameter passing. Whenever + /// you develop a module that resides within the core, assign this constant. + localparam cva6_cfg_t cva6_cfg_empty = '0; + + + /// Utility function being called to check parameters. Not all values make + /// sense for all parameters, here is the place to sanity check them. + function automatic void check_cfg(cva6_cfg_t Cfg); + // pragma translate_off +`ifndef VERILATOR + assert (Cfg.RASDepth > 0); + assert (2 ** $clog2(Cfg.BTBEntries) == Cfg.BTBEntries); + assert (2 ** $clog2(Cfg.BHTEntries) == Cfg.BHTEntries); + assert (Cfg.NrNonIdempotentRules <= NrMaxRules); + assert (Cfg.NrExecuteRegionRules <= NrMaxRules); + assert (Cfg.NrCachedRegionRules <= NrMaxRules); + assert (Cfg.NrPMPEntries <= 16); +`endif + // pragma translate_on + endfunction + + function automatic logic range_check(logic [63:0] base, logic [63:0] len, logic [63:0] address); + // if len is a power of two, and base is properly aligned, this check could be simplified + // Extend base by one bit to prevent an overflow. 
+ return (address >= base) && (({1'b0, address}) < (65'(base) + len)); + endfunction : range_check + + + function automatic logic is_inside_nonidempotent_regions(cva6_cfg_t Cfg, logic [63:0] address); + logic [NrMaxRules-1:0] pass; + pass = '0; + for (int unsigned k = 0; k < Cfg.NrNonIdempotentRules; k++) begin + pass[k] = range_check(Cfg.NonIdempotentAddrBase[k], Cfg.NonIdempotentLength[k], address); + end + return |pass; + endfunction : is_inside_nonidempotent_regions + + function automatic logic is_inside_execute_regions(cva6_cfg_t Cfg, logic [63:0] address); + // if we don't specify any region we assume everything is accessible + logic [NrMaxRules-1:0] pass; + pass = '0; + for (int unsigned k = 0; k < Cfg.NrExecuteRegionRules; k++) begin + pass[k] = range_check(Cfg.ExecuteRegionAddrBase[k], Cfg.ExecuteRegionLength[k], address); + end + return |pass; + endfunction : is_inside_execute_regions + + function automatic logic is_inside_cacheable_regions(cva6_cfg_t Cfg, logic [63:0] address); + automatic logic [NrMaxRules-1:0] pass; + pass = '0; + for (int unsigned k = 0; k < Cfg.NrCachedRegionRules; k++) begin + pass[k] = range_check(Cfg.CachedRegionAddrBase[k], Cfg.CachedRegionLength[k], address); + end + return |pass; + endfunction : is_inside_cacheable_regions + +endpackage diff --git a/test/type_param/core/include/cv64a6_imafdc_sv39_config_pkg.sv b/test/type_param/core/include/cv64a6_imafdc_sv39_config_pkg.sv new file mode 100644 index 00000000..ec4db64f --- /dev/null +++ b/test/type_param/core/include/cv64a6_imafdc_sv39_config_pkg.sv @@ -0,0 +1,150 @@ +// Copyright 2021 Thales DIS design services SAS +// +// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 +// You may obtain a copy of the License at https://solderpad.org/licenses/ +// +// Original Author: Jean-Roch COULON - Thales + + +package cva6_config_pkg; + + localparam CVA6ConfigXlen = 64; + + localparam CVA6ConfigFpuEn = 1; + localparam CVA6ConfigF16En = 0; + localparam CVA6ConfigF16AltEn = 0; + localparam CVA6ConfigF8En = 0; + localparam CVA6ConfigFVecEn = 0; + + localparam CVA6ConfigCvxifEn = 1; + localparam CVA6ConfigCExtEn = 1; + localparam CVA6ConfigZcbExtEn = 1; + localparam CVA6ConfigAExtEn = 1; + localparam CVA6ConfigBExtEn = 1; + localparam CVA6ConfigVExtEn = 0; + localparam CVA6ConfigZiCondExtEn = 1; + + localparam CVA6ConfigAxiIdWidth = 4; + localparam CVA6ConfigAxiAddrWidth = 64; + localparam CVA6ConfigAxiDataWidth = 64; + localparam CVA6ConfigFetchUserEn = 0; + localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen; + localparam CVA6ConfigDataUserEn = 0; + localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen; + + localparam CVA6ConfigIcacheByteSize = 16384; + localparam CVA6ConfigIcacheSetAssoc = 4; + localparam CVA6ConfigIcacheLineWidth = 128; + localparam CVA6ConfigDcacheByteSize = 32768; + localparam CVA6ConfigDcacheSetAssoc = 8; + localparam CVA6ConfigDcacheLineWidth = 128; + + localparam CVA6ConfigDcacheIdWidth = 1; + localparam CVA6ConfigMemTidWidth = 2; + + localparam CVA6ConfigWtDcacheWbufDepth = 8; + + localparam CVA6ConfigNrCommitPorts = 2; + localparam CVA6ConfigNrScoreboardEntries = 8; + + localparam CVA6ConfigFPGAEn = 0; + + localparam CVA6ConfigNrLoadPipeRegs = 1; + localparam CVA6ConfigNrStorePipeRegs = 0; + localparam CVA6ConfigNrLoadBufEntries = 2; + + localparam CVA6ConfigInstrTlbEntries = 16; + localparam CVA6ConfigDataTlbEntries = 16; + + localparam CVA6ConfigRASDepth = 2; + localparam CVA6ConfigBTBEntries = 32; + localparam CVA6ConfigBHTEntries = 128; + + localparam CVA6ConfigNrPMPEntries = 8; + + localparam CVA6ConfigPerfCounterEn = 1; + + localparam config_pkg::cache_type_t 
CVA6ConfigDcacheType = config_pkg::WT; + + localparam CVA6ConfigMmuPresent = 1; + + localparam CVA6ConfigRvfiTrace = 1; + + localparam config_pkg::cva6_cfg_t cva6_cfg = '{ + NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts), + AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth), + AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth), + AxiIdWidth: unsigned'(CVA6ConfigAxiIdWidth), + AxiUserWidth: unsigned'(CVA6ConfigDataUserWidth), + NrLoadBufEntries: unsigned'(CVA6ConfigNrLoadBufEntries), + FpuEn: bit'(CVA6ConfigFpuEn), + XF16: bit'(CVA6ConfigF16En), + XF16ALT: bit'(CVA6ConfigF16AltEn), + XF8: bit'(CVA6ConfigF8En), + RVA: bit'(CVA6ConfigAExtEn), + RVB: bit'(CVA6ConfigBExtEn), + RVV: bit'(CVA6ConfigVExtEn), + RVC: bit'(CVA6ConfigCExtEn), + RVZCB: bit'(CVA6ConfigZcbExtEn), + XFVec: bit'(CVA6ConfigFVecEn), + CvxifEn: bit'(CVA6ConfigCvxifEn), + ZiCondExtEn: bit'(CVA6ConfigZiCondExtEn), + // Extended + RVF: + bit'( + 0 + ), + RVD: bit'(0), + FpPresent: bit'(0), + NSX: bit'(0), + FLen: unsigned'(0), + RVFVec: bit'(0), + XF16Vec: bit'(0), + XF16ALTVec: bit'(0), + XF8Vec: bit'(0), + NrRgprPorts: unsigned'(0), + NrWbPorts: unsigned'(0), + EnableAccelerator: bit'(0), + RVS: bit'(1), + RVU: bit'(1), + HaltAddress: 64'h800, + ExceptionAddress: 64'h808, + RASDepth: unsigned'(CVA6ConfigRASDepth), + BTBEntries: unsigned'(CVA6ConfigBTBEntries), + BHTEntries: unsigned'(CVA6ConfigBHTEntries), + DmBaseAddress: 64'h0, + NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries), + PMPCfgRstVal: {16{64'h0}}, + PMPAddrRstVal: {16{64'h0}}, + PMPEntryReadOnly: 16'd0, + NOCType: config_pkg::NOC_TYPE_AXI4_ATOP, + // idempotent region + NrNonIdempotentRules: + unsigned'( + 2 + ), + NonIdempotentAddrBase: 1024'({64'b0, 64'b0}), + NonIdempotentLength: 1024'({64'b0, 64'b0}), + NrExecuteRegionRules: unsigned'(3), + // DRAM, Boot ROM, Debug Module + ExecuteRegionAddrBase: + 1024'( + {64'h8000_0000, 64'h1_0000, 64'h0} + ), + ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}), + // cached region + 
NrCachedRegionRules: + unsigned'( + 1 + ), + CachedRegionAddrBase: 1024'({64'h8000_0000}), + CachedRegionLength: 1024'({64'h40000000}), + MaxOutstandingStores: unsigned'(7), + DebugEn: bit'(1), + NonIdemPotenceEn: bit'(0), + AxiBurstWriteEn: bit'(0) + }; + +endpackage diff --git a/test/type_param/core/include/cva6_hpdcache_default_config_pkg.sv b/test/type_param/core/include/cva6_hpdcache_default_config_pkg.sv new file mode 100644 index 00000000..1abe5370 --- /dev/null +++ b/test/type_param/core/include/cva6_hpdcache_default_config_pkg.sv @@ -0,0 +1,123 @@ +// Copyright 2023 Commissariat a l'Energie Atomique et aux Energies +// Alternatives (CEA) +// +// Licensed under the Solderpad Hardware License, Version 2.1 (the “License”); +// you may not use this file except in compliance with the License. +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// You may obtain a copy of the License at https://solderpad.org/licenses/ +// +// Authors: Cesar Fuguet +// Date: February, 2023 +// Description: +// Default package with parameters for the HPDcache in a CVA6 platform. +// Users can copy this file, rename it, and adapt the configuration values as +// needed. 
+
+package hpdcache_params_pkg;  // compile-time HPDcache parameters, derived from cva6_config_pkg
+  // Imports from the CVA6 configuration package
+  // {{{
+  import cva6_config_pkg::CVA6ConfigXlen;
+  import cva6_config_pkg::CVA6ConfigDcacheByteSize;
+  import cva6_config_pkg::CVA6ConfigDcacheSetAssoc;
+  import cva6_config_pkg::CVA6ConfigDcacheLineWidth;
+  import cva6_config_pkg::CVA6ConfigDcacheIdWidth;
+  import cva6_config_pkg::CVA6ConfigWtDcacheWbufDepth;
+  import cva6_config_pkg::CVA6ConfigNrLoadBufEntries;
+  // }}}
+
+  // Definition of constants used only in this file
+  // {{{
+  localparam int unsigned __BYTES_PER_WAY = CVA6ConfigDcacheByteSize / CVA6ConfigDcacheSetAssoc;  // D-cache byte size / associativity
+
+  localparam int unsigned __BYTES_PER_CACHELINE = CVA6ConfigDcacheLineWidth / 8;  // configured line width is divided by 8 to get bytes
+  // }}}
+
+  // Definition of global constants for the HPDcache data and directory
+  // {{{
+  // HPDcache physical address width (in bits)
+  localparam int unsigned PARAM_PA_WIDTH = riscv::PLEN;  // reached through scope resolution; package riscv is not imported here
+
+  // HPDcache number of sets
+  localparam int unsigned PARAM_SETS = __BYTES_PER_WAY / __BYTES_PER_CACHELINE;
+
+  // HPDcache number of ways
+  localparam int unsigned PARAM_WAYS = CVA6ConfigDcacheSetAssoc;
+
+  // HPDcache word width (bits)
+  localparam int unsigned PARAM_WORD_WIDTH = CVA6ConfigXlen;
+
+  // HPDcache number of words per cache line (configured line width / word width)
+  localparam int unsigned PARAM_CL_WORDS = CVA6ConfigDcacheLineWidth / PARAM_WORD_WIDTH;
+
+  // HPDcache number of words in the request data channels (request and response)
+  localparam int unsigned PARAM_REQ_WORDS = 1;
+
+  // HPDcache request transaction ID width (bits)
+  localparam int unsigned PARAM_REQ_TRANS_ID_WIDTH = CVA6ConfigDcacheIdWidth;
+
+  // HPDcache request source ID width (bits)
+  localparam int unsigned PARAM_REQ_SRC_ID_WIDTH = 3;
+  // }}}
+
+  // Definition of constants and types for HPDcache data memory
+  // {{{
+  localparam int unsigned PARAM_DATA_WAYS_PER_RAM_WORD = 128 / PARAM_WORD_WIDTH;  // NOTE(review): 128 is presumably the data RAM word width in bits - confirm
+  localparam int unsigned PARAM_DATA_SETS_PER_RAM = PARAM_SETS;  // equal to the total number of sets
+
+  // Whether the HPDcache data RAM macros implement:
+  // - Write byte enable (1'b1)
+  // - Write bit mask (1'b0)
+  localparam bit PARAM_DATA_RAM_WBYTEENABLE = 1'b1;
+
+  // Define the number of memory contiguous words that can be accessed
+  // simultaneously from the cache.
+  // - This limits the maximum width for the data channel from requesters
+  // - This impacts the refill latency (more ACCESS_WORDS -> less REFILL LATENCY)
+  localparam int unsigned PARAM_ACCESS_WORDS = PARAM_CL_WORDS / 2;  // half of the words of a cache line
+  // }}}
+
+  // Definition of constants and types for the Miss Status Holding Register (MSHR)
+  // {{{
+  // HPDcache MSHR number of sets
+  localparam int unsigned PARAM_MSHR_SETS = 2;
+
+  // HPDcache MSHR number of ways
+  localparam int unsigned PARAM_MSHR_WAYS = (CVA6ConfigNrLoadBufEntries > 4) ? 4 : 2;  // 4 ways when more than 4 load-buffer entries are configured, otherwise 2
+
+  // HPDcache MSHR number of ways in the same SRAM word
+  localparam int unsigned PARAM_MSHR_WAYS_PER_RAM_WORD = (PARAM_MSHR_WAYS > 1) ? 2 : 1;  // always 2 here: PARAM_MSHR_WAYS is either 4 or 2
+
+  // HPDcache MSHR number of sets in the same SRAM
+  localparam int unsigned PARAM_MSHR_SETS_PER_RAM = PARAM_MSHR_SETS;
+
+  // Whether the HPDcache MSHR RAM implements:
+  // - Write byte enable (1'b1)
+  // - Write bit mask (1'b0)
+  localparam bit PARAM_MSHR_RAM_WBYTEENABLE = 1'b1;
+
+  // Whether the HPDcache MSHR uses FFs (1'b1) or SRAM (1'b0); set when sets * ways <= 16
+  localparam bit PARAM_MSHR_USE_REGBANK = (PARAM_MSHR_SETS * PARAM_MSHR_WAYS) <= 16;  // evaluates to 1 here: 2 sets * at most 4 ways
+  localparam bit PARAM_REFILL_CORE_RSP_FEEDTHROUGH = 1'b1;  // NOTE(review): not described in this file; meaning is defined by the HPDcache RTL - confirm
+  // }}}
+
+  // Definition of constants and types for the Write Buffer (WBUF)
+  // {{{
+  // HPDcache Write-Buffer number of entries in the directory
+  localparam int unsigned PARAM_WBUF_DIR_ENTRIES = CVA6ConfigWtDcacheWbufDepth;
+
+  // HPDcache Write-Buffer number of entries in the data buffer
+  localparam int unsigned PARAM_WBUF_DATA_ENTRIES = CVA6ConfigWtDcacheWbufDepth;
+
+  // HPDcache Write-Buffer number of words per entry
+  localparam int unsigned PARAM_WBUF_WORDS = PARAM_REQ_WORDS;
+
+  // HPDcache Write-Buffer threshold counter width (in bits)
+  localparam int unsigned PARAM_WBUF_TIMECNT_WIDTH = 3;
+  localparam bit PARAM_WBUF_SEND_FEEDTHROUGH = 1'b0;  // NOTE(review): not described in this file; meaning is defined by the HPDcache RTL - confirm
+  // }}}
+
+  // Definition of constants and types for the Replay Table (RTAB)
+  // {{{
+  localparam int PARAM_RTAB_ENTRIES = 4;  // declared as (signed) int, unlike the int unsigned parameters above
+  // }}}
+endpackage
diff --git a/test/type_param/core/include/cvxif_pkg.sv b/test/type_param/core/include/cvxif_pkg.sv
new file mode 100644
index 00000000..39e77b44
--- /dev/null
+++ b/test/type_param/core/include/cvxif_pkg.sv
@@ -0,0 +1,110 @@
+// Copyright 2021 Thales DIS design services SAS
+//
+// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
+// You may obtain a copy of the License at https://solderpad.org/licenses/
+//
+// Original Author: Guillaume CHAUVON (guillaume.chauvon@thalesgroup.com)
+
+// Package for the CoreV-X-Interface for the CVA6: interface widths and packed payload types
+
+package cvxif_pkg;
+
+  localparam X_DATAWIDTH = riscv::XLEN;
+  localparam X_NUM_RS = ariane_pkg::NR_RGPR_PORTS;  // 2 or 3
+  localparam X_ID_WIDTH = ariane_pkg::TRANS_ID_BITS;  // width of every `id` field below
+  localparam X_MEM_WIDTH = 64;  // width of x_mem_req_t.wdata and x_mem_result_t.rdata
+  localparam X_RFR_WIDTH = riscv::XLEN;  // width of each x_issue_req_t.rs entry
+  localparam X_RFW_WIDTH = riscv::XLEN;  // width of x_result_t.data
+
+  typedef struct packed {  // compressed request: 16-bit instr, 2-bit mode, id
+    logic [15:0] instr;
+    logic [1:0] mode;
+    logic [X_ID_WIDTH-1:0] id;
+  } x_compressed_req_t;
+
+  typedef struct packed {  // compressed response: 32-bit instr plus accept flag
+    logic [31:0] instr;
+    logic accept;
+  } x_compressed_resp_t;
+
+  typedef struct packed {  // issue request
+    logic [31:0] instr;
+    logic [1:0] mode;
+    logic [X_ID_WIDTH-1:0] id;
+    logic [X_NUM_RS-1:0][X_RFR_WIDTH-1:0] rs;  // X_NUM_RS entries of X_RFR_WIDTH bits each
+    logic [X_NUM_RS-1:0] rs_valid;  // one bit per rs entry
+  } x_issue_req_t;
+
+  typedef struct packed {  // issue response: six single-bit flags
+    logic accept;
+    logic writeback;
+    logic dualwrite;
+    logic dualread;
+    logic loadstore;
+    logic exc;
+  } x_issue_resp_t;
+
+  typedef struct packed {  // commit payload: id plus kill flag
+    logic [X_ID_WIDTH-1:0] id;
+    logic x_commit_kill;
+  } x_commit_t;
+
+  typedef struct packed {  // memory request
+    logic [X_ID_WIDTH-1:0] id;
+    logic [31:0] addr;  // fixed 32 bits, independent of riscv::XLEN
+    logic [1:0] mode;
+    logic we;
+    logic [1:0] size;
+    logic [X_MEM_WIDTH-1:0] wdata;
+    logic last;
+    logic spec;
+  } x_mem_req_t;
+
+  typedef struct packed {  // memory response: exc flag plus 6-bit exccode
+    logic exc;
+    logic [5:0] exccode;
+  } x_mem_resp_t;
+
+  typedef struct packed {  // memory result
+    logic [X_ID_WIDTH-1:0] id;
+    logic [X_MEM_WIDTH-1:0] rdata;
+    logic err;
+  } x_mem_result_t;
+
+  typedef struct packed {  // result payload
+    logic [X_ID_WIDTH-1:0] id;
+    logic [X_RFW_WIDTH-1:0] data;
+    logic [4:0] rd;
+    logic we;
+    logic exc;
+    logic [5:0] exccode;
+  } x_result_t;
+
+  typedef struct packed {  // NOTE(review): presumably the core -> coprocessor bundle; confirm against the CV-X-IF spec
+    logic x_compressed_valid;
+    x_compressed_req_t x_compressed_req;
+    logic x_issue_valid;
+    x_issue_req_t x_issue_req;
+    logic x_commit_valid;
+    x_commit_t x_commit;
+    logic x_mem_ready;
+    x_mem_resp_t x_mem_resp;
+    logic x_mem_result_valid;
+    x_mem_result_t x_mem_result;
+    logic x_result_ready;
+  } cvxif_req_t;
+
+  typedef struct packed {  // NOTE(review): presumably the coprocessor -> core bundle; confirm against the CV-X-IF spec
+    logic x_compressed_ready;
+    x_compressed_resp_t x_compressed_resp;
+    logic x_issue_ready;
+    x_issue_resp_t x_issue_resp;
+    logic x_mem_valid;
+    x_mem_req_t x_mem_req;
+    logic x_result_valid;
+    x_result_t x_result;
+  } cvxif_resp_t;
+
+endpackage
diff --git a/test/type_param/core/include/instr_tracer_pkg.sv b/test/type_param/core/include/instr_tracer_pkg.sv
new file mode 100644
index 00000000..bd36f09b
--- /dev/null
+++ b/test/type_param/core/include/instr_tracer_pkg.sv
@@ -0,0 +1,202 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 16.05.2017
+// Description: Instruction Tracer Defines
+
+`ifndef VERILATOR
+package instr_tracer_pkg;  // instruction bit patterns with '?' wildcard bits; only compiled when VERILATOR is not defined
+
+  parameter INSTR_NOP = 32'h00_00_00_13;  // fully specified value, no wildcard bits
+
+  parameter INSTR_LUI = {25'b?, riscv::OpcodeLui};  // a sized literal whose only digit is '?' is left-padded with '?' (IEEE 1800 literal padding rule)
+  parameter INSTR_AUIPC = {25'b?, riscv::OpcodeAuipc};
+  parameter INSTR_JAL = {25'b?, riscv::OpcodeJal};
+  parameter INSTR_JALR = {17'b?, 3'b000, 5'b?, riscv::OpcodeJalr};
+  // BRANCH (the *Z variants additionally fix bits [24:20] to zero)
+  parameter INSTR_BEQZ = {7'b?, 5'b0, 5'b?, 3'b000, 5'b?, riscv::OpcodeBranch};
+  parameter INSTR_BEQ = {7'b?, 5'b?, 5'b?, 3'b000, 5'b?, riscv::OpcodeBranch};
+  parameter INSTR_BNEZ = {7'b?, 5'b0, 5'b?, 3'b001, 5'b?, riscv::OpcodeBranch};
+  parameter INSTR_BNE = {7'b?, 5'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeBranch};
+  parameter INSTR_BLTZ = {7'b?, 5'b0, 5'b?, 3'b100, 5'b?, riscv::OpcodeBranch};
+  parameter INSTR_BLT = {7'b?, 5'b?, 5'b?, 3'b100, 5'b?, riscv::OpcodeBranch};
+  parameter INSTR_BGEZ = {7'b?, 5'b0, 5'b?, 3'b101, 5'b?, riscv::OpcodeBranch};
+  parameter INSTR_BGE = {7'b?, 5'b?, 5'b?, 3'b101, 5'b?, riscv::OpcodeBranch};
+  parameter INSTR_BLTU = {7'b?, 5'b?, 5'b?, 3'b110, 5'b?, riscv::OpcodeBranch};
+  parameter INSTR_BGEU = {7'b?, 5'b?, 5'b?, 3'b111, 5'b?, riscv::OpcodeBranch};
+
+  // OP-IMM
+  parameter INSTR_LI = {12'b?, 5'b0, 3'b000, 5'b?, riscv::OpcodeOpImm};  // INSTR_ADDI pattern with bits [19:15] fixed to zero
+  parameter INSTR_ADDI = {17'b?, 3'b000, 5'b?, riscv::OpcodeOpImm};
+  parameter INSTR_SLTI = {17'b?, 3'b010, 5'b?, riscv::OpcodeOpImm};
+  parameter INSTR_SLTIU = {17'b?, 3'b011, 5'b?, riscv::OpcodeOpImm};
+  parameter INSTR_XORI = {17'b?, 3'b100, 5'b?, riscv::OpcodeOpImm};
+  parameter INSTR_ORI = {17'b?, 3'b110, 5'b?, riscv::OpcodeOpImm};
+  parameter INSTR_ANDI = {17'b?, 3'b111, 5'b?, riscv::OpcodeOpImm};
+  parameter INSTR_SLLI = {6'b000000, 11'b?, 3'b001, 5'b?, riscv::OpcodeOpImm};  // only the top 6 bits are fixed here (7 in the OP-IMM-32 shifts below)
+  parameter INSTR_SRLI = {6'b000000, 11'b?, 3'b101, 5'b?, riscv::OpcodeOpImm};
+  parameter INSTR_SRAI = {6'b010000, 11'b?, 3'b101, 5'b?, riscv::OpcodeOpImm};
+
+  // OP-IMM-32
+  parameter INSTR_ADDIW = {17'b?, 3'b000, 5'b?, riscv::OpcodeOpImm32};
+  parameter INSTR_SLLIW = {7'b0000000, 10'b?, 3'b001, 5'b?, riscv::OpcodeOpImm32};
+  parameter INSTR_SRLIW = {7'b0000000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOpImm32};
+  parameter INSTR_SRAIW = {7'b0100000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOpImm32};
+
+  // OP
+  parameter INSTR_ADD = {7'b0000000, 10'b?, 3'b000, 5'b?, riscv::OpcodeOp};
+  parameter INSTR_SUB = {7'b0100000, 10'b?, 3'b000, 5'b?, riscv::OpcodeOp};
+  parameter INSTR_SLL = {7'b0000000, 10'b?, 3'b001, 5'b?, riscv::OpcodeOp};
+  parameter INSTR_SLT = {7'b0000000, 10'b?, 3'b010, 5'b?, riscv::OpcodeOp};
+  parameter INSTR_SLTU = {7'b0000000, 10'b?, 3'b011, 5'b?, riscv::OpcodeOp};
+  parameter INSTR_XOR = {7'b0000000, 10'b?, 3'b100, 5'b?, riscv::OpcodeOp};
+  parameter INSTR_SRL = {7'b0000000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp};
+  parameter INSTR_SRA = {7'b0100000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp};
+  parameter INSTR_OR = {7'b0000000, 10'b?, 3'b110, 5'b?, riscv::OpcodeOp};
+  parameter INSTR_AND = {7'b0000000, 10'b?, 3'b111, 5'b?, riscv::OpcodeOp};
+  parameter INSTR_MUL = {7'b0000001, 10'b?, 3'b???, 5'b?, riscv::OpcodeOp};  // bits [14:12] wildcarded: overlaps every RV32M pattern below
+
+  // OP32
+  parameter INSTR_ADDW = {7'b0000000, 10'b?, 3'b000, 5'b?, riscv::OpcodeOp32};
+  parameter INSTR_SUBW = {7'b0100000, 10'b?, 3'b000, 5'b?, riscv::OpcodeOp32};
+  parameter INSTR_SLLW = {7'b0000000, 10'b?, 3'b001, 5'b?, riscv::OpcodeOp32};
+  parameter INSTR_SRLW = {7'b0000000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp32};
+  parameter INSTR_SRAW = {7'b0100000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp32};
+  parameter INSTR_MULW = {7'b0000001, 10'b?, 3'b???, 5'b?, riscv::OpcodeOp32};  // bits [14:12] wildcarded, as in INSTR_MUL
+
+  // MISC-MEM
+  parameter INSTR_FENCE = {4'b0, 8'b?, 13'b0, riscv::OpcodeMiscMem};  // only bits [27:20] are wildcards
+  parameter INSTR_FENCEI = {17'b0, 3'b001, 5'b0, riscv::OpcodeMiscMem};  // fully specified, no wildcard bits
+
+  // SYSTEM (CSRW/CSRS/CSRC fix bits [11:7] to zero; CSRR fixes bits [19:15] to zero)
+  parameter INSTR_CSRW = {12'b?, 5'b?, 3'b001, 5'b0, riscv::OpcodeSystem};
+  parameter INSTR_CSRRW = {12'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeSystem};
+  parameter INSTR_CSRR = {12'b?, 5'b0, 3'b010, 5'b?, riscv::OpcodeSystem};
+  parameter INSTR_CSRRS = {12'b?, 5'b?, 3'b010, 5'b?, riscv::OpcodeSystem};
+  parameter INSTR_CSRS = {12'b?, 5'b?, 3'b010, 5'b0, riscv::OpcodeSystem};
+  parameter INSTR_CSRRC = {12'b?, 5'b?, 3'b011, 5'b?, riscv::OpcodeSystem};
+  parameter INSTR_CSRC = {12'b?, 5'b?, 3'b011, 5'b0, riscv::OpcodeSystem};
+
+  parameter INSTR_CSRWI = {17'b?, 3'b101, 5'b0, riscv::OpcodeSystem};  // the CSR*I forms without the extra R fix bits [11:7] to zero
+  parameter INSTR_CSRRWI = {17'b?, 3'b101, 5'b?, riscv::OpcodeSystem};
+  parameter INSTR_CSRSI = {17'b?, 3'b110, 5'b0, riscv::OpcodeSystem};
+  parameter INSTR_CSRRSI = {17'b?, 3'b110, 5'b?, riscv::OpcodeSystem};
+  parameter INSTR_CSRCI = {17'b?, 3'b111, 5'b0, riscv::OpcodeSystem};
+  parameter INSTR_CSRRCI = {17'b?, 3'b111, 5'b?, riscv::OpcodeSystem};
+
+  parameter INSTR_ECALL = {12'b000000000000, 13'b0, riscv::OpcodeSystem};  // ECALL..WFI are fully specified: fixed bits [31:20], zeros in [19:7]
+  parameter INSTR_EBREAK = {12'b000000000001, 13'b0, riscv::OpcodeSystem};
+  parameter INSTR_MRET = {12'b001100000010, 13'b0, riscv::OpcodeSystem};
+  parameter INSTR_SRET = {12'b000100000010, 13'b0, riscv::OpcodeSystem};
+  parameter INSTR_DRET = {12'b011110110010, 13'b0, riscv::OpcodeSystem};
+  parameter INSTR_WFI = {12'b000100000101, 13'b0, riscv::OpcodeSystem};
+  parameter INSTR_SFENCE = {12'b0001001?????, 13'b?, riscv::OpcodeSystem};  // only bits [31:25] and the opcode are fixed
+
+  // RV32M (each of these is also covered by the INSTR_MUL wildcard pattern above)
+  parameter INSTR_PMUL = {7'b0000001, 10'b?, 3'b000, 5'b?, riscv::OpcodeOp};
+  parameter INSTR_DIV = {7'b0000001, 10'b?, 3'b100, 5'b?, riscv::OpcodeOp};
+  parameter INSTR_DIVU = {7'b0000001, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp};
+  parameter INSTR_REM = {7'b0000001, 10'b?, 3'b110, 5'b?, riscv::OpcodeOp};
+  parameter INSTR_REMU = {7'b0000001, 10'b?, 3'b111, 5'b?, riscv::OpcodeOp};
+
+  // RVFD (concatenation fields are bits [31:27], [26:25], [24:20], [19:15], [14:12], [11:7], opcode)
+  parameter INSTR_FMADD = {5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeMadd};
+  parameter INSTR_FMSUB = {5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeMsub};
+  parameter INSTR_FNSMSUB = {5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeNmsub};  // NOTE(review): name is likely a typo for FNMSUB (it uses OpcodeNmsub); left unchanged because the identifier is package interface
+  parameter INSTR_FNMADD = {5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeNmadd};
+
+  parameter INSTR_FADD = {5'b00000, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
+  parameter INSTR_FSUB = {5'b00001, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
+  parameter INSTR_FMUL = {5'b00010, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
+  parameter INSTR_FDIV = {5'b00011, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
+  parameter INSTR_FSQRT = {5'b01011, 2'b?, 5'b0, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};  // bits [24:20] fixed to zero
+  parameter INSTR_FSGNJ = {5'b00100, 2'b?, 5'b?, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp};
+  parameter INSTR_FSGNJN = {5'b00100, 2'b?, 5'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeOpFp};
+  parameter INSTR_FSGNJX = {5'b00100, 2'b?, 5'b?, 5'b?, 3'b010, 5'b?, riscv::OpcodeOpFp};
+  parameter INSTR_FMIN = {5'b00101, 2'b?, 5'b?, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp};
+  parameter INSTR_FMAX = {5'b00101, 2'b?, 5'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeOpFp};
+  parameter INSTR_FLE = {5'b10100, 2'b?, 5'b?, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp};
+  parameter INSTR_FLT = {5'b10100, 2'b?, 5'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeOpFp};
+  parameter INSTR_FEQ = {5'b10100, 2'b?, 5'b?, 5'b?, 3'b010, 5'b?, riscv::OpcodeOpFp};
+
+  parameter INSTR_FCVT_F2F = {5'b01000, 2'b?, 5'b000??, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};  // the FCVT patterns fix bits [24:22] to zero and wildcard bits [21:20]
+  parameter INSTR_FMV_F2X = {5'b11100, 2'b?, 5'b0, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp};
+  parameter INSTR_FCLASS = {5'b11100, 2'b?, 5'b0, 5'b?, 3'b001, 5'b?, riscv::OpcodeOpFp};
+  parameter INSTR_FMV_X2F = {5'b11110, 2'b?, 5'b0, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp};
+  parameter INSTR_FCVT_F2I = {5'b11000, 2'b?, 5'b000??, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
+  parameter INSTR_FCVT_I2F = {5'b11010, 2'b?, 5'b000??, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
+
+  // A
+  parameter INSTR_AMO = {25'b?, riscv::OpcodeAmo};  // only the opcode is fixed
+
+  // Load/Stores (explicitly typed [31:0] and written as literals; only bits [14:12] and the 7-bit opcode are fixed)
+  parameter [31:0] LB = 32'b?????????????????000?????0000011;
+  parameter [31:0] LH = 32'b?????????????????001?????0000011;
+  parameter [31:0] LW = 32'b?????????????????010?????0000011;
+  parameter [31:0] LD = 32'b?????????????????011?????0000011;
+  parameter [31:0] LBU = 32'b?????????????????100?????0000011;
+  parameter [31:0] LHU = 32'b?????????????????101?????0000011;
+  parameter [31:0] LWU = 32'b?????????????????110?????0000011;
+  parameter [31:0] FLW = 32'b?????????????????010?????0000111;
+  parameter [31:0] FLD = 32'b?????????????????011?????0000111;
+  parameter [31:0] FLQ = 32'b?????????????????100?????0000111;
+  parameter [31:0] SB = 32'b?????????????????000?????0100011;
+  parameter [31:0] SH = 32'b?????????????????001?????0100011;
+  parameter [31:0] SW = 32'b?????????????????010?????0100011;
+  parameter [31:0] SD = 32'b?????????????????011?????0100011;
+  parameter [31:0] FSW = 32'b?????????????????010?????0100111;
+  parameter [31:0] FSD = 32'b?????????????????011?????0100111;
+  parameter [31:0] FSQ = 32'b?????????????????100?????0100111;
+  parameter [31:0] C_ADDI4SPN = 32'b????????????????000???????????00;  // every C_* pattern wildcards the upper 16 bits
+  parameter [31:0] C_FLD = 32'b????????????????001???????????00;
+  parameter [31:0] C_LW = 32'b????????????????010???????????00;
+  parameter [31:0] C_FLW = 32'b????????????????011???????????00;
+  parameter [31:0] C_FSD = 32'b????????????????101???????????00;
+  parameter [31:0] C_SW = 32'b????????????????110???????????00;
+  parameter [31:0] C_FSW = 32'b????????????????111???????????00;
+  parameter [31:0] C_ADDI = 32'b????????????????000???????????01;
+  parameter [31:0] C_JAL = 32'b????????????????001???????????01;
+  parameter [31:0] C_LI = 32'b????????????????010???????????01;
+  parameter [31:0] C_LUI = 32'b????????????????011???????????01;
+  parameter [31:0] C_SRLI = 32'b????????????????100?00????????01;
+  parameter [31:0] C_SRAI = 32'b????????????????100?01????????01;
+  parameter [31:0] C_ANDI = 32'b????????????????100?10????????01;
+  parameter [31:0] C_SUB = 32'b????????????????100011???00???01;
+  parameter [31:0] C_XOR = 32'b????????????????100011???01???01;
+  parameter [31:0] C_OR = 32'b????????????????100011???10???01;
+  parameter [31:0] C_AND = 32'b????????????????100011???11???01;
+  parameter [31:0] C_SUBW = 32'b????????????????100111???00???01;
+  parameter [31:0] C_ADDW = 32'b????????????????100111???01???01;
+  parameter [31:0] C_J = 32'b????????????????101???????????01;
+  parameter [31:0] C_BEQZ = 32'b????????????????110???????????01;
+  parameter [31:0] C_BNEZ = 32'b????????????????111???????????01;
+  parameter [31:0] C_SLLI = 32'b????????????????000???????????10;
+  parameter [31:0] C_FLDSP = 32'b????????????????001???????????10;
+  parameter [31:0] C_LWSP = 32'b????????????????010???????????10;
+  parameter [31:0] C_FLWSP = 32'b????????????????011???????????10;
+  parameter [31:0] C_MV = 32'b????????????????1000??????????10;
+  parameter [31:0] C_ADD = 32'b????????????????1001??????????10;
+  parameter [31:0] C_FSDSP = 32'b????????????????101???????????10;
+  parameter [31:0] C_SWSP = 32'b????????????????110???????????10;
+  parameter [31:0] C_FSWSP = 32'b????????????????111???????????10;
+  parameter [31:0] C_NOP = 32'b????????????????0000000000000001;  // lower 16 bits fully specified; also matched by the C_ADDI pattern
+  parameter [31:0] C_ADDI16SP = 32'b????????????????011?00010?????01;  // a more specific case of the C_LUI pattern
+  parameter [31:0] C_JR = 32'b????????????????1000?????0000010;  // a more specific case of the C_MV pattern
+  parameter [31:0] C_JALR = 32'b????????????????1001?????0000010;  // a more specific case of the C_ADD pattern
+  parameter [31:0] C_EBREAK = 32'b????????????????1001000000000010;  // lower 16 bits fully specified; also matched by C_JALR and C_ADD
+  parameter [31:0] C_LD = 32'b????????????????011???????????00;  // identical bit pattern to C_FLW
+  parameter [31:0] C_SD = 32'b????????????????111???????????00;  // identical bit pattern to C_FSW
+  parameter [31:0] C_ADDIW = 32'b????????????????001???????????01;  // identical bit pattern to C_JAL
+  parameter [31:0] C_LDSP = 32'b????????????????011???????????10;  // identical bit pattern to C_FLWSP
+  parameter [31:0] C_SDSP = 32'b????????????????111???????????10;  // identical bit pattern to C_FSWSP
+
+endpackage
+`endif
diff --git a/test/type_param/core/include/riscv_pkg.sv b/test/type_param/core/include/riscv_pkg.sv
new file mode 100644
index 00000000..18ae2cfc
--- /dev/null
+++ b/test/type_param/core/include/riscv_pkg.sv
@@ -0,0 +1,851 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+ * Copyright and related rights are licensed under the Solderpad Hardware + * License, Version 0.51 (the “License”); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law + * or agreed to in writing, software, hardware and materials distributed under + * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * + * File: riscv_pkg.sv + * Author: Florian Zaruba + * Date: 30.6.2017 + * + * Description: Common RISC-V definitions. + */ + +package riscv; + + // ---------------------- + // Import cva6 config from cva6_config_pkg + // ---------------------- + localparam XLEN = cva6_config_pkg::CVA6ConfigXlen; + localparam FPU_EN = cva6_config_pkg::CVA6ConfigFpuEn; + + // ---------------------- + // Data and Address length + // ---------------------- + typedef enum logic [3:0] { + ModeOff = 0, + ModeSv32 = 1, + ModeSv39 = 8, + ModeSv48 = 9, + ModeSv57 = 10, + ModeSv64 = 11 + } vm_mode_t; + + // Warning: When using STD_CACHE, configuration must be PLEN=56 and VLEN=64 + // Warning: VLEN must be superior or equal to PLEN + localparam VLEN = (XLEN == 32) ? 32 : 64; // virtual address length + localparam PLEN = (XLEN == 32) ? 34 : 56; // physical address length + + localparam IS_XLEN32 = (XLEN == 32) ? 1'b1 : 1'b0; + localparam IS_XLEN64 = (XLEN == 32) ? 1'b0 : 1'b1; + localparam ModeW = (XLEN == 32) ? 1 : 4; + localparam ASIDW = (XLEN == 32) ? 9 : 16; + localparam PPNW = (XLEN == 32) ? 22 : 44; + localparam vm_mode_t MODE_SV = (XLEN == 32) ? ModeSv32 : ModeSv39; + localparam SV = (MODE_SV == ModeSv32) ? 32 : 39; + localparam VPN2 = (VLEN - 31 < 8) ? 
VLEN - 31 : 8; + localparam XLEN_ALIGN_BYTES = $clog2(XLEN / 8); + + typedef logic [XLEN-1:0] xlen_t; + + // -------------------- + // Privilege Spec + // -------------------- + typedef enum logic [1:0] { + PRIV_LVL_M = 2'b11, + PRIV_LVL_S = 2'b01, + PRIV_LVL_U = 2'b00 + } priv_lvl_t; + + // type which holds xlen + typedef enum logic [1:0] { + XLEN_32 = 2'b01, + XLEN_64 = 2'b10, + XLEN_128 = 2'b11 + } xlen_e; + + typedef enum logic [1:0] { + Off = 2'b00, + Initial = 2'b01, + Clean = 2'b10, + Dirty = 2'b11 + } xs_t; + + typedef struct packed { + logic sd; // signal dirty state - read-only + logic [62:34] wpri6; // writes preserved reads ignored + xlen_e uxl; // variable user mode xlen - hardwired to zero + logic [12:0] wpri5; // writes preserved reads ignored + logic mxr; // make executable readable + logic sum; // permit supervisor user memory access + logic wpri4; // writes preserved reads ignored + xs_t xs; // extension register - hardwired to zero + xs_t fs; // floating point extension register + logic [1:0] wpri3; // writes preserved reads ignored + xs_t vs; // vector extension register + logic spp; // holds the previous privilege mode up to supervisor + logic wpri2; // writes preserved reads ignored + logic ube; // UBE controls whether explicit load and store memory accesses made from U-mode are little-endian (UBE=0) or big-endian (UBE=1) + logic spie; // supervisor interrupts enable bit active prior to trap + logic [1:0] wpri1; // writes preserved reads ignored + logic sie; // supervisor interrupts enable + logic wpri0; // writes preserved reads ignored + } sstatus_rv_t; + + typedef struct packed { + logic sd; // signal dirty state - read-only + logic [62:36] wpri4; // writes preserved reads ignored + xlen_e sxl; // variable supervisor mode xlen - hardwired to zero + xlen_e uxl; // variable user mode xlen - hardwired to zero + logic [8:0] wpri3; // writes preserved reads ignored + logic tsr; // trap sret + logic tw; // time wait + logic tvm; // trap virtual 
memory + logic mxr; // make executable readable + logic sum; // permit supervisor user memory access + logic mprv; // modify privilege - privilege level for ld/st + xs_t xs; // extension register - hardwired to zero + xs_t fs; // floating point extension register + priv_lvl_t mpp; // holds the previous privilege mode up to machine + xs_t vs; // vector extension register + logic spp; // holds the previous privilege mode up to supervisor + logic mpie; // machine interrupts enable bit active prior to trap + logic ube; // UBE controls whether explicit load and store memory accesses made from U-mode are little-endian (UBE=0) or big-endian (UBE=1) + logic spie; // supervisor interrupts enable bit active prior to trap + logic wpri2; // writes preserved reads ignored + logic mie; // machine interrupts enable + logic wpri1; // writes preserved reads ignored + logic sie; // supervisor interrupts enable + logic wpri0; // writes preserved reads ignored + } mstatus_rv_t; + + typedef struct packed { + logic [ModeW-1:0] mode; + logic [ASIDW-1:0] asid; + logic [PPNW-1:0] ppn; + } satp_t; + + // -------------------- + // Instruction Types + // -------------------- + typedef struct packed { + logic [31:25] funct7; + logic [24:20] rs2; + logic [19:15] rs1; + logic [14:12] funct3; + logic [11:7] rd; + logic [6:0] opcode; + } rtype_t; + + typedef struct packed { + logic [31:27] rs3; + logic [26:25] funct2; + logic [24:20] rs2; + logic [19:15] rs1; + logic [14:12] funct3; + logic [11:7] rd; + logic [6:0] opcode; + } r4type_t; + + typedef struct packed { + logic [31:27] funct5; + logic [26:25] fmt; + logic [24:20] rs2; + logic [19:15] rs1; + logic [14:12] rm; + logic [11:7] rd; + logic [6:0] opcode; + } rftype_t; // floating-point + + typedef struct packed { + logic [31:30] funct2; + logic [29:25] vecfltop; + logic [24:20] rs2; + logic [19:15] rs1; + logic [14:14] repl; + logic [13:12] vfmt; + logic [11:7] rd; + logic [6:0] opcode; + } rvftype_t; // vectorial floating-point + + typedef 
struct packed { + logic [31:20] imm; + logic [19:15] rs1; + logic [14:12] funct3; + logic [11:7] rd; + logic [6:0] opcode; + } itype_t; + + typedef struct packed { + logic [31:25] imm; + logic [24:20] rs2; + logic [19:15] rs1; + logic [14:12] funct3; + logic [11:7] imm0; + logic [6:0] opcode; + } stype_t; + + typedef struct packed { + logic [31:12] imm; + logic [11:7] rd; + logic [6:0] opcode; + } utype_t; + + // atomic instructions + typedef struct packed { + logic [31:27] funct5; + logic aq; + logic rl; + logic [24:20] rs2; + logic [19:15] rs1; + logic [14:12] funct3; + logic [11:7] rd; + logic [6:0] opcode; + } atype_t; + + typedef union packed { + logic [31:0] instr; + rtype_t rtype; + r4type_t r4type; + rftype_t rftype; + rvftype_t rvftype; + itype_t itype; + stype_t stype; + utype_t utype; + atype_t atype; + } instruction_t; + + // -------------------- + // Opcodes + // -------------------- + // RV32/64G listings: + // Quadrant 0 + localparam OpcodeLoad = 7'b00_000_11; + localparam OpcodeLoadFp = 7'b00_001_11; + localparam OpcodeCustom0 = 7'b00_010_11; + localparam OpcodeMiscMem = 7'b00_011_11; + localparam OpcodeOpImm = 7'b00_100_11; + localparam OpcodeAuipc = 7'b00_101_11; + localparam OpcodeOpImm32 = 7'b00_110_11; + // Quadrant 1 + localparam OpcodeStore = 7'b01_000_11; + localparam OpcodeStoreFp = 7'b01_001_11; + localparam OpcodeCustom1 = 7'b01_010_11; + localparam OpcodeAmo = 7'b01_011_11; + localparam OpcodeOp = 7'b01_100_11; + localparam OpcodeLui = 7'b01_101_11; + localparam OpcodeOp32 = 7'b01_110_11; + // Quadrant 2 + localparam OpcodeMadd = 7'b10_000_11; + localparam OpcodeMsub = 7'b10_001_11; + localparam OpcodeNmsub = 7'b10_010_11; + localparam OpcodeNmadd = 7'b10_011_11; + localparam OpcodeOpFp = 7'b10_100_11; + localparam OpcodeVec = 7'b10_101_11; + localparam OpcodeCustom2 = 7'b10_110_11; + // Quadrant 3 + localparam OpcodeBranch = 7'b11_000_11; + localparam OpcodeJalr = 7'b11_001_11; + localparam OpcodeRsrvd2 = 7'b11_010_11; + localparam 
OpcodeJal = 7'b11_011_11; + localparam OpcodeSystem = 7'b11_100_11; + localparam OpcodeRsrvd3 = 7'b11_101_11; + localparam OpcodeCustom3 = 7'b11_110_11; + + // RV64C/RV32C listings: + // Quadrant 0 + localparam OpcodeC0 = 2'b00; + localparam OpcodeC0Addi4spn = 3'b000; + localparam OpcodeC0Fld = 3'b001; + localparam OpcodeC0Lw = 3'b010; + localparam OpcodeC0Ld = 3'b011; + localparam OpcodeC0Zcb = 3'b100; + localparam OpcodeC0Fsd = 3'b101; + localparam OpcodeC0Sw = 3'b110; + localparam OpcodeC0Sd = 3'b111; + // Quadrant 1 + localparam OpcodeC1 = 2'b01; + localparam OpcodeC1Addi = 3'b000; + localparam OpcodeC1Addiw = 3'b001; //for RV64I only + localparam OpcodeC1Jal = 3'b001; //for RV32I only + localparam OpcodeC1Li = 3'b010; + localparam OpcodeC1LuiAddi16sp = 3'b011; + localparam OpcodeC1MiscAlu = 3'b100; + localparam OpcodeC1J = 3'b101; + localparam OpcodeC1Beqz = 3'b110; + localparam OpcodeC1Bnez = 3'b111; + // Quadrant 2 + localparam OpcodeC2 = 2'b10; + localparam OpcodeC2Slli = 3'b000; + localparam OpcodeC2Fldsp = 3'b001; + localparam OpcodeC2Lwsp = 3'b010; + localparam OpcodeC2Ldsp = 3'b011; + localparam OpcodeC2JalrMvAdd = 3'b100; + localparam OpcodeC2Fsdsp = 3'b101; + localparam OpcodeC2Swsp = 3'b110; + localparam OpcodeC2Sdsp = 3'b111; + + // ---------------------- + // Virtual Memory + // ---------------------- + // memory management, pte for sv39 + typedef struct packed { + logic [9:0] reserved; + logic [44-1:0] ppn; // PPN length for + logic [1:0] rsw; + logic d; + logic a; + logic g; + logic u; + logic x; + logic w; + logic r; + logic v; + } pte_t; + + // memory management, pte for sv32 + typedef struct packed { + logic [22-1:0] ppn; // PPN length for + logic [1:0] rsw; + logic d; + logic a; + logic g; + logic u; + logic x; + logic w; + logic r; + logic v; + } pte_sv32_t; + + // ---------------------- + // Exception Cause Codes + // ---------------------- + localparam logic [XLEN-1:0] INSTR_ADDR_MISALIGNED = 0; + localparam logic [XLEN-1:0] 
INSTR_ACCESS_FAULT = 1; // Illegal access as governed by PMPs and PMAs + localparam logic [XLEN-1:0] ILLEGAL_INSTR = 2; + localparam logic [XLEN-1:0] BREAKPOINT = 3; + localparam logic [XLEN-1:0] LD_ADDR_MISALIGNED = 4; + localparam logic [XLEN-1:0] LD_ACCESS_FAULT = 5; // Illegal access as governed by PMPs and PMAs + localparam logic [XLEN-1:0] ST_ADDR_MISALIGNED = 6; + localparam logic [XLEN-1:0] ST_ACCESS_FAULT = 7; // Illegal access as governed by PMPs and PMAs + localparam logic [XLEN-1:0] ENV_CALL_UMODE = 8; // environment call from user mode + localparam logic [XLEN-1:0] ENV_CALL_SMODE = 9; // environment call from supervisor mode + localparam logic [XLEN-1:0] ENV_CALL_MMODE = 11; // environment call from machine mode + localparam logic [XLEN-1:0] INSTR_PAGE_FAULT = 12; // Instruction page fault + localparam logic [XLEN-1:0] LOAD_PAGE_FAULT = 13; // Load page fault + localparam logic [XLEN-1:0] STORE_PAGE_FAULT = 15; // Store page fault + localparam logic [XLEN-1:0] DEBUG_REQUEST = 24; // Debug request + + localparam int unsigned IRQ_S_SOFT = 1; + localparam int unsigned IRQ_M_SOFT = 3; + localparam int unsigned IRQ_S_TIMER = 5; + localparam int unsigned IRQ_M_TIMER = 7; + localparam int unsigned IRQ_S_EXT = 9; + localparam int unsigned IRQ_M_EXT = 11; + + localparam logic [XLEN-1:0] MIP_SSIP = 1 << IRQ_S_SOFT; + localparam logic [XLEN-1:0] MIP_MSIP = 1 << IRQ_M_SOFT; + localparam logic [XLEN-1:0] MIP_STIP = 1 << IRQ_S_TIMER; + localparam logic [XLEN-1:0] MIP_MTIP = 1 << IRQ_M_TIMER; + localparam logic [XLEN-1:0] MIP_SEIP = 1 << IRQ_S_EXT; + localparam logic [XLEN-1:0] MIP_MEIP = 1 << IRQ_M_EXT; + + localparam logic [XLEN-1:0] S_SW_INTERRUPT = (1 << (XLEN - 1)) | XLEN'(IRQ_S_SOFT); + localparam logic [XLEN-1:0] M_SW_INTERRUPT = (1 << (XLEN - 1)) | XLEN'(IRQ_M_SOFT); + localparam logic [XLEN-1:0] S_TIMER_INTERRUPT = (1 << (XLEN - 1)) | XLEN'(IRQ_S_TIMER); + localparam logic [XLEN-1:0] M_TIMER_INTERRUPT = (1 << (XLEN - 1)) | XLEN'(IRQ_M_TIMER); + localparam 
logic [XLEN-1:0] S_EXT_INTERRUPT = (1 << (XLEN - 1)) | XLEN'(IRQ_S_EXT); + localparam logic [XLEN-1:0] M_EXT_INTERRUPT = (1 << (XLEN - 1)) | XLEN'(IRQ_M_EXT); + + // ----- + // CSRs + // ----- + typedef enum logic [11:0] { + // Floating-Point CSRs + CSR_FFLAGS = 12'h001, + CSR_FRM = 12'h002, + CSR_FCSR = 12'h003, + CSR_FTRAN = 12'h800, + // Vector CSRs + CSR_VSTART = 12'h008, + CSR_VXSAT = 12'h009, + CSR_VXRM = 12'h00A, + CSR_VCSR = 12'h00F, + CSR_VL = 12'hC20, + CSR_VTYPE = 12'hC21, + CSR_VLENB = 12'hC22, + // Supervisor Mode CSRs + CSR_SSTATUS = 12'h100, + CSR_SIE = 12'h104, + CSR_STVEC = 12'h105, + CSR_SCOUNTEREN = 12'h106, + CSR_SSCRATCH = 12'h140, + CSR_SEPC = 12'h141, + CSR_SCAUSE = 12'h142, + CSR_STVAL = 12'h143, + CSR_SIP = 12'h144, + CSR_SATP = 12'h180, + // Machine Mode CSRs + CSR_MSTATUS = 12'h300, + CSR_MISA = 12'h301, + CSR_MEDELEG = 12'h302, + CSR_MIDELEG = 12'h303, + CSR_MIE = 12'h304, + CSR_MTVEC = 12'h305, + CSR_MCOUNTEREN = 12'h306, + CSR_MSTATUSH = 12'h310, + CSR_MCOUNTINHIBIT = 12'h320, + CSR_MHPM_EVENT_3 = 12'h323, //Machine performance monitoring Event Selector + CSR_MHPM_EVENT_4 = 12'h324, //Machine performance monitoring Event Selector + CSR_MHPM_EVENT_5 = 12'h325, //Machine performance monitoring Event Selector + CSR_MHPM_EVENT_6 = 12'h326, //Machine performance monitoring Event Selector + CSR_MHPM_EVENT_7 = 12'h327, //Machine performance monitoring Event Selector + CSR_MHPM_EVENT_8 = 12'h328, //Machine performance monitoring Event Selector + CSR_MHPM_EVENT_9 = 12'h329, //Reserved + CSR_MHPM_EVENT_10 = 12'h32A, //Reserved + CSR_MHPM_EVENT_11 = 12'h32B, //Reserved + CSR_MHPM_EVENT_12 = 12'h32C, //Reserved + CSR_MHPM_EVENT_13 = 12'h32D, //Reserved + CSR_MHPM_EVENT_14 = 12'h32E, //Reserved + CSR_MHPM_EVENT_15 = 12'h32F, //Reserved + CSR_MHPM_EVENT_16 = 12'h330, //Reserved + CSR_MHPM_EVENT_17 = 12'h331, //Reserved + CSR_MHPM_EVENT_18 = 12'h332, //Reserved + CSR_MHPM_EVENT_19 = 12'h333, //Reserved + CSR_MHPM_EVENT_20 = 12'h334, //Reserved + 
CSR_MHPM_EVENT_21 = 12'h335, //Reserved + CSR_MHPM_EVENT_22 = 12'h336, //Reserved + CSR_MHPM_EVENT_23 = 12'h337, //Reserved + CSR_MHPM_EVENT_24 = 12'h338, //Reserved + CSR_MHPM_EVENT_25 = 12'h339, //Reserved + CSR_MHPM_EVENT_26 = 12'h33A, //Reserved + CSR_MHPM_EVENT_27 = 12'h33B, //Reserved + CSR_MHPM_EVENT_28 = 12'h33C, //Reserved + CSR_MHPM_EVENT_29 = 12'h33D, //Reserved + CSR_MHPM_EVENT_30 = 12'h33E, //Reserved + CSR_MHPM_EVENT_31 = 12'h33F, //Reserved + CSR_MSCRATCH = 12'h340, + CSR_MEPC = 12'h341, + CSR_MCAUSE = 12'h342, + CSR_MTVAL = 12'h343, + CSR_MIP = 12'h344, + CSR_MENVCFG = 12'h30A, + CSR_MENVCFGH = 12'h31A, + CSR_PMPCFG0 = 12'h3A0, + CSR_PMPCFG1 = 12'h3A1, + CSR_PMPCFG2 = 12'h3A2, + CSR_PMPCFG3 = 12'h3A3, + CSR_PMPADDR0 = 12'h3B0, + CSR_PMPADDR1 = 12'h3B1, + CSR_PMPADDR2 = 12'h3B2, + CSR_PMPADDR3 = 12'h3B3, + CSR_PMPADDR4 = 12'h3B4, + CSR_PMPADDR5 = 12'h3B5, + CSR_PMPADDR6 = 12'h3B6, + CSR_PMPADDR7 = 12'h3B7, + CSR_PMPADDR8 = 12'h3B8, + CSR_PMPADDR9 = 12'h3B9, + CSR_PMPADDR10 = 12'h3BA, + CSR_PMPADDR11 = 12'h3BB, + CSR_PMPADDR12 = 12'h3BC, + CSR_PMPADDR13 = 12'h3BD, + CSR_PMPADDR14 = 12'h3BE, + CSR_PMPADDR15 = 12'h3BF, + CSR_MVENDORID = 12'hF11, + CSR_MARCHID = 12'hF12, + CSR_MIMPID = 12'hF13, + CSR_MHARTID = 12'hF14, + CSR_MCONFIGPTR = 12'hF15, + CSR_MCYCLE = 12'hB00, + CSR_MCYCLEH = 12'hB80, + CSR_MINSTRET = 12'hB02, + CSR_MINSTRETH = 12'hB82, + //Performance Counters + CSR_MHPM_COUNTER_3 = 12'hB03, + CSR_MHPM_COUNTER_4 = 12'hB04, + CSR_MHPM_COUNTER_5 = 12'hB05, + CSR_MHPM_COUNTER_6 = 12'hB06, + CSR_MHPM_COUNTER_7 = 12'hB07, + CSR_MHPM_COUNTER_8 = 12'hB08, + CSR_MHPM_COUNTER_9 = 12'hB09, // reserved + CSR_MHPM_COUNTER_10 = 12'hB0A, // reserved + CSR_MHPM_COUNTER_11 = 12'hB0B, // reserved + CSR_MHPM_COUNTER_12 = 12'hB0C, // reserved + CSR_MHPM_COUNTER_13 = 12'hB0D, // reserved + CSR_MHPM_COUNTER_14 = 12'hB0E, // reserved + CSR_MHPM_COUNTER_15 = 12'hB0F, // reserved + CSR_MHPM_COUNTER_16 = 12'hB10, // reserved + CSR_MHPM_COUNTER_17 = 12'hB11, // 
reserved + CSR_MHPM_COUNTER_18 = 12'hB12, // reserved + CSR_MHPM_COUNTER_19 = 12'hB13, // reserved + CSR_MHPM_COUNTER_20 = 12'hB14, // reserved + CSR_MHPM_COUNTER_21 = 12'hB15, // reserved + CSR_MHPM_COUNTER_22 = 12'hB16, // reserved + CSR_MHPM_COUNTER_23 = 12'hB17, // reserved + CSR_MHPM_COUNTER_24 = 12'hB18, // reserved + CSR_MHPM_COUNTER_25 = 12'hB19, // reserved + CSR_MHPM_COUNTER_26 = 12'hB1A, // reserved + CSR_MHPM_COUNTER_27 = 12'hB1B, // reserved + CSR_MHPM_COUNTER_28 = 12'hB1C, // reserved + CSR_MHPM_COUNTER_29 = 12'hB1D, // reserved + CSR_MHPM_COUNTER_30 = 12'hB1E, // reserved + CSR_MHPM_COUNTER_31 = 12'hB1F, // reserved + CSR_MHPM_COUNTER_3H = 12'hB83, + CSR_MHPM_COUNTER_4H = 12'hB84, + CSR_MHPM_COUNTER_5H = 12'hB85, + CSR_MHPM_COUNTER_6H = 12'hB86, + CSR_MHPM_COUNTER_7H = 12'hB87, + CSR_MHPM_COUNTER_8H = 12'hB88, + CSR_MHPM_COUNTER_9H = 12'hB89, // reserved + CSR_MHPM_COUNTER_10H = 12'hB8A, // reserved + CSR_MHPM_COUNTER_11H = 12'hB8B, // reserved + CSR_MHPM_COUNTER_12H = 12'hB8C, // reserved + CSR_MHPM_COUNTER_13H = 12'hB8D, // reserved + CSR_MHPM_COUNTER_14H = 12'hB8E, // reserved + CSR_MHPM_COUNTER_15H = 12'hB8F, // reserved + CSR_MHPM_COUNTER_16H = 12'hB90, // reserved + CSR_MHPM_COUNTER_17H = 12'hB91, // reserved + CSR_MHPM_COUNTER_18H = 12'hB92, // reserved + CSR_MHPM_COUNTER_19H = 12'hB93, // reserved + CSR_MHPM_COUNTER_20H = 12'hB94, // reserved + CSR_MHPM_COUNTER_21H = 12'hB95, // reserved + CSR_MHPM_COUNTER_22H = 12'hB96, // reserved + CSR_MHPM_COUNTER_23H = 12'hB97, // reserved + CSR_MHPM_COUNTER_24H = 12'hB98, // reserved + CSR_MHPM_COUNTER_25H = 12'hB99, // reserved + CSR_MHPM_COUNTER_26H = 12'hB9A, // reserved + CSR_MHPM_COUNTER_27H = 12'hB9B, // reserved + CSR_MHPM_COUNTER_28H = 12'hB9C, // reserved + CSR_MHPM_COUNTER_29H = 12'hB9D, // reserved + CSR_MHPM_COUNTER_30H = 12'hB9E, // reserved + CSR_MHPM_COUNTER_31H = 12'hB9F, // reserved + // Cache Control (platform specifc) + CSR_DCACHE = 12'h7C1, + CSR_ICACHE = 12'h7C0, + // Accelerator 
memory consistency (platform specific) + CSR_ACC_CONS = 12'h7C2, + // Triggers + CSR_TSELECT = 12'h7A0, + CSR_TDATA1 = 12'h7A1, + CSR_TDATA2 = 12'h7A2, + CSR_TDATA3 = 12'h7A3, + CSR_TINFO = 12'h7A4, + // Debug CSR + CSR_DCSR = 12'h7b0, + CSR_DPC = 12'h7b1, + CSR_DSCRATCH0 = 12'h7b2, // optional + CSR_DSCRATCH1 = 12'h7b3, // optional + // Counters and Timers (User Mode - R/O Shadows) + CSR_CYCLE = 12'hC00, + CSR_CYCLEH = 12'hC80, + CSR_TIME = 12'hC01, + CSR_TIMEH = 12'hC81, + CSR_INSTRET = 12'hC02, + CSR_INSTRETH = 12'hC82, + // Performance counters (User Mode - R/O Shadows) + CSR_HPM_COUNTER_3 = 12'hC03, + CSR_HPM_COUNTER_4 = 12'hC04, + CSR_HPM_COUNTER_5 = 12'hC05, + CSR_HPM_COUNTER_6 = 12'hC06, + CSR_HPM_COUNTER_7 = 12'hC07, + CSR_HPM_COUNTER_8 = 12'hC08, + CSR_HPM_COUNTER_9 = 12'hC09, // reserved + CSR_HPM_COUNTER_10 = 12'hC0A, // reserved + CSR_HPM_COUNTER_11 = 12'hC0B, // reserved + CSR_HPM_COUNTER_12 = 12'hC0C, // reserved + CSR_HPM_COUNTER_13 = 12'hC0D, // reserved + CSR_HPM_COUNTER_14 = 12'hC0E, // reserved + CSR_HPM_COUNTER_15 = 12'hC0F, // reserved + CSR_HPM_COUNTER_16 = 12'hC10, // reserved + CSR_HPM_COUNTER_17 = 12'hC11, // reserved + CSR_HPM_COUNTER_18 = 12'hC12, // reserved + CSR_HPM_COUNTER_19 = 12'hC13, // reserved + CSR_HPM_COUNTER_20 = 12'hC14, // reserved + CSR_HPM_COUNTER_21 = 12'hC15, // reserved + CSR_HPM_COUNTER_22 = 12'hC16, // reserved + CSR_HPM_COUNTER_23 = 12'hC17, // reserved + CSR_HPM_COUNTER_24 = 12'hC18, // reserved + CSR_HPM_COUNTER_25 = 12'hC19, // reserved + CSR_HPM_COUNTER_26 = 12'hC1A, // reserved + CSR_HPM_COUNTER_27 = 12'hC1B, // reserved + CSR_HPM_COUNTER_28 = 12'hC1C, // reserved + CSR_HPM_COUNTER_29 = 12'hC1D, // reserved + CSR_HPM_COUNTER_30 = 12'hC1E, // reserved + CSR_HPM_COUNTER_31 = 12'hC1F, // reserved + CSR_HPM_COUNTER_3H = 12'hC83, + CSR_HPM_COUNTER_4H = 12'hC84, + CSR_HPM_COUNTER_5H = 12'hC85, + CSR_HPM_COUNTER_6H = 12'hC86, + CSR_HPM_COUNTER_7H = 12'hC87, + CSR_HPM_COUNTER_8H = 12'hC88, + CSR_HPM_COUNTER_9H = 
12'hC89, // reserved + CSR_HPM_COUNTER_10H = 12'hC8A, // reserved + CSR_HPM_COUNTER_11H = 12'hC8B, // reserved + CSR_HPM_COUNTER_12H = 12'hC8C, // reserved + CSR_HPM_COUNTER_13H = 12'hC8D, // reserved + CSR_HPM_COUNTER_14H = 12'hC8E, // reserved + CSR_HPM_COUNTER_15H = 12'hC8F, // reserved + CSR_HPM_COUNTER_16H = 12'hC90, // reserved + CSR_HPM_COUNTER_17H = 12'hC91, // reserved + CSR_HPM_COUNTER_18H = 12'hC92, // reserved + CSR_HPM_COUNTER_19H = 12'hC93, // reserved + CSR_HPM_COUNTER_20H = 12'hC94, // reserved + CSR_HPM_COUNTER_21H = 12'hC95, // reserved + CSR_HPM_COUNTER_22H = 12'hC96, // reserved + CSR_HPM_COUNTER_23H = 12'hC97, // reserved + CSR_HPM_COUNTER_24H = 12'hC98, // reserved + CSR_HPM_COUNTER_25H = 12'hC99, // reserved + CSR_HPM_COUNTER_26H = 12'hC9A, // reserved + CSR_HPM_COUNTER_27H = 12'hC9B, // reserved + CSR_HPM_COUNTER_28H = 12'hC9C, // reserved + CSR_HPM_COUNTER_29H = 12'hC9D, // reserved + CSR_HPM_COUNTER_30H = 12'hC9E, // reserved + CSR_HPM_COUNTER_31H = 12'hC9F // reserved + } csr_reg_t; + + localparam logic [63:0] SSTATUS_UIE = 'h00000001; + localparam logic [63:0] SSTATUS_SIE = 'h00000002; + localparam logic [63:0] SSTATUS_SPIE = 'h00000020; + localparam logic [63:0] SSTATUS_SPP = 'h00000100; + localparam logic [63:0] SSTATUS_FS = 'h00006000; + localparam logic [63:0] SSTATUS_XS = 'h00018000; + localparam logic [63:0] SSTATUS_SUM = 'h00040000; + localparam logic [63:0] SSTATUS_MXR = 'h00080000; + localparam logic [63:0] SSTATUS_UPIE = 'h00000010; + localparam logic [63:0] SSTATUS_UXL = 64'h0000000300000000; + localparam logic [63:0] SSTATUS_SD = {IS_XLEN64, 31'h00000000, ~IS_XLEN64, 31'h00000000}; + + localparam logic [63:0] MSTATUS_UIE = 'h00000001; + localparam logic [63:0] MSTATUS_SIE = 'h00000002; + localparam logic [63:0] MSTATUS_HIE = 'h00000004; + localparam logic [63:0] MSTATUS_MIE = 'h00000008; + localparam logic [63:0] MSTATUS_UPIE = 'h00000010; + localparam logic [63:0] MSTATUS_SPIE = 'h00000020; + localparam logic [63:0] 
MSTATUS_HPIE = 'h00000040; + localparam logic [63:0] MSTATUS_MPIE = 'h00000080; + localparam logic [63:0] MSTATUS_SPP = 'h00000100; + localparam logic [63:0] MSTATUS_HPP = 'h00000600; + localparam logic [63:0] MSTATUS_MPP = 'h00001800; + localparam logic [63:0] MSTATUS_FS = 'h00006000; + localparam logic [63:0] MSTATUS_XS = 'h00018000; + localparam logic [63:0] MSTATUS_MPRV = 'h00020000; + localparam logic [63:0] MSTATUS_SUM = 'h00040000; + localparam logic [63:0] MSTATUS_MXR = 'h00080000; + localparam logic [63:0] MSTATUS_TVM = 'h00100000; + localparam logic [63:0] MSTATUS_TW = 'h00200000; + localparam logic [63:0] MSTATUS_TSR = 'h00400000; + localparam logic [63:0] MSTATUS_UXL = {30'h0000000, IS_XLEN64, IS_XLEN64, 32'h00000000}; + localparam logic [63:0] MSTATUS_SXL = {28'h0000000, IS_XLEN64, IS_XLEN64, 34'h00000000}; + localparam logic [63:0] MSTATUS_SD = {IS_XLEN64, 31'h00000000, ~IS_XLEN64, 31'h00000000}; + + typedef enum logic [2:0] { + CSRRW = 3'h1, + CSRRS = 3'h2, + CSRRC = 3'h3, + CSRRWI = 3'h5, + CSRRSI = 3'h6, + CSRRCI = 3'h7 + } csr_op_t; + + // decoded CSR address + typedef struct packed { + logic [1:0] rw; + priv_lvl_t priv_lvl; + logic [7:0] address; + } csr_addr_t; + + typedef union packed { + csr_reg_t address; + csr_addr_t csr_decode; + } csr_t; + + // Floating-Point control and status register (32-bit!) 
+ typedef struct packed { + logic [31:15] reserved; // reserved for L extension, return 0 otherwise + logic [6:0] fprec; // div/sqrt precision control + logic [2:0] frm; // float rounding mode + logic [4:0] fflags; // float exception flags + } fcsr_t; + + // PMP + typedef enum logic [1:0] { + OFF = 2'b00, + TOR = 2'b01, + NA4 = 2'b10, + NAPOT = 2'b11 + } pmp_addr_mode_t; + + // PMP Access Type + typedef enum logic [2:0] { + ACCESS_NONE = 3'b000, + ACCESS_READ = 3'b001, + ACCESS_WRITE = 3'b010, + ACCESS_EXEC = 3'b100 + } pmp_access_t; + + typedef struct packed { + logic x; + logic w; + logic r; + } pmpcfg_access_t; + + // packed struct of a PMP configuration register (8bit) + typedef struct packed { + logic locked; // lock this configuration + logic [1:0] reserved; + pmp_addr_mode_t addr_mode; // Off, TOR, NA4, NAPOT + pmpcfg_access_t access_type; + } pmpcfg_t; + + // ----- + // Debug + // ----- + typedef struct packed { + logic [31:28] xdebugver; + logic [27:16] zero2; + logic ebreakm; + logic zero1; + logic ebreaks; + logic ebreaku; + logic stepie; + logic stopcount; + logic stoptime; + logic [8:6] cause; + logic zero0; + logic mprven; + logic nmip; + logic step; + priv_lvl_t prv; + } dcsr_t; + + // Instruction Generation *incomplete* + function automatic logic [31:0] jal(logic [4:0] rd, logic [20:0] imm); + // JAL (opcode 7'h6f): immediate is emitted in the order imm[20], imm[10:1], imm[11], imm[19:12]; imm[0] is not encoded + return {imm[20], imm[10:1], imm[11], imm[19:12], rd, 7'h6f}; + endfunction + + function automatic logic [31:0] jalr(logic [4:0] rd, logic [4:0] rs1, logic [11:0] offset); + // JALR (opcode 7'h67): fields are offset[11:0], rs1, funct3 = 3'b0, rd + return {offset[11:0], rs1, 3'b0, rd, 7'h67}; + endfunction + + function automatic logic [31:0] andi(logic [4:0] rd, logic [4:0] rs1, logic [11:0] imm); + // ANDI (opcode 7'h13, funct3 = 3'h7): fields are imm[11:0], rs1, funct3, rd + return {imm[11:0], rs1, 3'h7, rd, 7'h13}; + endfunction + + function automatic logic [31:0] slli(logic [4:0] rd, logic [4:0] rs1, logic [5:0] shamt); + // SLLI (opcode 7'h13, funct3 = 3'h1): six zero bits followed by the 6-bit shift amount + return {6'b0, shamt[5:0], rs1, 3'h1, rd, 7'h13}; + endfunction + + function automatic logic [31:0] srli(logic [4:0] rd,
logic [4:0] rs1, logic [5:0] shamt); + // SRLI (opcode 7'h13, funct3 = 3'h5): six zero bits followed by the 6-bit shift amount + return {6'b0, shamt[5:0], rs1, 3'h5, rd, 7'h13}; + endfunction + + function automatic logic [31:0] load(logic [2:0] size, logic [4:0] dest, logic [4:0] base, + logic [11:0] offset); + // OpCode Load (7'h03): size is placed in the funct3 field + return {offset[11:0], base, size, dest, 7'h03}; + endfunction + + function automatic logic [31:0] auipc(logic [4:0] rd, logic [20:0] imm); + // OpCode Auipc (7'h17). NOTE(review): imm is permuted exactly like the jal immediate, whereas the RISC-V U-type format keeps imm[31:12] contiguous; confirm this encoding is intended + return {imm[20], imm[10:1], imm[11], imm[19:12], rd, 7'h17}; + endfunction + + function automatic logic [31:0] store(logic [2:0] size, logic [4:0] src, logic [4:0] base, + logic [11:0] offset); + // OpCode Store (7'h23): offset is split into offset[11:5] and offset[4:0] around src, base and size + return {offset[11:5], src, base, size, offset[4:0], 7'h23}; + endfunction + + function automatic logic [31:0] float_load(logic [2:0] size, logic [4:0] dest, logic [4:0] base, + logic [11:0] offset); + // OpCode LOAD-FP (7'b00_001_11): same field layout as load + return {offset[11:0], base, size, dest, 7'b00_001_11}; + endfunction + + function automatic logic [31:0] float_store(logic [2:0] size, logic [4:0] src, logic [4:0] base, + logic [11:0] offset); + // OpCode STORE-FP (7'b01_001_11): same field layout as store + return {offset[11:5], src, base, size, offset[4:0], 7'b01_001_11}; + endfunction + + function automatic logic [31:0] csrw(csr_reg_t csr, logic [4:0] rs1); + // csr, rs1, funct3 = 3'h1 (CSRRW), rd = 5'h0, OpCode System + return {csr, rs1, 3'h1, 5'h0, 7'h73}; + endfunction + + function automatic logic [31:0] csrr(csr_reg_t csr, logic [4:0] dest); + // csr, rs1 = 5'h0, funct3 = 3'h2 (CSRRS), rd = dest, OpCode System + return {csr, 5'h0, 3'h2, dest, 7'h73}; + endfunction + + function automatic logic [31:0] branch(logic [4:0] src2, logic [4:0] src1, logic [2:0] funct3, + logic [11:0] offset); + // OpCode Branch: offset bits are emitted as [11], [9:4] before the registers and [3:0], [10] after funct3 + return {offset[11], offset[9:4], src2, src1, funct3, offset[3:0], offset[10], 7'b11_000_11}; + endfunction + + function automatic logic [31:0] ebreak(); + return 32'h00100073; + endfunction + + function automatic logic [31:0] wfi(); + return 32'h10500073; + endfunction + + function automatic logic [31:0] nop(); + return 32'h00000013; + endfunction + + function automatic logic [31:0]
illegal(); + return 32'h00000000; + endfunction + + + // trace log compatible to spikes commit log feature + // pragma translate_off + function string spikeCommitLog(logic [63:0] pc, priv_lvl_t priv_lvl, logic [31:0] instr, + logic [4:0] rd, logic [63:0] result, logic rd_fpr); + string rd_s; + string instr_word; + + automatic string rf_s = rd_fpr ? "f" : "x"; + + if (instr[1:0] != 2'b11) begin + instr_word = $sformatf("(0x%h)", instr[15:0]); + end else begin + instr_word = $sformatf("(0x%h)", instr); + end + + if (rd < 10) rd_s = $sformatf("%s %0d", rf_s, rd); + else rd_s = $sformatf("%s%0d", rf_s, rd); + + if (rd_fpr || rd != 0) begin + // 0 0x0000000080000118 (0xeecf8f93) x31 0x0000000080004000 + return $sformatf("%d 0x%h %s %s 0x%h\n", priv_lvl, pc, instr_word, rd_s, result); + end else begin + // 0 0x000000008000019c (0x0040006f) + return $sformatf("%d 0x%h %s\n", priv_lvl, pc, instr_word); + end + endfunction + + typedef struct { + byte priv; + longint unsigned pc; + byte is_fp; + byte rd; + longint unsigned data; + int unsigned instr; + byte was_exception; + } commit_log_t; + // pragma translate_on + +endpackage diff --git a/test/type_param/core/include/std_cache_pkg.sv b/test/type_param/core/include/std_cache_pkg.sv new file mode 100644 index 00000000..ae812c99 --- /dev/null +++ b/test/type_param/core/include/std_cache_pkg.sv @@ -0,0 +1,98 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba , ETH Zurich +// Michael Schaffner , ETH Zurich +// Date: 15.08.2018 + +// ******* WIP ******* +// Description: package for the standard Ariane cache subsystem. + +package std_cache_pkg; + + // Calculated parameter + localparam DCACHE_BYTE_OFFSET = $clog2(ariane_pkg::DCACHE_LINE_WIDTH / 8); + localparam DCACHE_NUM_WORDS = 2 ** (ariane_pkg::DCACHE_INDEX_WIDTH - DCACHE_BYTE_OFFSET); + localparam DCACHE_DIRTY_WIDTH = ariane_pkg::DCACHE_SET_ASSOC * 2; + localparam DCACHE_SET_ASSOC_WIDTH = $clog2(ariane_pkg::DCACHE_SET_ASSOC); + // localparam DECISION_BIT = 30; // bit on which to decide whether the request is cache-able or not + + typedef struct packed { + logic [1:0] id; // id for which we handle the miss + logic valid; + logic we; + logic [55:0] addr; + logic [7:0][7:0] wdata; + logic [7:0] be; + } mshr_t; + + typedef struct packed { + logic valid; + logic [63:0] addr; + logic [7:0] be; + logic [1:0] size; + logic we; + logic [63:0] wdata; + logic bypass; + } miss_req_t; + + typedef struct packed { + logic req; + ariane_pkg::ad_req_t reqtype; + ariane_pkg::amo_t amo; + logic [3:0] id; + logic [63:0] addr; + logic [63:0] wdata; + logic we; + logic [7:0] be; + logic [1:0] size; + } bypass_req_t; + + typedef struct packed { + logic gnt; + logic valid; + logic [63:0] rdata; + } bypass_rsp_t; + + typedef struct packed { + logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] tag; // tag array + logic [ariane_pkg::DCACHE_LINE_WIDTH-1:0] data; // data array + logic valid; // state array + logic dirty; // state array + } cache_line_t; + + // cache line byte enable + typedef struct packed { + logic [(ariane_pkg::DCACHE_TAG_WIDTH+7)/8-1:0] tag; // byte enable into tag array + logic [(ariane_pkg::DCACHE_LINE_WIDTH+7)/8-1:0] data; // byte enable into data array + logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] vldrty; // bit enable into state array (valid for a pair 
of dirty/valid bits) + } cl_be_t; + + // convert one-hot to binary: returns the index of the lowest set bit of in (needed for cache replacement). NOTE(review): no return statement is reached when in is all zeros + function automatic logic [DCACHE_SET_ASSOC_WIDTH-1:0] one_hot_to_bin( + input logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] in); + for (int unsigned i = 0; i < ariane_pkg::DCACHE_SET_ASSOC; i++) begin + if (in[i]) return i; + end + endfunction + // get the first (lowest-index) bit set, returned as a one-hot value. NOTE(review): no return statement is reached when valid_dirty is all zeros + function automatic logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] get_victim_cl( + input logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] valid_dirty); + // one-hot return vector + logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] oh = '0; + for (int unsigned i = 0; i < ariane_pkg::DCACHE_SET_ASSOC; i++) begin + if (valid_dirty[i]) begin + oh[i] = 1'b1; + return oh; + end + end + endfunction +endpackage : std_cache_pkg + diff --git a/test/type_param/core/include/wt_cache_pkg.sv b/test/type_param/core/include/wt_cache_pkg.sv new file mode 100644 index 00000000..9a8c0ce0 --- /dev/null +++ b/test/type_param/core/include/wt_cache_pkg.sv @@ -0,0 +1,344 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License.
+// +// Author: Michael Schaffner , ETH Zurich +// Date: 15.08.2018 +// Description: Package for OpenPiton compatible L1 cache subsystem + +// this is needed to propagate the +// configuration in case Ariane is +// instantiated in OpenPiton +`ifdef PITON_ARIANE +`include "l15.tmp.h" +`include "define.tmp.h" +`endif + +package wt_cache_pkg; + + // these parames need to coincide with the + // L1.5 parameterization, do not change +`ifdef PITON_ARIANE + +`ifndef CONFIG_L15_ASSOCIATIVITY + `define CONFIG_L15_ASSOCIATIVITY 4 +`endif + +`ifndef TLB_CSM_WIDTH + `define TLB_CSM_WIDTH 33 +`endif + + localparam L15_SET_ASSOC = `CONFIG_L15_ASSOCIATIVITY; + localparam L15_TLB_CSM_WIDTH = `TLB_CSM_WIDTH; +`else + localparam L15_SET_ASSOC = ariane_pkg::DCACHE_SET_ASSOC;// align with dcache for compatibility with the standard Ariane setup + localparam L15_TLB_CSM_WIDTH = 33; +`endif + localparam L15_TID_WIDTH = ariane_pkg::MEM_TID_WIDTH; + localparam L15_WAY_WIDTH = $clog2(L15_SET_ASSOC); + localparam L1I_WAY_WIDTH = $clog2(ariane_pkg::ICACHE_SET_ASSOC); + localparam L1D_WAY_WIDTH = $clog2(ariane_pkg::DCACHE_SET_ASSOC); + + // FIFO depths of L15 adapter + localparam ADAPTER_REQ_FIFO_DEPTH = 2; + localparam ADAPTER_RTRN_FIFO_DEPTH = 2; + + + // Calculated parameter + localparam ICACHE_OFFSET_WIDTH = $clog2(ariane_pkg::ICACHE_LINE_WIDTH / 8); + localparam ICACHE_NUM_WORDS = 2 ** (ariane_pkg::ICACHE_INDEX_WIDTH - ICACHE_OFFSET_WIDTH); + localparam ICACHE_CL_IDX_WIDTH = $clog2(ICACHE_NUM_WORDS); // excluding byte offset + + localparam DCACHE_OFFSET_WIDTH = $clog2(ariane_pkg::DCACHE_LINE_WIDTH / 8); + localparam DCACHE_NUM_WORDS = 2 ** (ariane_pkg::DCACHE_INDEX_WIDTH - DCACHE_OFFSET_WIDTH); + localparam DCACHE_CL_IDX_WIDTH = $clog2(DCACHE_NUM_WORDS); // excluding byte offset + + localparam DCACHE_NUM_BANKS = ariane_pkg::DCACHE_LINE_WIDTH / riscv::XLEN; + localparam DCACHE_NUM_BANKS_WIDTH = $clog2(DCACHE_NUM_BANKS); + + // write buffer parameterization + localparam DCACHE_WBUF_DEPTH = 
ariane_pkg::WT_DCACHE_WBUF_DEPTH; + localparam DCACHE_MAX_TX = 2 ** L15_TID_WIDTH; + localparam CACHE_ID_WIDTH = L15_TID_WIDTH; + + + typedef struct packed { + logic [ariane_pkg::DCACHE_TAG_WIDTH+(ariane_pkg::DCACHE_INDEX_WIDTH-riscv::XLEN_ALIGN_BYTES)-1:0] wtag; + riscv::xlen_t data; + logic [ariane_pkg::DCACHE_USER_WIDTH-1:0] user; + logic [(riscv::XLEN/8)-1:0] dirty; // byte is dirty + logic [(riscv::XLEN/8)-1:0] valid; // byte is valid + logic [(riscv::XLEN/8)-1:0] txblock; // byte is part of transaction in-flight + logic checked; // if cache state of this word has been checked + logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] hit_oh; // valid way in the cache + } wbuffer_t; + + // TX status registers are indexed with the transaction ID + // they basically store which bytes from which buffer entry are part + // of that transaction + + typedef struct packed { + logic vld; + logic [(riscv::XLEN/8)-1:0] be; + logic [$clog2(DCACHE_WBUF_DEPTH)-1:0] ptr; + } tx_stat_t; + + // local interfaces between caches and L15 adapter + typedef enum logic [1:0] { + DCACHE_STORE_REQ, + DCACHE_LOAD_REQ, + DCACHE_ATOMIC_REQ, + DCACHE_INT_REQ + } dcache_out_t; + + typedef enum logic [2:0] { + DCACHE_INV_REQ, // no ack from the core required + DCACHE_STORE_ACK, // note: this may contain an invalidation vector, too + DCACHE_LOAD_ACK, + DCACHE_ATOMIC_ACK, + DCACHE_INT_ACK + } dcache_in_t; + + typedef enum logic [0:0] { + ICACHE_INV_REQ, // no ack from the core required + ICACHE_IFILL_ACK + } icache_in_t; + + // icache interface + typedef struct packed { + logic vld; // invalidate only affected way + logic all; // invalidate all ways + logic [ariane_pkg::ICACHE_INDEX_WIDTH-1:0] idx; // physical address to invalidate + logic [L1I_WAY_WIDTH-1:0] way; // way to invalidate + } icache_inval_t; + + typedef struct packed { + logic [$clog2(ariane_pkg::ICACHE_SET_ASSOC)-1:0] way; // way to replace + logic [riscv::PLEN-1:0] paddr; // physical address + logic nc; // noncacheable + logic 
[CACHE_ID_WIDTH-1:0] tid; // thread id (used as transaction id in Ariane) + } icache_req_t; + + typedef struct packed { + icache_in_t rtype; // see definitions above + logic [ariane_pkg::ICACHE_LINE_WIDTH-1:0] data; // full cache line width + logic [ariane_pkg::ICACHE_USER_LINE_WIDTH-1:0] user; // user bits + icache_inval_t inv; // invalidation vector + logic [CACHE_ID_WIDTH-1:0] tid; // thread id (used as transaction id in Ariane) + } icache_rtrn_t; + + // dcache interface + typedef struct packed { + logic vld; // invalidate only affected way + logic all; // invalidate all ways + logic [ariane_pkg::DCACHE_INDEX_WIDTH-1:0] idx; // physical address to invalidate + logic [L15_WAY_WIDTH-1:0] way; // way to invalidate + } dcache_inval_t; + + typedef struct packed { + dcache_out_t rtype; // see definitions above + logic [2:0] size; // transaction size: 000=Byte 001=2Byte; 010=4Byte; 011=8Byte; 111=Cache line (16/32Byte) + logic [L1D_WAY_WIDTH-1:0] way; // way to replace + logic [riscv::PLEN-1:0] paddr; // physical address + riscv::xlen_t data; // word width of processor (no block stores at the moment) + logic [ariane_pkg::DATA_USER_WIDTH-1:0] user; // user width of processor (no block stores at the moment) + logic nc; // noncacheable + logic [CACHE_ID_WIDTH-1:0] tid; // thread id (used as transaction id in Ariane) + ariane_pkg::amo_t amo_op; // amo opcode + } dcache_req_t; + + typedef struct packed { + dcache_in_t rtype; // see definitions above + logic [ariane_pkg::DCACHE_LINE_WIDTH-1:0] data; // full cache line width + logic [ariane_pkg::DCACHE_USER_LINE_WIDTH-1:0] user; // user bits + dcache_inval_t inv; // invalidation vector + logic [CACHE_ID_WIDTH-1:0] tid; // thread id (used as transaction id in Ariane) + } dcache_rtrn_t; + + + // taken from iop.h in openpiton + // to l1.5 (only marked subset is used) + typedef enum logic [4:0] { + L15_LOAD_RQ = 5'b00000, // load request + L15_IMISS_RQ = 5'b10000, // instruction fill request + L15_STORE_RQ = 5'b00001, //
store request + L15_ATOMIC_RQ = 5'b00110, // atomic op + //L15_CAS1_RQ = 5'b00010, // compare and swap1 packet (OpenSparc atomics) + //L15_CAS2_RQ = 5'b00011, // compare and swap2 packet (OpenSparc atomics) + //L15_SWAP_RQ = 5'b00110, // swap packet (OpenSparc atomics) + L15_STRLOAD_RQ = 5'b00100, // unused + L15_STRST_RQ = 5'b00101, // unused + L15_STQ_RQ = 5'b00111, // unused + L15_INT_RQ = 5'b01001, // interrupt request + L15_FWD_RQ = 5'b01101, // unused + L15_FWD_RPY = 5'b01110, // unused + L15_RSVD_RQ = 5'b11111 // unused + } l15_reqtypes_t; + + // from l1.5 (only marked subset is used) + typedef enum logic [3:0] { + L15_LOAD_RET = 4'b0000, // load packet + // L15_INV_RET = 4'b0011, // invalidate packet, not unique... + L15_ST_ACK = 4'b0100, // store ack packet + //L15_AT_ACK = 4'b0011, // unused, not unique... + L15_INT_RET = 4'b0111, // interrupt packet + L15_TEST_RET = 4'b0101, // unused + L15_FP_RET = 4'b1000, // unused + L15_IFILL_RET = 4'b0001, // instruction fill packet + L15_EVICT_REQ = 4'b0011, // eviction request + L15_ERR_RET = 4'b1100, // unused + L15_STRLOAD_RET = 4'b0010, // unused + L15_STRST_ACK = 4'b0110, // unused + L15_FWD_RQ_RET = 4'b1010, // unused + L15_FWD_RPY_RET = 4'b1011, // unused + L15_RSVD_RET = 4'b1111, // unused + L15_CPX_RESTYPE_ATOMIC_RES = 4'b1110 // custom type for atomic responses + } l15_rtrntypes_t; + + + typedef struct packed { + logic l15_val; // valid signal, asserted with request + logic l15_req_ack; // ack for response + l15_reqtypes_t l15_rqtype; // see below for encoding + logic l15_nc; // non-cacheable bit + logic [2:0] l15_size; // transaction size: 000=Byte 001=2Byte; 010=4Byte; 011=8Byte; 111=Cache line (16/32Byte) + logic [L15_TID_WIDTH-1:0] l15_threadid; // currently 0 or 1 + logic l15_prefetch; // unused in openpiton + logic l15_invalidate_cacheline; // unused by Ariane as L1 has no ECC at the moment + logic l15_blockstore; // unused in openpiton + logic l15_blockinitstore; // unused in openpiton + logic 
[L15_WAY_WIDTH-1:0] l15_l1rplway; // way to replace + logic [39:0] l15_address; // physical address + logic [63:0] l15_data; // word to write + logic [63:0] l15_data_next_entry; // unused in Ariane (only used for CAS atomic requests) + logic [L15_TLB_CSM_WIDTH-1:0] l15_csm_data; // unused in Ariane + logic [3:0] l15_amo_op; // atomic operation type + } l15_req_t; + + typedef struct packed { + logic l15_ack; // ack for request struct + logic l15_header_ack; // ack for request struct + logic l15_val; // valid signal for return struct + l15_rtrntypes_t l15_returntype; // see below for encoding + logic l15_l2miss; // unused in Ariane + logic [1:0] l15_error; // unused in openpiton + logic l15_noncacheable; // non-cacheable bit + logic l15_atomic; // asserted in load return and store ack packets of atomic tx + logic [L15_TID_WIDTH-1:0] l15_threadid; // used as transaction ID + logic l15_prefetch; // unused in openpiton + logic l15_f4b; // 4byte instruction fill from I/O space (nc). + logic [63:0] l15_data_0; // used for both caches + logic [63:0] l15_data_1; // used for both caches + logic [63:0] l15_data_2; // currently only used for I$ + logic [63:0] l15_data_3; // currently only used for I$ + logic l15_inval_icache_all_way; // invalidate all ways + logic l15_inval_dcache_all_way; // unused in openpiton + logic [15:4] l15_inval_address_15_4; // invalidate selected cacheline + logic l15_cross_invalidate; // unused in openpiton + logic [L15_WAY_WIDTH-1:0] l15_cross_invalidate_way; // unused in openpiton + logic l15_inval_dcache_inval; // invalidate selected cacheline and way + logic l15_inval_icache_inval; // unused in openpiton + logic [L15_WAY_WIDTH-1:0] l15_inval_way; // way to invalidate + logic l15_blockinitstore; // unused in openpiton + } l15_rtrn_t; + + // swap endianess in a 64bit word + function automatic logic [63:0] swendian64(input logic [63:0] in); + automatic logic [63:0] out; + for (int k = 0; k < 64; k += 8) begin + out[k+:8] = in[63-k-:8]; + end + 
return out; + endfunction + + function automatic logic [5:0] popcnt64(input logic [63:0] in); + logic [5:0] cnt = 0; + foreach (in[k]) begin + cnt += 6'(in[k]); + end + return cnt; + endfunction : popcnt64 + + function automatic logic [(riscv::XLEN/8)-1:0] to_byte_enable8( + input logic [riscv::XLEN_ALIGN_BYTES-1:0] offset, input logic [1:0] size); + logic [(riscv::XLEN/8)-1:0] be; + be = '0; + unique case (size) + 2'b00: be[offset] = '1; // byte + 2'b01: be[offset+:2] = '1; // hword + 2'b10: be[offset+:4] = '1; // word + default: be = '1; // dword + endcase // size + return be; + endfunction : to_byte_enable8 + + function automatic logic [(riscv::XLEN/8)-1:0] to_byte_enable4( + input logic [riscv::XLEN_ALIGN_BYTES-1:0] offset, input logic [1:0] size); + logic [3:0] be; + be = '0; + unique case (size) + 2'b00: be[offset] = '1; // byte + 2'b01: be[offset+:2] = '1; // hword + default: be = '1; // word + endcase // size + return be; + endfunction : to_byte_enable4 + + // openpiton requires the data to be replicated in case of smaller sizes than dwords + function automatic riscv::xlen_t repData64(input riscv::xlen_t data, + input logic [riscv::XLEN_ALIGN_BYTES-1:0] offset, + input logic [1:0] size); + riscv::xlen_t out; + unique case (size) + 2'b00: for (int k = 0; k < 8; k++) out[k*8+:8] = data[offset*8+:8]; // byte + 2'b01: for (int k = 0; k < 4; k++) out[k*16+:16] = data[offset*8+:16]; // hword + 2'b10: for (int k = 0; k < 2; k++) out[k*32+:32] = data[offset*8+:32]; // word + default: out = data; // dword + endcase // size + return out; + endfunction : repData64 + + function automatic riscv::xlen_t repData32(input riscv::xlen_t data, + input logic [riscv::XLEN_ALIGN_BYTES-1:0] offset, + input logic [1:0] size); + riscv::xlen_t out; + unique case (size) + 2'b00: for (int k = 0; k < 4; k++) out[k*8+:8] = data[offset*8+:8]; // byte + 2'b01: for (int k = 0; k < 2; k++) out[k*16+:16] = data[offset*8+:16]; // hword + default: out = data; // word + endcase // size + return 
out; + endfunction : repData32 + + // note: this is openpiton specific. cannot transmit unaligned words. + // hence we default to individual bytes in that case, and they have to be transmitted + // one after the other + function automatic logic [1:0] toSize64(input logic [7:0] be); + logic [1:0] size; + unique case (be) + 8'b1111_1111: size = 2'b11; // dword + 8'b0000_1111, 8'b1111_0000: size = 2'b10; // word + 8'b1100_0000, 8'b0011_0000, 8'b0000_1100, 8'b0000_0011: size = 2'b01; // hword + default: size = 2'b00; // individual bytes + endcase // be + return size; + endfunction : toSize64 + + + function automatic logic [1:0] toSize32(input logic [3:0] be); + logic [1:0] size; + unique case (be) + 4'b1111: size = 2'b10; // word + 4'b1100, 4'b0011: size = 2'b01; // hword + default: size = 2'b00; // individual bytes + endcase // be + return size; + endfunction : toSize32 + +endpackage diff --git a/test/type_param/core/instr_realign.sv b/test/type_param/core/instr_realign.sv new file mode 100644 index 00000000..043a1311 --- /dev/null +++ b/test/type_param/core/instr_realign.sv @@ -0,0 +1,361 @@ +// Copyright 2018 - 2019 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba +// Description: Instruction Re-aligner +// +// This module takes 32-bit aligned cache blocks and extracts the instructions. 
+// As we are supporting the compressed instruction set extension in a 32 bit instruction word +// are up to 2 compressed instructions. +// Furthermore those instructions can be arbitrarily interleaved which makes it possible to fetch +// only the lower part of a 32 bit instruction. +// Furthermore we need to handle the case if we want to start fetching from an unaligned +// instruction e.g. a branch. + + +module instr_realign + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +) ( + input logic clk_i, + input logic rst_ni, + input logic flush_i, + input logic valid_i, + output logic serving_unaligned_o, // we have an unaligned instruction in [0] + input logic [riscv::VLEN-1:0] address_i, + input logic [FETCH_WIDTH-1:0] data_i, + output logic [INSTR_PER_FETCH-1:0] valid_o, + output logic [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] addr_o, + output logic [INSTR_PER_FETCH-1:0][31:0] instr_o +); + // as a maximum we support a fetch width of 64-bit, hence there can be 4 compressed instructions + logic [3:0] instr_is_compressed; + + for (genvar i = 0; i < INSTR_PER_FETCH; i++) begin + // LSB != 2'b11 + assign instr_is_compressed[i] = ~&data_i[i*16+:2]; + end + + // save the unaligned part of the instruction to this ff + logic [15:0] unaligned_instr_d, unaligned_instr_q; + // the last instruction was unaligned + logic unaligned_d, unaligned_q; + // register to save the unaligned address + logic [riscv::VLEN-1:0] unaligned_address_d, unaligned_address_q; + // we have an unaligned instruction + assign serving_unaligned_o = unaligned_q; + + // Instruction re-alignment + if (FETCH_WIDTH == 32) begin : realign_bp_32 + always_comb begin : re_align + unaligned_d = unaligned_q; + unaligned_address_d = {address_i[riscv::VLEN-1:2], 2'b10}; + unaligned_instr_d = data_i[31:16]; + + valid_o[0] = valid_i; + instr_o[0] = (unaligned_q) ? {data_i[15:0], unaligned_instr_q} : data_i[31:0]; + addr_o[0] = (unaligned_q) ? 
unaligned_address_q : address_i; + + valid_o[1] = 1'b0; + instr_o[1] = '0; + addr_o[1] = {address_i[riscv::VLEN-1:2], 2'b10}; + + // this instruction is compressed or the last instruction was unaligned + if (instr_is_compressed[0] || unaligned_q) begin + // check if this is instruction is still unaligned e.g.: it is not compressed + // if its compressed re-set unaligned flag + // for 32 bit we can simply check the next instruction and whether it is compressed or not + // if it is compressed the next fetch will contain an aligned instruction + // is instruction 1 also compressed + // yes? -> no problem, no -> we've got an unaligned instruction + if (instr_is_compressed[1]) begin + unaligned_d = 1'b0; + valid_o[1] = valid_i; + instr_o[1] = {16'b0, data_i[31:16]}; + end else begin + // save the upper bits for next cycle + unaligned_d = 1'b1; + unaligned_instr_d = data_i[31:16]; + unaligned_address_d = {address_i[riscv::VLEN-1:2], 2'b10}; + end + end // else -> normal fetch + + // we started to fetch on a unaligned boundary with a whole instruction -> wait until we've + // received the next instruction + if (valid_i && address_i[1]) begin + // the instruction is not compressed so we can't do anything in this cycle + if (!instr_is_compressed[0]) begin + valid_o = '0; + unaligned_d = 1'b1; + unaligned_address_d = {address_i[riscv::VLEN-1:2], 2'b10}; + unaligned_instr_d = data_i[15:0]; + // the instruction isn't compressed but only the lower is ready + end else begin + valid_o = {{INSTR_PER_FETCH - 1{1'b0}}, 1'b1}; + end + end + end + // TODO(zarubaf): Fix 64 bit FETCH_WIDTH, maybe generalize to arbitrary fetch width + end else if (FETCH_WIDTH == 64) begin : realign_bp_64 + initial begin + $error("Not propperly implemented"); + end + always_comb begin : re_align + unaligned_d = unaligned_q; + unaligned_address_d = unaligned_address_q; + unaligned_instr_d = unaligned_instr_q; + + valid_o = '0; + valid_o[0] = valid_i; + + instr_o[0] = data_i[31:0]; + addr_o[0] = address_i; 
+ + instr_o[1] = '0; + addr_o[1] = {address_i[riscv::VLEN-1:3], 3'b010}; + + instr_o[2] = {16'b0, data_i[47:32]}; + addr_o[2] = {address_i[riscv::VLEN-1:3], 3'b100}; + + instr_o[3] = {16'b0, data_i[63:48]}; + addr_o[3] = {address_i[riscv::VLEN-1:3], 3'b110}; + + // last instruction was unaligned + if (unaligned_q) begin + instr_o[0] = {data_i[15:0], unaligned_instr_q}; + addr_o[0] = unaligned_address_q; + // for 64 bit there exist the following options: + // 64 32 0 + // | 3 | 2 | 1 | 0 | <- instruction slot + // | I | I | U | -> again unaligned + // | * | C | I | U | -> aligned + // | * | I | C | U | -> aligned + // | I | C | C | U | -> again unaligned + // | * | C | C | C | U | -> aligned + // Legend: C = compressed, I = 32 bit instruction, U = unaligned upper half + // * = don't care + if (instr_is_compressed[1]) begin + instr_o[1] = {16'b0, data_i[31:16]}; + valid_o[1] = valid_i; + + if (instr_is_compressed[2]) begin + if (instr_is_compressed[3]) begin + unaligned_d = 1'b0; + valid_o[3] = valid_i; + end else begin + // continues to be unaligned + end + end else begin + unaligned_d = 1'b0; + instr_o[2] = data_i[63:32]; + valid_o[2] = valid_i; + end + // instruction 1 is not compressed + end else begin + instr_o[1] = data_i[47:16]; + valid_o[1] = valid_i; + addr_o[2] = {address_i[riscv::VLEN-1:3], 3'b110}; + if (instr_is_compressed[2]) begin + unaligned_d = 1'b0; + instr_o[2] = {16'b0, data_i[63:48]}; + valid_o[2] = valid_i; + end else begin + // continues to be unaligned + end + end + end else if (instr_is_compressed[0]) begin // instruction zero is RVC + // 64 32 0 + // | 3 | 2 | 1 | 0 | <- instruction slot + // | I | I | C | -> again unaligned + // | * | C | I | C | -> aligned + // | * | I | C | C | -> aligned + // | I | C | C | C | -> again unaligned + // | * | C | C | C | C | -> aligned + if (instr_is_compressed[1]) begin + instr_o[1] = {16'b0, data_i[31:16]}; + valid_o[1] = valid_i; + + if (instr_is_compressed[2]) begin + valid_o[2] = valid_i; + if 
(instr_is_compressed[3]) begin + valid_o[3] = valid_i; + end else begin + // this instruction is unaligned + unaligned_d = 1'b1; + unaligned_instr_d = data_i[63:48]; + unaligned_address_d = addr_o[3]; + end + end else begin + instr_o[2] = data_i[63:32]; + valid_o[2] = valid_i; + end + // instruction 1 is not compressed -> check slot 3 + end else begin + instr_o[1] = data_i[47:16]; + valid_o[1] = valid_i; + addr_o[2] = {address_i[riscv::VLEN-1:3], 3'b110}; + if (instr_is_compressed[3]) begin + instr_o[2] = data_i[63:48]; + valid_o[2] = valid_i; + end else begin + unaligned_d = 1'b1; + unaligned_instr_d = data_i[63:48]; + unaligned_address_d = addr_o[2]; + end + end + + // Full instruction in slot zero + // 64 32 0 + // | 3 | 2 | 1 | 0 | <- instruction slot + // | I | C | I | + // | * | C | C | I | + // | * | I | I | + end else begin + addr_o[1] = {address_i[riscv::VLEN-1:3], 3'b100}; + + if (instr_is_compressed[2]) begin + instr_o[1] = {16'b0, data_i[47:32]}; + valid_o[1] = valid_i; + addr_o[2] = {address_i[riscv::VLEN-1:3], 3'b110}; + if (instr_is_compressed[3]) begin + // | * | C | C | I | + valid_o[2] = valid_i; + addr_o[2] = {16'b0, data_i[63:48]}; + end else begin + // this instruction is unaligned + unaligned_d = 1'b1; + unaligned_instr_d = data_i[63:48]; + unaligned_address_d = addr_o[2]; + end + end else begin + // two regular instructions back-to-back + instr_o[1] = data_i[63:32]; + valid_o[1] = valid_i; + end + end + + // -------------------------- + // Unaligned fetch + // -------------------------- + // Address was not 64 bit aligned + case (address_i[2:1]) + // this means the previouse instruction was either compressed or unaligned + // in any case we don't ccare + 2'b01: begin + // 64 32 0 + // | 3 | 2 | 1 | 0 | <- instruction slot + // | I | I | x -> again unaligned + // | * | C | I | x -> aligned + // | * | I | C | x -> aligned + // | I | C | C | x -> again unaligned + // | * | C | C | C | x -> aligned + addr_o[0] = {address_i[riscv::VLEN-1:3], 
3'b010}; + + if (instr_is_compressed[1]) begin + instr_o[0] = {16'b0, data_i[31:16]}; + valid_o[0] = valid_i; + + if (instr_is_compressed[2]) begin + valid_o[1] = valid_i; + instr_o[1] = {16'b0, data_i[47:32]}; + addr_o[1] = {address_i[riscv::VLEN-1:3], 3'b100}; + if (instr_is_compressed[3]) begin + instr_o[2] = {16'b0, data_i[63:48]}; + addr_o[2] = {address_i[riscv::VLEN-1:3], 3'b110}; + valid_o[2] = valid_i; + end else begin + // this instruction is unaligned + unaligned_d = 1'b1; + unaligned_instr_d = data_i[63:48]; + unaligned_address_d = addr_o[3]; + end + end else begin + instr_o[1] = data_i[63:32]; + addr_o[1] = {address_i[riscv::VLEN-1:3], 3'b100}; + valid_o[1] = valid_i; + end + // instruction 1 is not compressed -> check slot 3 + end else begin + instr_o[0] = data_i[47:16]; + valid_o[0] = valid_i; + addr_o[1] = {address_i[riscv::VLEN-1:3], 3'b110}; + if (instr_is_compressed[3]) begin + instr_o[1] = data_i[63:48]; + valid_o[1] = valid_i; + end else begin + unaligned_d = 1'b1; + unaligned_instr_d = data_i[63:48]; + unaligned_address_d = addr_o[1]; + end + end + end + 2'b10: begin + valid_o = '0; + // 64 32 0 + // | 3 | 2 | 1 | 0 | <- instruction slot + // | I | C | * | <- unaligned + // | C | C | * | <- aligned + // | I | * | <- aligned + if (instr_is_compressed[2]) begin + valid_o[0] = valid_i; + instr_o[0] = data_i[47:32]; + // second instruction is also compressed + if (instr_is_compressed[3]) begin + valid_o[1] = valid_i; + instr_o[1] = data_i[63:48]; + // regular instruction -> unaligned + end else begin + unaligned_d = 1'b1; + unaligned_address_d = {address_i[riscv::VLEN-1:3], 3'b110}; + unaligned_instr_d = data_i[63:48]; + end + // instruction is a regular instruction + end else begin + valid_o[0] = valid_i; + instr_o[0] = data_i[63:32]; + addr_o[0] = address_i; + end + end + // we started to fetch on a unaligned boundary with a whole instruction -> wait until we've + // received the next instruction + 2'b11: begin + valid_o = '0; + if 
(!instr_is_compressed[3]) begin + unaligned_d = 1'b1; + unaligned_address_d = {address_i[riscv::VLEN-1:3], 3'b110}; + unaligned_instr_d = data_i[63:48]; + end else begin + valid_o[3] = valid_i; + end + end + endcase + end + end + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + unaligned_q <= 1'b0; + unaligned_address_q <= '0; + unaligned_instr_q <= '0; + end else begin + if (valid_i) begin + unaligned_address_q <= unaligned_address_d; + unaligned_instr_q <= unaligned_instr_d; + end + + if (flush_i) begin + unaligned_q <= 1'b0; + end else if (valid_i) begin + unaligned_q <= unaligned_d; + end + end + end +endmodule diff --git a/test/type_param/core/issue_read_operands.sv b/test/type_param/core/issue_read_operands.sv new file mode 100644 index 00000000..2e324861 --- /dev/null +++ b/test/type_param/core/issue_read_operands.sv @@ -0,0 +1,604 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// Author: Florian Zaruba, ETH Zurich +// Date: 08.04.2017 +// Description: Issues instruction from the scoreboard and fetches the operands +// This also includes all the forwarding logic + + +module issue_read_operands + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type rs3_len_t = logic +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + // flush + input logic flush_i, + // stall + input logic stall_i, + // coming from decoder + input scoreboard_entry_t issue_instr_i, + input logic issue_instr_valid_i, + output logic issue_ack_o, + // lookup rd in scoreboard + output logic [REG_ADDR_SIZE-1:0] rs1_o, + input riscv::xlen_t rs1_i, + input logic rs1_valid_i, + output logic [REG_ADDR_SIZE-1:0] rs2_o, + input riscv::xlen_t rs2_i, + input logic rs2_valid_i, + output logic [REG_ADDR_SIZE-1:0] rs3_o, + input rs3_len_t rs3_i, + input logic rs3_valid_i, + // get clobber input + input fu_t [2**REG_ADDR_SIZE-1:0] rd_clobber_gpr_i, + input fu_t [2**REG_ADDR_SIZE-1:0] rd_clobber_fpr_i, + // To FU, just single issue for now + output fu_data_t fu_data_o, + output riscv::xlen_t rs1_forwarding_o, // unregistered version of fu_data_o.operanda + output riscv::xlen_t rs2_forwarding_o, // unregistered version of fu_data_o.operandb + output logic [riscv::VLEN-1:0] pc_o, + output logic is_compressed_instr_o, + // ALU 1 + input logic flu_ready_i, // Fixed latency unit ready to accept a new request + output logic alu_valid_o, // Output is valid + // Branches and Jumps + output logic branch_valid_o, // this is a valid branch instruction + output branchpredict_sbe_t branch_predict_o, + // LSU + input logic lsu_ready_i, // FU is ready + output logic lsu_valid_o, // Output is valid + // MULT + output logic mult_valid_o, // Output is valid + // FPU + input logic fpu_ready_i, // FU is ready + output logic fpu_valid_o, // Output is valid + output logic [1:0] fpu_fmt_o, // FP fmt field 
from instr. + output logic [2:0] fpu_rm_o, // FP rm field from instr. + // CSR + output logic csr_valid_o, // Output is valid + // CVXIF + output logic cvxif_valid_o, + input logic cvxif_ready_i, + output logic [31:0] cvxif_off_instr_o, + // commit port + input logic [CVA6Cfg.NrCommitPorts-1:0][4:0] waddr_i, + input logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata_i, + input logic [CVA6Cfg.NrCommitPorts-1:0] we_gpr_i, + input logic [CVA6Cfg.NrCommitPorts-1:0] we_fpr_i, + + output logic stall_issue_o // stall signal, we do not want to fetch any more entries + // committing instruction instruction + // from scoreboard + // input scoreboard_entry commit_instr_i, + // output logic commit_ack_o +); + logic stall; + logic fu_busy; // functional unit is busy + riscv::xlen_t operand_a_regfile, operand_b_regfile; // operands coming from regfile + rs3_len_t + operand_c_regfile, + operand_c_fpr, + operand_c_gpr; // third operand from fp regfile or gp regfile if NR_RGPR_PORTS == 3 + // output flipflop (ID <-> EX) + riscv::xlen_t operand_a_n, operand_a_q, operand_b_n, operand_b_q, imm_n, imm_q, imm_forward_rs3; + + logic alu_valid_q; + logic mult_valid_q; + logic fpu_valid_q; + logic [ 1:0] fpu_fmt_q; + logic [ 2:0] fpu_rm_q; + logic lsu_valid_q; + logic csr_valid_q; + logic branch_valid_q; + logic cvxif_valid_q; + logic [31:0] cvxif_off_instr_q; + + logic [TRANS_ID_BITS-1:0] trans_id_n, trans_id_q; + fu_op operator_n, operator_q; // operation to perform + fu_t fu_n, fu_q; // functional unit to use + + // forwarding signals + logic forward_rs1, forward_rs2, forward_rs3; + + // original instruction stored in tval + riscv::instruction_t orig_instr; + assign orig_instr = riscv::instruction_t'(issue_instr_i.ex.tval[31:0]); + + // ID <-> EX registers + + assign rs1_forwarding_o = operand_a_n[riscv::VLEN-1:0]; //forwarding or unregistered rs1 value + assign rs2_forwarding_o = operand_b_n[riscv::VLEN-1:0]; //forwarding or unregistered rs2 value + + assign fu_data_o.operand_a = 
operand_a_q; + assign fu_data_o.operand_b = operand_b_q; + assign fu_data_o.fu = fu_q; + assign fu_data_o.operation = operator_q; + assign fu_data_o.trans_id = trans_id_q; + assign fu_data_o.imm = imm_q; + assign alu_valid_o = alu_valid_q; + assign branch_valid_o = branch_valid_q; + assign lsu_valid_o = lsu_valid_q; + assign csr_valid_o = csr_valid_q; + assign mult_valid_o = mult_valid_q; + assign fpu_valid_o = fpu_valid_q; + assign fpu_fmt_o = fpu_fmt_q; + assign fpu_rm_o = fpu_rm_q; + assign cvxif_valid_o = CVA6Cfg.CvxifEn ? cvxif_valid_q : '0; + assign cvxif_off_instr_o = CVA6Cfg.CvxifEn ? cvxif_off_instr_q : '0; + assign stall_issue_o = stall; + // --------------- + // Issue Stage + // --------------- + + // select the right busy signal + // this obviously depends on the functional unit we need + always_comb begin : unit_busy + unique case (issue_instr_i.fu) + NONE: fu_busy = 1'b0; + ALU, CTRL_FLOW, CSR, MULT: fu_busy = ~flu_ready_i; + LOAD, STORE: fu_busy = ~lsu_ready_i; + CVXIF: fu_busy = ~cvxif_ready_i; + default: begin + if (CVA6Cfg.FpPresent && (issue_instr_i.fu == FPU || issue_instr_i.fu == FPU_VEC)) begin + fu_busy = ~fpu_ready_i; + end else begin + fu_busy = 1'b0; + end + end + endcase + end + + // --------------- + // Register stage + // --------------- + // check that all operands are available, otherwise stall + // forward corresponding register + always_comb begin : operands_available + stall = stall_i; + // operand forwarding signals + forward_rs1 = 1'b0; + forward_rs2 = 1'b0; + forward_rs3 = 1'b0; // FPR only + // poll the scoreboard for those values + rs1_o = issue_instr_i.rs1; + rs2_o = issue_instr_i.rs2; + rs3_o = issue_instr_i.result[REG_ADDR_SIZE-1:0]; // rs3 is encoded in imm field + + // 0. check that we are not using the zimm type in RS1 + // as this is an immediate we do not have to wait on anything here + // 1. check if the source registers are clobbered --> check appropriate clobber list (gpr/fpr) + // 2. 
poll the scoreboard + if (!issue_instr_i.use_zimm && ((CVA6Cfg.FpPresent && is_rs1_fpr( + issue_instr_i.op + )) ? rd_clobber_fpr_i[issue_instr_i.rs1] != NONE : + rd_clobber_gpr_i[issue_instr_i.rs1] != NONE)) begin + // check if the clobbering instruction is not a CSR instruction, CSR instructions can only + // be fetched through the register file since they can't be forwarded + // if the operand is available, forward it. CSRs don't write to/from FPR + if (rs1_valid_i && (CVA6Cfg.FpPresent && is_rs1_fpr( + issue_instr_i.op + ) ? 1'b1 : ((rd_clobber_gpr_i[issue_instr_i.rs1] != CSR) || + (CVA6Cfg.RVS && issue_instr_i.op == SFENCE_VMA)))) begin + forward_rs1 = 1'b1; + end else begin // the operand is not available -> stall + stall = 1'b1; + end + end + + if ((CVA6Cfg.FpPresent && is_rs2_fpr( + issue_instr_i.op + )) ? rd_clobber_fpr_i[issue_instr_i.rs2] != NONE : + rd_clobber_gpr_i[issue_instr_i.rs2] != NONE) begin + // if the operand is available, forward it. CSRs don't write to/from FPR + if (rs2_valid_i && (CVA6Cfg.FpPresent && is_rs2_fpr( + issue_instr_i.op + ) ? 1'b1 : ((rd_clobber_gpr_i[issue_instr_i.rs2] != CSR) || + (CVA6Cfg.RVS && issue_instr_i.op == SFENCE_VMA)))) begin + forward_rs2 = 1'b1; + end else begin // the operand is not available -> stall + stall = 1'b1; + end + end + + // Only check clobbered gpr for OFFLOADED instruction + if ((CVA6Cfg.FpPresent && is_imm_fpr( + issue_instr_i.op + )) ? rd_clobber_fpr_i[issue_instr_i.result[REG_ADDR_SIZE-1:0]] != NONE : + issue_instr_i.op == OFFLOAD && CVA6Cfg.NrRgprPorts == 3 ? + rd_clobber_gpr_i[issue_instr_i.result[REG_ADDR_SIZE-1:0]] != NONE : 0) begin + // if the operand is available, forward it. 
CSRs don't write to/from FPR so no need to check + if (rs3_valid_i) begin + forward_rs3 = 1'b1; + end else begin // the operand is not available -> stall + stall = 1'b1; + end + end + end + + // third operand from fp regfile or gp regfile if NR_RGPR_PORTS == 3 + if (CVA6Cfg.NrRgprPorts == 3) begin : gen_gp_rs3 + assign imm_forward_rs3 = rs3_i; + end else begin : gen_fp_rs3 + assign imm_forward_rs3 = {{riscv::XLEN - CVA6Cfg.FLen{1'b0}}, rs3_i}; + end + + // Forwarding/Output MUX + always_comb begin : forwarding_operand_select + // default is regfiles (gpr or fpr) + operand_a_n = operand_a_regfile; + operand_b_n = operand_b_regfile; + // immediates are the third operands in the store case + // for FP operations, the imm field can also be the third operand from the regfile + if (CVA6Cfg.NrRgprPorts == 3) begin + imm_n = (CVA6Cfg.FpPresent && is_imm_fpr(issue_instr_i.op)) ? + {{riscv::XLEN - CVA6Cfg.FLen{1'b0}}, operand_c_regfile} : + issue_instr_i.op == OFFLOAD ? operand_c_regfile : issue_instr_i.result; + end else begin + imm_n = (CVA6Cfg.FpPresent && is_imm_fpr(issue_instr_i.op)) ? 
+ {{riscv::XLEN - CVA6Cfg.FLen{1'b0}}, operand_c_regfile} : issue_instr_i.result; + end + trans_id_n = issue_instr_i.trans_id; + fu_n = issue_instr_i.fu; + operator_n = issue_instr_i.op; + // or should we forward + if (forward_rs1) begin + operand_a_n = rs1_i; + end + + if (forward_rs2) begin + operand_b_n = rs2_i; + end + + if (CVA6Cfg.FpPresent && forward_rs3) begin + imm_n = imm_forward_rs3; + end + + // use the PC as operand a + if (issue_instr_i.use_pc) begin + operand_a_n = { + {riscv::XLEN - riscv::VLEN{issue_instr_i.pc[riscv::VLEN-1]}}, issue_instr_i.pc + }; + end + + // use the zimm as operand a + if (issue_instr_i.use_zimm) begin + // zero extend operand a + operand_a_n = {{riscv::XLEN - 5{1'b0}}, issue_instr_i.rs1[4:0]}; + end + // or is it an immediate (including PC), this is not the case for a store, control flow, and accelerator instructions + // also make sure operand B is not already used as an FP operand + if (issue_instr_i.use_imm && (issue_instr_i.fu != STORE) && (issue_instr_i.fu != CTRL_FLOW) && (issue_instr_i.fu != ACCEL) && !(CVA6Cfg.FpPresent && is_rs2_fpr( + issue_instr_i.op + ))) begin + operand_b_n = issue_instr_i.result; + end + end + + // FU select, assert the correct valid out signal (in the next cycle) + // This needs to be like this to make verilator happy. I know its ugly. 
+ always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + alu_valid_q <= 1'b0; + lsu_valid_q <= 1'b0; + mult_valid_q <= 1'b0; + fpu_valid_q <= 1'b0; + fpu_fmt_q <= 2'b0; + fpu_rm_q <= 3'b0; + csr_valid_q <= 1'b0; + branch_valid_q <= 1'b0; + end else begin + alu_valid_q <= 1'b0; + lsu_valid_q <= 1'b0; + mult_valid_q <= 1'b0; + fpu_valid_q <= 1'b0; + fpu_fmt_q <= 2'b0; + fpu_rm_q <= 3'b0; + csr_valid_q <= 1'b0; + branch_valid_q <= 1'b0; + // Exception pass through: + // If an exception has occurred simply pass it through + // we do not want to issue this instruction + if (!issue_instr_i.ex.valid && issue_instr_valid_i && issue_ack_o) begin + case (issue_instr_i.fu) + ALU: begin + alu_valid_q <= 1'b1; + end + CTRL_FLOW: begin + branch_valid_q <= 1'b1; + end + MULT: begin + mult_valid_q <= 1'b1; + end + LOAD, STORE: begin + lsu_valid_q <= 1'b1; + end + CSR: begin + csr_valid_q <= 1'b1; + end + default: begin + if (issue_instr_i.fu == FPU && CVA6Cfg.FpPresent) begin + fpu_valid_q <= 1'b1; + fpu_fmt_q <= orig_instr.rftype.fmt; // fmt bits from instruction + fpu_rm_q <= orig_instr.rftype.rm; // rm bits from instruction + end else if (issue_instr_i.fu == FPU_VEC && CVA6Cfg.FpPresent) begin + fpu_valid_q <= 1'b1; + fpu_fmt_q <= orig_instr.rvftype.vfmt; // vfmt bits from instruction + fpu_rm_q <= {2'b0, orig_instr.rvftype.repl}; // repl bit from instruction + end + end + endcase + end + // if we got a flush request, de-assert the valid flag, otherwise we will start this + // functional unit with the wrong inputs + if (flush_i) begin + alu_valid_q <= 1'b0; + lsu_valid_q <= 1'b0; + mult_valid_q <= 1'b0; + fpu_valid_q <= 1'b0; + csr_valid_q <= 1'b0; + branch_valid_q <= 1'b0; + end + end + end + + if (CVA6Cfg.CvxifEn) begin + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + cvxif_valid_q <= 1'b0; + cvxif_off_instr_q <= 32'b0; + end else begin + cvxif_valid_q <= 1'b0; + cvxif_off_instr_q <= 32'b0; + if (!issue_instr_i.ex.valid && 
issue_instr_valid_i && issue_ack_o) begin + case (issue_instr_i.fu) + CVXIF: begin + cvxif_valid_q <= 1'b1; + cvxif_off_instr_q <= orig_instr; + end + default: ; + endcase + end + if (flush_i) begin + cvxif_valid_q <= 1'b0; + cvxif_off_instr_q <= 32'b0; + end + end + end + end + + // We can issue an instruction if we do not detect that any other instruction is writing the same + // destination register. + // We also need to check if there is an unresolved branch in the scoreboard. + always_comb begin : issue_scoreboard + // default assignment + issue_ack_o = 1'b0; + // check that we didn't stall, that the instruction we got is valid + // and that the functional unit we need is not busy + if (issue_instr_valid_i) begin + // check that the corresponding functional unit is not busy + if (!stall && !fu_busy) begin + // ----------------------------------------- + // WAW - Write After Write Dependency Check + // ----------------------------------------- + // no other instruction has the same destination register -> issue the instruction + if ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr( + issue_instr_i.op + )) ? (rd_clobber_fpr_i[issue_instr_i.rd] == NONE) : + (rd_clobber_gpr_i[issue_instr_i.rd] == NONE)) begin + issue_ack_o = 1'b1; + end + // or check that the target destination register will be written in this cycle by the + // commit stage + for (int unsigned i = 0; i < CVA6Cfg.NrCommitPorts; i++) + if ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr( + issue_instr_i.op + )) ? 
(we_fpr_i[i] && waddr_i[i] == issue_instr_i.rd[4:0]) : + (we_gpr_i[i] && waddr_i[i] == issue_instr_i.rd[4:0])) begin + issue_ack_o = 1'b1; + end + + end + // we can also issue the instruction under the following two circumstances: + // we can do this even if we are stalled or no functional unit is ready (as we don't need one) + // the decoder needs to make sure that the instruction is marked as valid when it does not + // need any functional unit or if an exception occurred previous to the execute stage. + // 1. we already got an exception + if (issue_instr_i.ex.valid) begin + issue_ack_o = 1'b1; + end + // 2. it is an instruction which does not need any functional unit + if (issue_instr_i.fu == NONE) begin + issue_ack_o = 1'b1; + end + end + // after a multiplication was issued we can only issue another multiplication + // otherwise we will get contentions on the fixed latency bus + if (mult_valid_q && issue_instr_i.fu inside {ALU, CTRL_FLOW, CSR}) begin + issue_ack_o = 1'b0; + end + end + + // ---------------------- + // Integer Register File + // ---------------------- + logic [ CVA6Cfg.NrRgprPorts-1:0][riscv::XLEN-1:0] rdata; + logic [ CVA6Cfg.NrRgprPorts-1:0][ 4:0] raddr_pack; + + // pack signals + logic [CVA6Cfg.NrCommitPorts-1:0][ 4:0] waddr_pack; + logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata_pack; + logic [CVA6Cfg.NrCommitPorts-1:0] we_pack; + + if (CVA6Cfg.NrRgprPorts == 3) begin : gen_rs3 + assign raddr_pack = {issue_instr_i.result[4:0], issue_instr_i.rs2[4:0], issue_instr_i.rs1[4:0]}; + end else begin : gen_no_rs3 + assign raddr_pack = {issue_instr_i.rs2[4:0], issue_instr_i.rs1[4:0]}; + end + + for (genvar i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin : gen_write_back_port + assign waddr_pack[i] = waddr_i[i]; + assign wdata_pack[i] = wdata_i[i]; + assign we_pack[i] = we_gpr_i[i]; + end + if (ariane_pkg::FPGA_EN) begin : gen_fpga_regfile + ariane_regfile_fpga #( + .CVA6Cfg (CVA6Cfg), + .DATA_WIDTH (riscv::XLEN), + 
.NR_READ_PORTS(CVA6Cfg.NrRgprPorts), + .ZERO_REG_ZERO(1) + ) i_ariane_regfile_fpga ( + .test_en_i(1'b0), + .raddr_i (raddr_pack), + .rdata_o (rdata), + .waddr_i (waddr_pack), + .wdata_i (wdata_pack), + .we_i (we_pack), + .* + ); + end else begin : gen_asic_regfile + ariane_regfile #( + .CVA6Cfg (CVA6Cfg), + .DATA_WIDTH (riscv::XLEN), + .NR_READ_PORTS(CVA6Cfg.NrRgprPorts), + .ZERO_REG_ZERO(1) + ) i_ariane_regfile ( + .test_en_i(1'b0), + .raddr_i (raddr_pack), + .rdata_o (rdata), + .waddr_i (waddr_pack), + .wdata_i (wdata_pack), + .we_i (we_pack), + .* + ); + end + + // ----------------------------- + // Floating-Point Register File + // ----------------------------- + logic [2:0][CVA6Cfg.FLen-1:0] fprdata; + + // pack signals + logic [2:0][4:0] fp_raddr_pack; + logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] fp_wdata_pack; + + generate + if (CVA6Cfg.FpPresent) begin : float_regfile_gen + assign fp_raddr_pack = { + issue_instr_i.result[4:0], issue_instr_i.rs2[4:0], issue_instr_i.rs1[4:0] + }; + for (genvar i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin : gen_fp_wdata_pack + assign fp_wdata_pack[i] = {wdata_i[i][CVA6Cfg.FLen-1:0]}; + end + if (ariane_pkg::FPGA_EN) begin : gen_fpga_fp_regfile + ariane_regfile_fpga #( + .CVA6Cfg (CVA6Cfg), + .DATA_WIDTH (CVA6Cfg.FLen), + .NR_READ_PORTS(3), + .ZERO_REG_ZERO(0) + ) i_ariane_fp_regfile_fpga ( + .test_en_i(1'b0), + .raddr_i (fp_raddr_pack), + .rdata_o (fprdata), + .waddr_i (waddr_pack), + .wdata_i (fp_wdata_pack), + .we_i (we_fpr_i), + .* + ); + end else begin : gen_asic_fp_regfile + ariane_regfile #( + .CVA6Cfg (CVA6Cfg), + .DATA_WIDTH (CVA6Cfg.FLen), + .NR_READ_PORTS(3), + .ZERO_REG_ZERO(0) + ) i_ariane_fp_regfile ( + .test_en_i(1'b0), + .raddr_i (fp_raddr_pack), + .rdata_o (fprdata), + .waddr_i (waddr_pack), + .wdata_i (fp_wdata_pack), + .we_i (we_fpr_i), + .* + ); + end + end else begin : no_fpr_gen + assign fprdata = '{default: '0}; + end + endgenerate + + if (CVA6Cfg.NrRgprPorts == 3) begin : gen_operand_c + assign 
operand_c_fpr = {{riscv::XLEN - CVA6Cfg.FLen{1'b0}}, fprdata[2]}; + assign operand_c_gpr = rdata[2]; + end else begin + assign operand_c_fpr = fprdata[2]; + end + + assign operand_a_regfile = (CVA6Cfg.FpPresent && is_rs1_fpr( + issue_instr_i.op + )) ? {{riscv::XLEN - CVA6Cfg.FLen{1'b0}}, fprdata[0]} : rdata[0]; + assign operand_b_regfile = (CVA6Cfg.FpPresent && is_rs2_fpr( + issue_instr_i.op + )) ? {{riscv::XLEN - CVA6Cfg.FLen{1'b0}}, fprdata[1]} : rdata[1]; + assign operand_c_regfile = (CVA6Cfg.NrRgprPorts == 3) ? ((CVA6Cfg.FpPresent && is_imm_fpr( + issue_instr_i.op + )) ? operand_c_fpr : operand_c_gpr) : operand_c_fpr; + + + // ---------------------- + // Registers (ID <-> EX) + // ---------------------- + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + operand_a_q <= '{default: 0}; + operand_b_q <= '{default: 0}; + imm_q <= '0; + fu_q <= NONE; + operator_q <= ADD; + trans_id_q <= '0; + pc_o <= '0; + is_compressed_instr_o <= 1'b0; + branch_predict_o <= {cf_t'(0), {riscv::VLEN{1'b0}}}; + end else begin + operand_a_q <= operand_a_n; + operand_b_q <= operand_b_n; + imm_q <= imm_n; + fu_q <= fu_n; + operator_q <= operator_n; + trans_id_q <= trans_id_n; + pc_o <= issue_instr_i.pc; + is_compressed_instr_o <= issue_instr_i.is_compressed; + branch_predict_o <= issue_instr_i.bp; + end + end + + //pragma translate_off + initial begin + assert (CVA6Cfg.NrRgprPorts == 2 || (CVA6Cfg.NrRgprPorts == 3 && CVA6Cfg.CvxifEn)) + else + $fatal( + 1, + "If CVXIF is enable, ariane regfile can have either 2 or 3 read ports. Else it has 2 read ports." 
+ ); + end + + assert property (@(posedge clk_i) (branch_valid_q) |-> (!$isunknown( + operand_a_q + ) && !$isunknown( + operand_b_q + ))) + else $warning("Got unknown value in one of the operands"); + + //pragma translate_on +endmodule + + diff --git a/test/type_param/core/issue_stage.sv b/test/type_param/core/issue_stage.sv new file mode 100644 index 00000000..64b8cb59 --- /dev/null +++ b/test/type_param/core/issue_stage.sv @@ -0,0 +1,199 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 21.05.2017 +// Description: Issue stage dispatches instructions to the FUs and keeps track of them +// in a scoreboard like data-structure. 
+ + +module issue_stage + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + + output logic sb_full_o, + input logic flush_unissued_instr_i, + input logic flush_i, + input logic stall_i, // Stall issue stage + // from ISSUE + input scoreboard_entry_t decoded_instr_i, + input logic decoded_instr_valid_i, + input logic is_ctrl_flow_i, + output logic decoded_instr_ack_o, + // to EX + output [riscv::VLEN-1:0] rs1_forwarding_o, // unregistered version of fu_data_o.operanda + output [riscv::VLEN-1:0] rs2_forwarding_o, // unregistered version of fu_data_o.operandb + output fu_data_t fu_data_o, + output logic [riscv::VLEN-1:0] pc_o, + output logic is_compressed_instr_o, + input logic flu_ready_i, + output logic alu_valid_o, + // ex just resolved our predicted branch, we are ready to accept new requests + input logic resolve_branch_i, + + input logic lsu_ready_i, + output logic lsu_valid_o, + // branch prediction + output logic branch_valid_o, // use branch prediction unit + output branchpredict_sbe_t branch_predict_o, // Branch predict Out + + output logic mult_valid_o, + + input logic fpu_ready_i, + output logic fpu_valid_o, + output logic [1:0] fpu_fmt_o, // FP fmt field from instr. + output logic [2:0] fpu_rm_o, // FP rm field from instr. 
+ + output logic csr_valid_o, + + // CVXIF + //Issue interface + output logic x_issue_valid_o, + input logic x_issue_ready_i, + output logic [31:0] x_off_instr_o, + + // to accelerator dispatcher + output scoreboard_entry_t issue_instr_o, + output logic issue_instr_hs_o, + + // write back port + input logic [CVA6Cfg.NrWbPorts-1:0][TRANS_ID_BITS-1:0] trans_id_i, + input bp_resolve_t resolved_branch_i, + input logic [CVA6Cfg.NrWbPorts-1:0][riscv::XLEN-1:0] wbdata_i, + input exception_t [CVA6Cfg.NrWbPorts-1:0] ex_ex_i, // exception from execute stage or CVXIF offloaded instruction + input logic [CVA6Cfg.NrWbPorts-1:0] wt_valid_i, + input logic x_we_i, + + // commit port + input logic [CVA6Cfg.NrCommitPorts-1:0][ 4:0] waddr_i, + input logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata_i, + input logic [CVA6Cfg.NrCommitPorts-1:0] we_gpr_i, + input logic [CVA6Cfg.NrCommitPorts-1:0] we_fpr_i, + + output scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_o, + input logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i, + + output logic stall_issue_o, // Used in Performance Counters + + //RVFI + output logic [TRANS_ID_BITS-1:0] rvfi_issue_pointer_o, + output logic [CVA6Cfg.NrCommitPorts-1:0][TRANS_ID_BITS-1:0] rvfi_commit_pointer_o +); + // --------------------------------------------------- + // Scoreboard (SB) <-> Issue and Read Operands (IRO) + // --------------------------------------------------- + typedef logic [(CVA6Cfg.NrRgprPorts == 3 ? 
riscv::XLEN : CVA6Cfg.FLen)-1:0] rs3_len_t; + + fu_t [2**REG_ADDR_SIZE-1:0] rd_clobber_gpr_sb_iro; + fu_t [2**REG_ADDR_SIZE-1:0] rd_clobber_fpr_sb_iro; + + logic [ REG_ADDR_SIZE-1:0] rs1_iro_sb; + riscv::xlen_t rs1_sb_iro; + logic rs1_valid_sb_iro; + + logic [ REG_ADDR_SIZE-1:0] rs2_iro_sb; + riscv::xlen_t rs2_sb_iro; + logic rs2_valid_iro_sb; + + logic [ REG_ADDR_SIZE-1:0] rs3_iro_sb; + rs3_len_t rs3_sb_iro; + logic rs3_valid_iro_sb; + + scoreboard_entry_t issue_instr_sb_iro; + logic issue_instr_valid_sb_iro; + logic issue_ack_iro_sb; + + riscv::xlen_t rs1_forwarding_xlen; + riscv::xlen_t rs2_forwarding_xlen; + + assign rs1_forwarding_o = rs1_forwarding_xlen[riscv::VLEN-1:0]; + assign rs2_forwarding_o = rs2_forwarding_xlen[riscv::VLEN-1:0]; + + assign issue_instr_o = issue_instr_sb_iro; + assign issue_instr_hs_o = issue_instr_valid_sb_iro & issue_ack_iro_sb; + + + // --------------------------------------------------------- + // 2. Manage instructions in a scoreboard + // --------------------------------------------------------- + scoreboard #( + .CVA6Cfg (CVA6Cfg), + .rs3_len_t(rs3_len_t) + ) i_scoreboard ( + .sb_full_o (sb_full_o), + .unresolved_branch_i(1'b0), + .rd_clobber_gpr_o (rd_clobber_gpr_sb_iro), + .rd_clobber_fpr_o (rd_clobber_fpr_sb_iro), + .rs1_i (rs1_iro_sb), + .rs1_o (rs1_sb_iro), + .rs1_valid_o (rs1_valid_sb_iro), + .rs2_i (rs2_iro_sb), + .rs2_o (rs2_sb_iro), + .rs2_valid_o (rs2_valid_iro_sb), + .rs3_i (rs3_iro_sb), + .rs3_o (rs3_sb_iro), + .rs3_valid_o (rs3_valid_iro_sb), + + .decoded_instr_i (decoded_instr_i), + .decoded_instr_valid_i(decoded_instr_valid_i), + .decoded_instr_ack_o (decoded_instr_ack_o), + .issue_instr_o (issue_instr_sb_iro), + .issue_instr_valid_o (issue_instr_valid_sb_iro), + .issue_ack_i (issue_ack_iro_sb), + + .resolved_branch_i(resolved_branch_i), + .trans_id_i (trans_id_i), + .wbdata_i (wbdata_i), + .ex_i (ex_ex_i), + .* + ); + + // --------------------------------------------------------- + // 3. 
Issue instruction and read operand, also commit + // --------------------------------------------------------- + issue_read_operands #( + .CVA6Cfg (CVA6Cfg), + .rs3_len_t(rs3_len_t) + ) i_issue_read_operands ( + .flush_i (flush_unissued_instr_i), + .issue_instr_i (issue_instr_sb_iro), + .issue_instr_valid_i(issue_instr_valid_sb_iro), + .issue_ack_o (issue_ack_iro_sb), + .fu_data_o (fu_data_o), + .flu_ready_i (flu_ready_i), + .rs1_o (rs1_iro_sb), + .rs1_i (rs1_sb_iro), + .rs1_valid_i (rs1_valid_sb_iro), + .rs2_o (rs2_iro_sb), + .rs2_i (rs2_sb_iro), + .rs2_valid_i (rs2_valid_iro_sb), + .rs3_o (rs3_iro_sb), + .rs3_i (rs3_sb_iro), + .rs3_valid_i (rs3_valid_iro_sb), + .rd_clobber_gpr_i (rd_clobber_gpr_sb_iro), + .rd_clobber_fpr_i (rd_clobber_fpr_sb_iro), + .alu_valid_o (alu_valid_o), + .branch_valid_o (branch_valid_o), + .csr_valid_o (csr_valid_o), + .cvxif_valid_o (x_issue_valid_o), + .cvxif_ready_i (x_issue_ready_i), + .cvxif_off_instr_o (x_off_instr_o), + .mult_valid_o (mult_valid_o), + .rs1_forwarding_o (rs1_forwarding_xlen), + .rs2_forwarding_o (rs2_forwarding_xlen), + .stall_issue_o (stall_issue_o), + .* + ); + +endmodule diff --git a/test/type_param/core/load_store_unit.sv b/test/type_param/core/load_store_unit.sv new file mode 100644 index 00000000..14a281f4 --- /dev/null +++ b/test/type_param/core/load_store_unit.sv @@ -0,0 +1,493 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 19.04.2017 +// Description: Load Store Unit, handles address calculation and memory interface signals + + +module load_store_unit + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int unsigned ASID_WIDTH = 1 +) ( + input logic clk_i, + input logic rst_ni, + input logic flush_i, + input logic stall_st_pending_i, + output logic no_st_pending_o, + input logic amo_valid_commit_i, + + input fu_data_t fu_data_i, + output logic lsu_ready_o, // FU is ready e.g. not busy + input logic lsu_valid_i, // Input is valid + + output logic [TRANS_ID_BITS-1:0] load_trans_id_o, // ID of scoreboard entry at which to write back + output riscv::xlen_t load_result_o, + output logic load_valid_o, + output exception_t load_exception_o, // to WB, signal exception status LD exception + + output logic [TRANS_ID_BITS-1:0] store_trans_id_o, // ID of scoreboard entry at which to write back + output riscv::xlen_t store_result_o, + output logic store_valid_o, + output exception_t store_exception_o, // to WB, signal exception status ST exception + + input logic commit_i, // commit the pending store + output logic commit_ready_o, // commit queue is ready to accept another commit request + input logic [TRANS_ID_BITS-1:0] commit_tran_id_i, + + input logic enable_translation_i, // enable virtual memory translation + input logic en_ld_st_translation_i, // enable virtual memory translation for load/stores + + // icache translation requests + input icache_arsp_t icache_areq_i, + output icache_areq_t icache_areq_o, + + input riscv::priv_lvl_t priv_lvl_i, // From CSR register file + input riscv::priv_lvl_t ld_st_priv_lvl_i, // From CSR register file + input logic sum_i, // From CSR register file + input logic mxr_i, // From CSR register file + input logic [riscv::PPNW-1:0] satp_ppn_i, // From 
CSR register file + input logic [ ASID_WIDTH-1:0] asid_i, // From CSR register file + input logic [ ASID_WIDTH-1:0] asid_to_be_flushed_i, + input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i, + input logic flush_tlb_i, + // Performance counters + output logic itlb_miss_o, + output logic dtlb_miss_o, + + // interface to dcache + input dcache_req_o_t [ 2:0] dcache_req_ports_i, + output dcache_req_i_t [ 2:0] dcache_req_ports_o, + input logic dcache_wbuffer_empty_i, + input logic dcache_wbuffer_not_ni_i, + // AMO interface + output amo_req_t amo_req_o, + input amo_resp_t amo_resp_i, + // PMP + input riscv::pmpcfg_t [15:0] pmpcfg_i, + input logic [15:0][riscv::PLEN-3:0] pmpaddr_i, + + //RVFI + output lsu_ctrl_t rvfi_lsu_ctrl_o, + output [riscv::PLEN-1:0] rvfi_mem_paddr_o +); + // data is misaligned + logic data_misaligned; + // -------------------------------------- + // 1st register stage - (stall registers) + // -------------------------------------- + // those are the signals which are always correct + // e.g.: they keep the value in the stall case + lsu_ctrl_t lsu_ctrl; + + logic pop_st; + logic pop_ld; + + // ------------------------------ + // Address Generation Unit (AGU) + // ------------------------------ + // virtual address as calculated by the AGU in the first cycle + logic [ riscv::VLEN-1:0] vaddr_i; + riscv::xlen_t vaddr_xlen; + logic overflow; + logic [(riscv::XLEN/8)-1:0] be_i; + + assign vaddr_xlen = $unsigned($signed(fu_data_i.imm) + $signed(fu_data_i.operand_a)); + assign vaddr_i = vaddr_xlen[riscv::VLEN-1:0]; + // we work with SV39 or SV32, so if VM is enabled, check that all bits [XLEN-1:38] or [XLEN-1:31] are equal + assign overflow = (riscv::IS_XLEN64 && (!((&vaddr_xlen[riscv::XLEN-1:riscv::SV-1]) == 1'b1 || (|vaddr_xlen[riscv::XLEN-1:riscv::SV-1]) == 1'b0))); + + logic st_valid_i; + logic ld_valid_i; + logic ld_translation_req; + logic st_translation_req; + logic [riscv::VLEN-1:0] ld_vaddr; + logic [riscv::VLEN-1:0] st_vaddr; + logic 
translation_req; + logic translation_valid; + logic [riscv::VLEN-1:0] mmu_vaddr; + logic [riscv::PLEN-1:0] mmu_paddr, mmu_vaddr_plen, fetch_vaddr_plen; + exception_t mmu_exception; + logic dtlb_hit; + logic [ riscv::PPNW-1:0] dtlb_ppn; + + logic ld_valid; + logic [TRANS_ID_BITS-1:0] ld_trans_id; + riscv::xlen_t ld_result; + logic st_valid; + logic [TRANS_ID_BITS-1:0] st_trans_id; + riscv::xlen_t st_result; + + logic [ 11:0] page_offset; + logic page_offset_matches; + + exception_t misaligned_exception; + exception_t ld_ex; + exception_t st_ex; + + // ------------------- + // MMU e.g.: TLBs/PTW + // ------------------- + if (MMU_PRESENT && (riscv::XLEN == 64)) begin : gen_mmu_sv39 + mmu #( + .CVA6Cfg (CVA6Cfg), + .INSTR_TLB_ENTRIES(ariane_pkg::INSTR_TLB_ENTRIES), + .DATA_TLB_ENTRIES (ariane_pkg::DATA_TLB_ENTRIES), + .ASID_WIDTH (ASID_WIDTH) + ) i_cva6_mmu ( + // misaligned bypass + .misaligned_ex_i(misaligned_exception), + .lsu_is_store_i (st_translation_req), + .lsu_req_i (translation_req), + .lsu_vaddr_i (mmu_vaddr), + .lsu_valid_o (translation_valid), + .lsu_paddr_o (mmu_paddr), + .lsu_exception_o(mmu_exception), + .lsu_dtlb_hit_o (dtlb_hit), // send in the same cycle as the request + .lsu_dtlb_ppn_o (dtlb_ppn), // send in the same cycle as the request + // connecting PTW to D$ IF + .req_port_i (dcache_req_ports_i[0]), + .req_port_o (dcache_req_ports_o[0]), + // icache address translation requests + .icache_areq_i (icache_areq_i), + .asid_to_be_flushed_i, + .vaddr_to_be_flushed_i, + .icache_areq_o (icache_areq_o), + .pmpcfg_i, + .pmpaddr_i, + .* + ); + end else if (MMU_PRESENT && (riscv::XLEN == 32)) begin : gen_mmu_sv32 + cva6_mmu_sv32 #( + .CVA6Cfg (CVA6Cfg), + .INSTR_TLB_ENTRIES(ariane_pkg::INSTR_TLB_ENTRIES), + .DATA_TLB_ENTRIES (ariane_pkg::DATA_TLB_ENTRIES), + .ASID_WIDTH (ASID_WIDTH) + ) i_cva6_mmu ( + // misaligned bypass + .misaligned_ex_i(misaligned_exception), + .lsu_is_store_i (st_translation_req), + .lsu_req_i (translation_req), + .lsu_vaddr_i 
(mmu_vaddr), + .lsu_valid_o (translation_valid), + .lsu_paddr_o (mmu_paddr), + .lsu_exception_o(mmu_exception), + .lsu_dtlb_hit_o (dtlb_hit), // send in the same cycle as the request + .lsu_dtlb_ppn_o (dtlb_ppn), // send in the same cycle as the request + // connecting PTW to D$ IF + .req_port_i (dcache_req_ports_i[0]), + .req_port_o (dcache_req_ports_o[0]), + // icache address translation requests + .icache_areq_i (icache_areq_i), + .asid_to_be_flushed_i, + .vaddr_to_be_flushed_i, + .icache_areq_o (icache_areq_o), + .pmpcfg_i, + .pmpaddr_i, + .* + ); + end else begin : gen_no_mmu + + if (riscv::VLEN > riscv::PLEN) begin + assign mmu_vaddr_plen = mmu_vaddr[riscv::PLEN-1:0]; + assign fetch_vaddr_plen = icache_areq_i.fetch_vaddr[riscv::PLEN-1:0]; + end else begin + assign mmu_vaddr_plen = {{{riscv::PLEN - riscv::VLEN} {1'b0}}, mmu_vaddr}; + assign fetch_vaddr_plen = {{{riscv::PLEN - riscv::VLEN} {1'b0}}, icache_areq_i.fetch_vaddr}; + end + + assign icache_areq_o.fetch_valid = icache_areq_i.fetch_req; + assign icache_areq_o.fetch_paddr = fetch_vaddr_plen; + assign icache_areq_o.fetch_exception = '0; + + assign dcache_req_ports_o[0].address_index = '0; + assign dcache_req_ports_o[0].address_tag = '0; + assign dcache_req_ports_o[0].data_wdata = '0; + assign dcache_req_ports_o[0].data_req = 1'b0; + assign dcache_req_ports_o[0].data_be = '1; + assign dcache_req_ports_o[0].data_size = 2'b11; + assign dcache_req_ports_o[0].data_we = 1'b0; + assign dcache_req_ports_o[0].kill_req = '0; + assign dcache_req_ports_o[0].tag_valid = 1'b0; + + assign itlb_miss_o = 1'b0; + assign dtlb_miss_o = 1'b0; + assign dtlb_ppn = mmu_vaddr_plen[riscv::PLEN-1:12]; + assign dtlb_hit = 1'b1; + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + mmu_paddr <= '0; + translation_valid <= '0; + mmu_exception <= '0; + end else begin + mmu_paddr <= mmu_vaddr_plen; + translation_valid <= translation_req; + mmu_exception <= misaligned_exception; + end + end + end + + + logic 
store_buffer_empty; + // ------------------ + // Store Unit + // ------------------ + store_unit #( + .CVA6Cfg(CVA6Cfg) + ) i_store_unit ( + .clk_i, + .rst_ni, + .flush_i, + .stall_st_pending_i, + .no_st_pending_o, + .store_buffer_empty_o(store_buffer_empty), + + .valid_i (st_valid_i), + .lsu_ctrl_i(lsu_ctrl), + .pop_st_o (pop_st), + .commit_i, + .commit_ready_o, + .amo_valid_commit_i, + + .valid_o (st_valid), + .trans_id_o (st_trans_id), + .result_o (st_result), + .ex_o (st_ex), + // MMU port + .translation_req_o (st_translation_req), + .vaddr_o (st_vaddr), + .rvfi_mem_paddr_o (rvfi_mem_paddr_o), + .paddr_i (mmu_paddr), + .ex_i (mmu_exception), + .dtlb_hit_i (dtlb_hit), + // Load Unit + .page_offset_i (page_offset), + .page_offset_matches_o(page_offset_matches), + // AMOs + .amo_req_o, + .amo_resp_i, + // to memory arbiter + .req_port_i (dcache_req_ports_i[2]), + .req_port_o (dcache_req_ports_o[2]) + ); + + // ------------------ + // Load Unit + // ------------------ + load_unit #( + .CVA6Cfg(CVA6Cfg) + ) i_load_unit ( + .valid_i (ld_valid_i), + .lsu_ctrl_i(lsu_ctrl), + .pop_ld_o (pop_ld), + + .valid_o (ld_valid), + .trans_id_o (ld_trans_id), + .result_o (ld_result), + .ex_o (ld_ex), + // MMU port + .translation_req_o (ld_translation_req), + .vaddr_o (ld_vaddr), + .paddr_i (mmu_paddr), + .ex_i (mmu_exception), + .dtlb_hit_i (dtlb_hit), + .dtlb_ppn_i (dtlb_ppn), + // to store unit + .page_offset_o (page_offset), + .page_offset_matches_i(page_offset_matches), + .store_buffer_empty_i (store_buffer_empty), + // to memory arbiter + .req_port_i (dcache_req_ports_i[1]), + .req_port_o (dcache_req_ports_o[1]), + .dcache_wbuffer_not_ni_i, + .commit_tran_id_i, + .* + ); + + // ---------------------------- + // Output Pipeline Register + // ---------------------------- + + // amount of pipeline registers inserted for load/store return path + // can be tuned to trade-off IPC vs. 
cycle time + + shift_reg #( + .dtype(logic [$bits(ld_valid) + $bits(ld_trans_id) + $bits(ld_result) + $bits(ld_ex) - 1:0]), + .Depth(cva6_config_pkg::CVA6ConfigNrLoadPipeRegs) + ) i_pipe_reg_load ( + .clk_i, + .rst_ni, + .d_i({ld_valid, ld_trans_id, ld_result, ld_ex}), + .d_o({load_valid_o, load_trans_id_o, load_result_o, load_exception_o}) + ); + + shift_reg #( + .dtype(logic [$bits(st_valid) + $bits(st_trans_id) + $bits(st_result) + $bits(st_ex) - 1:0]), + .Depth(cva6_config_pkg::CVA6ConfigNrStorePipeRegs) + ) i_pipe_reg_store ( + .clk_i, + .rst_ni, + .d_i({st_valid, st_trans_id, st_result, st_ex}), + .d_o({store_valid_o, store_trans_id_o, store_result_o, store_exception_o}) + ); + + // determine whether this is a load or store + always_comb begin : which_op + + ld_valid_i = 1'b0; + st_valid_i = 1'b0; + + translation_req = 1'b0; + mmu_vaddr = {riscv::VLEN{1'b0}}; + + // check the operation to activate the right functional unit accordingly + unique case (lsu_ctrl.fu) + // all loads go here + LOAD: begin + ld_valid_i = lsu_ctrl.valid; + translation_req = ld_translation_req; + mmu_vaddr = ld_vaddr; + end + // all stores go here + STORE: begin + st_valid_i = lsu_ctrl.valid; + translation_req = st_translation_req; + mmu_vaddr = st_vaddr; + end + // not relevant for the LSU + default: ; + endcase + end + + + // --------------- + // Byte Enable + // --------------- + // we can generate the byte enable from the virtual address since the last + // 12 bit are the same anyway + // and we can always generate the byte enable from the address at hand + + if (riscv::IS_XLEN64) begin : gen_8b_be + assign be_i = be_gen(vaddr_i[2:0], extract_transfer_size(fu_data_i.operation)); + end else begin : gen_4b_be + assign be_i = be_gen_32(vaddr_i[1:0], extract_transfer_size(fu_data_i.operation)); + end + + // ------------------------ + // Misaligned Exception + // ------------------------ + // we can detect a misaligned exception immediately + // the misaligned exception is passed to the 
functional unit via the MMU, which in case + // can augment the exception if other memory related exceptions like a page fault or access errors + always_comb begin : data_misaligned_detection + + misaligned_exception = {{riscv::XLEN{1'b0}}, {riscv::XLEN{1'b0}}, 1'b0}; + + data_misaligned = 1'b0; + + if (lsu_ctrl.valid) begin + case (lsu_ctrl.operation) + // double word + LD, SD, FLD, FSD, + AMO_LRD, AMO_SCD, + AMO_SWAPD, AMO_ADDD, AMO_ANDD, AMO_ORD, + AMO_XORD, AMO_MAXD, AMO_MAXDU, AMO_MIND, + AMO_MINDU: begin + if (riscv::IS_XLEN64 && lsu_ctrl.vaddr[2:0] != 3'b000) begin + data_misaligned = 1'b1; + end + end + // word + LW, LWU, SW, FLW, FSW, + AMO_LRW, AMO_SCW, + AMO_SWAPW, AMO_ADDW, AMO_ANDW, AMO_ORW, + AMO_XORW, AMO_MAXW, AMO_MAXWU, AMO_MINW, + AMO_MINWU: begin + if (lsu_ctrl.vaddr[1:0] != 2'b00) begin + data_misaligned = 1'b1; + end + end + // half word + LH, LHU, SH, FLH, FSH: begin + if (lsu_ctrl.vaddr[0] != 1'b0) begin + data_misaligned = 1'b1; + end + end + // byte -> is always aligned + default: ; + endcase + end + + if (data_misaligned) begin + + if (lsu_ctrl.fu == LOAD) begin + misaligned_exception = { + riscv::LD_ADDR_MISALIGNED, {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_ctrl.vaddr}, 1'b1 + }; + + end else if (lsu_ctrl.fu == STORE) begin + misaligned_exception = { + riscv::ST_ADDR_MISALIGNED, {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_ctrl.vaddr}, 1'b1 + }; + end + end + + if (ariane_pkg::MMU_PRESENT && en_ld_st_translation_i && lsu_ctrl.overflow) begin + + if (lsu_ctrl.fu == LOAD) begin + misaligned_exception = { + riscv::LD_ACCESS_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_ctrl.vaddr}, 1'b1 + }; + + end else if (lsu_ctrl.fu == STORE) begin + misaligned_exception = { + riscv::ST_ACCESS_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_ctrl.vaddr}, 1'b1 + }; + end + end + end + + // ------------------ + // LSU Control + // ------------------ + // new data arrives here + lsu_ctrl_t lsu_req_i; + + assign lsu_req_i = { + lsu_valid_i, + vaddr_i, + overflow, + 
fu_data_i.operand_b, + be_i, + fu_data_i.fu, + fu_data_i.operation, + fu_data_i.trans_id + }; + + lsu_bypass #( + .CVA6Cfg(CVA6Cfg) + ) lsu_bypass_i ( + .lsu_req_i (lsu_req_i), + .lsu_req_valid_i(lsu_valid_i), + .pop_ld_i (pop_ld), + .pop_st_i (pop_st), + + .lsu_ctrl_o(lsu_ctrl), + .ready_o (lsu_ready_o), + .* + ); + + assign rvfi_lsu_ctrl_o = lsu_ctrl; + +endmodule + diff --git a/test/type_param/core/load_unit.sv b/test/type_param/core/load_unit.sv new file mode 100644 index 00000000..512b498c --- /dev/null +++ b/test/type_param/core/load_unit.sv @@ -0,0 +1,534 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// Author: Florian Zaruba , ETH Zurich +// Michael Schaffner , ETH Zurich +// Date: 15.08.2018 +// Description: Load Unit, takes care of all load requests +// +// Contributor: Cesar Fuguet , CEA List +// Date: August 29, 2023 +// Modification: add support for multiple outstanding load operations +// to the data cache + +module load_unit + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, + // load unit input port + input logic valid_i, + input lsu_ctrl_t lsu_ctrl_i, + output logic pop_ld_o, + // load unit output port + output logic valid_o, + output logic [TRANS_ID_BITS-1:0] trans_id_o, + output riscv::xlen_t result_o, + output exception_t ex_o, + // MMU -> Address Translation + output logic translation_req_o, // request address translation + output logic [riscv::VLEN-1:0] vaddr_o, // virtual address out + input logic [riscv::PLEN-1:0] paddr_i, // physical address in + input exception_t ex_i, // exception which may has happened earlier. 
for example: mis-aligned exception + input logic dtlb_hit_i, // hit on the dtlb, send in the same cycle as the request + input logic [riscv::PPNW-1:0] dtlb_ppn_i, // ppn on the dtlb, send in the same cycle as the request + // address checker + output logic [11:0] page_offset_o, + input logic page_offset_matches_i, + input logic store_buffer_empty_i, // the entire store-buffer is empty + input logic [TRANS_ID_BITS-1:0] commit_tran_id_i, + // D$ interface + input dcache_req_o_t req_port_i, + output dcache_req_i_t req_port_o, + input logic dcache_wbuffer_not_ni_i +); + enum logic [3:0] { + IDLE, + WAIT_GNT, + SEND_TAG, + WAIT_PAGE_OFFSET, + ABORT_TRANSACTION, + ABORT_TRANSACTION_NI, + WAIT_TRANSLATION, + WAIT_FLUSH, + WAIT_WB_EMPTY + } + state_d, state_q; + + // in order to decouple the response interface from the request interface, + // we need a a buffer which can hold all inflight memory load requests + typedef struct packed { + logic [TRANS_ID_BITS-1:0] trans_id; // scoreboard identifier + logic [riscv::XLEN_ALIGN_BYTES-1:0] address_offset; // least significant bits of the address + fu_op operation; // type of load + } ldbuf_t; + + + // to support a throughput of one load per cycle, if the number of entries + // of the load buffer is 1, implement a fall-through mode. This however + // adds a combinational path between the request and response interfaces + // towards the cache. + localparam logic LDBUF_FALLTHROUGH = (CVA6Cfg.NrLoadBufEntries == 1); + localparam int unsigned REQ_ID_BITS = CVA6Cfg.NrLoadBufEntries > 1 ? 
$clog2( + CVA6Cfg.NrLoadBufEntries + ) : 1; + + typedef logic [REQ_ID_BITS-1:0] ldbuf_id_t; + + logic [CVA6Cfg.NrLoadBufEntries-1:0] ldbuf_valid_q, ldbuf_valid_d; + logic [CVA6Cfg.NrLoadBufEntries-1:0] ldbuf_flushed_q, ldbuf_flushed_d; + ldbuf_t [CVA6Cfg.NrLoadBufEntries-1:0] ldbuf_q; + logic ldbuf_empty, ldbuf_full; + ldbuf_id_t ldbuf_free_index; + logic ldbuf_w; + ldbuf_t ldbuf_wdata; + ldbuf_id_t ldbuf_windex; + logic ldbuf_r; + ldbuf_t ldbuf_rdata; + ldbuf_id_t ldbuf_rindex; + ldbuf_id_t ldbuf_last_id_q; + + assign ldbuf_full = &ldbuf_valid_q; + + // + // buffer of outstanding loads + + // write in the first available slot + generate + if (CVA6Cfg.NrLoadBufEntries > 1) begin : ldbuf_free_index_multi_gen + lzc #( + .WIDTH(CVA6Cfg.NrLoadBufEntries), + .MODE (1'b0) // Count leading zeros + ) lzc_windex_i ( + .in_i (~ldbuf_valid_q), + .cnt_o (ldbuf_free_index), + .empty_o(ldbuf_empty) + ); + end else begin : ldbuf_free_index_single_gen + assign ldbuf_free_index = 1'b0; + end + endgenerate + + assign ldbuf_windex = (LDBUF_FALLTHROUGH && ldbuf_r) ? ldbuf_rindex : ldbuf_free_index; + + always_comb begin : ldbuf_comb + ldbuf_flushed_d = ldbuf_flushed_q; + ldbuf_valid_d = ldbuf_valid_q; + + // In case of flush, raise the flushed flag in all slots. 
+ if (flush_i) begin + ldbuf_flushed_d = '1; + end + // Free read entry (in the case of fall-through mode, free the entry + // only if there is no pending load) + if (ldbuf_r && (!LDBUF_FALLTHROUGH || !ldbuf_w)) begin + ldbuf_valid_d[ldbuf_rindex] = 1'b0; + end + // Track a new outstanding operation in the load buffer + if (ldbuf_w) begin + ldbuf_flushed_d[ldbuf_windex] = 1'b0; + ldbuf_valid_d[ldbuf_windex] = 1'b1; + end + end + + always_ff @(posedge clk_i or negedge rst_ni) begin : ldbuf_ff + if (!rst_ni) begin + ldbuf_flushed_q <= '0; + ldbuf_valid_q <= '0; + ldbuf_last_id_q <= '0; + ldbuf_q <= '0; + end else begin + ldbuf_flushed_q <= ldbuf_flushed_d; + ldbuf_valid_q <= ldbuf_valid_d; + if (ldbuf_w) begin + ldbuf_last_id_q <= ldbuf_windex; + ldbuf_q[ldbuf_windex] <= ldbuf_wdata; + end + end + end + + // page offset is defined as the lower 12 bits, feed through for address checker + assign page_offset_o = lsu_ctrl_i.vaddr[11:0]; + // feed-through the virtual address for VA translation + assign vaddr_o = lsu_ctrl_i.vaddr; + // this is a read-only interface so set the write enable to 0 + assign req_port_o.data_we = 1'b0; + assign req_port_o.data_wdata = '0; + // compose the load buffer write data, control is handled in the FSM + assign ldbuf_wdata = { + lsu_ctrl_i.trans_id, lsu_ctrl_i.vaddr[riscv::XLEN_ALIGN_BYTES-1:0], lsu_ctrl_i.operation + }; + // output address + // we can now output the lower 12 bit as the index to the cache + assign req_port_o.address_index = lsu_ctrl_i.vaddr[ariane_pkg::DCACHE_INDEX_WIDTH-1:0]; + // translation from last cycle, again: control is handled in the FSM + assign req_port_o.address_tag = paddr_i[ariane_pkg::DCACHE_TAG_WIDTH + + ariane_pkg::DCACHE_INDEX_WIDTH-1 : + ariane_pkg::DCACHE_INDEX_WIDTH]; + // request id = index of the load buffer's entry + assign req_port_o.data_id = ldbuf_windex; + // directly forward exception fields (valid bit is set below) + assign ex_o.cause = ex_i.cause; + assign ex_o.tval = ex_i.tval; + + // Check 
that NI operations follow the necessary conditions + logic paddr_ni; + logic not_commit_time; + logic inflight_stores; + logic stall_ni; + assign paddr_ni = config_pkg::is_inside_nonidempotent_regions( + CVA6Cfg, {{52 - riscv::PPNW{1'b0}}, dtlb_ppn_i, 12'd0} + ); + assign not_commit_time = commit_tran_id_i != lsu_ctrl_i.trans_id; + assign inflight_stores = (!dcache_wbuffer_not_ni_i || !store_buffer_empty_i); + assign stall_ni = (inflight_stores || not_commit_time) && (paddr_ni && CVA6Cfg.NonIdemPotenceEn); + + // --------------- + // Load Control + // --------------- + always_comb begin : load_control + automatic logic accept_req; + + // default assignments + state_d = state_q; + translation_req_o = 1'b0; + req_port_o.data_req = 1'b0; + // tag control + req_port_o.kill_req = 1'b0; + req_port_o.tag_valid = 1'b0; + req_port_o.data_be = lsu_ctrl_i.be; + req_port_o.data_size = extract_transfer_size(lsu_ctrl_i.operation); + pop_ld_o = 1'b0; + + // In IDLE and SEND_TAG states, this unit can accept a new load request + // when the load buffer is not full or if there is a response and the + // load buffer is in fall-through mode + accept_req = (valid_i && (!ldbuf_full || (LDBUF_FALLTHROUGH && ldbuf_r))); + + case (state_q) + IDLE: begin + if (accept_req) begin + // start the translation process even though we do not know if the addresses match + // this should ease timing + translation_req_o = 1'b1; + // check if the page offset matches with a store, if it does then stall and wait + if (!page_offset_matches_i) begin + // make a load request to memory + req_port_o.data_req = 1'b1; + // we got no data grant so wait for the grant before sending the tag + if (!req_port_i.data_gnt) begin + state_d = WAIT_GNT; + end else begin + if (ariane_pkg::MMU_PRESENT && !dtlb_hit_i) begin + state_d = ABORT_TRANSACTION; + end else begin + if (!stall_ni) begin + // we got a grant and a hit on the DTLB so we can send the tag in the next cycle + state_d = SEND_TAG; + pop_ld_o = 1'b1; + // 
translation valid but this is to NC and the WB is not yet empty. + end else if (CVA6Cfg.NonIdemPotenceEn) begin + state_d = ABORT_TRANSACTION_NI; + end + end + end + end else begin + // wait for the store buffer to train and the page offset to not match anymore + state_d = WAIT_PAGE_OFFSET; + end + end + end + + // wait here for the page offset to not match anymore + WAIT_PAGE_OFFSET: begin + // we make a new request as soon as the page offset does not match anymore + if (!page_offset_matches_i) begin + state_d = WAIT_GNT; + end + end + + WAIT_GNT: begin + // keep the translation request up + translation_req_o = 1'b1; + // keep the request up + req_port_o.data_req = 1'b1; + // we finally got a data grant + if (req_port_i.data_gnt) begin + // so we send the tag in the next cycle + if (ariane_pkg::MMU_PRESENT && !dtlb_hit_i) begin + state_d = ABORT_TRANSACTION; + end else begin + if (!stall_ni) begin + // we got a grant and a hit on the DTLB so we can send the tag in the next cycle + state_d = SEND_TAG; + pop_ld_o = 1'b1; + // translation valid but this is to NC and the WB is not yet empty. 
+ end else if (CVA6Cfg.NonIdemPotenceEn) begin + state_d = ABORT_TRANSACTION_NI; + end + end + + end + // otherwise we keep waiting on our grant + end + // we know for sure that the tag we want to send is valid + SEND_TAG: begin + req_port_o.tag_valid = 1'b1; + state_d = IDLE; + + if (accept_req) begin + // start the translation process even though we do not know if the addresses match + // this should ease timing + translation_req_o = 1'b1; + // check if the page offset matches with a store, if it does stall and wait + if (!page_offset_matches_i) begin + // make a load request to memory + req_port_o.data_req = 1'b1; + // we got no data grant so wait for the grant before sending the tag + if (!req_port_i.data_gnt) begin + state_d = WAIT_GNT; + end else begin + // we got a grant so we can send the tag in the next cycle + if (ariane_pkg::MMU_PRESENT && !dtlb_hit_i) begin + state_d = ABORT_TRANSACTION; + end else begin + if (!stall_ni) begin + // we got a grant and a hit on the DTLB so we can send the tag in the next cycle + state_d = SEND_TAG; + pop_ld_o = 1'b1; + // translation valid but this is to NC and the WB is not yet empty. 
+ end else if (CVA6Cfg.NonIdemPotenceEn) begin + state_d = ABORT_TRANSACTION_NI; + end + end + end + end else begin + // wait for the store buffer to train and the page offset to not match anymore + state_d = WAIT_PAGE_OFFSET; + end + end + // ---------- + // Exception + // ---------- + // if we got an exception we need to kill the request immediately + if (ex_i.valid) begin + req_port_o.kill_req = 1'b1; + end + end + + WAIT_FLUSH: begin + // the D$ arbiter will take care of presenting this to the memory only in case we + // have an outstanding request + req_port_o.kill_req = 1'b1; + req_port_o.tag_valid = 1'b1; + // we've killed the current request so we can go back to idle + state_d = IDLE; + end + + default: begin + // abort the previous request - free the D$ arbiter + // we are here because of a TLB miss, we need to abort the current request and give way for the + // PTW walker to satisfy the TLB miss + if (state_q == ABORT_TRANSACTION && ariane_pkg::MMU_PRESENT) begin + req_port_o.kill_req = 1'b1; + req_port_o.tag_valid = 1'b1; + // wait until the WB is empty + state_d = WAIT_TRANSLATION; + end else if (state_q == ABORT_TRANSACTION_NI && CVA6Cfg.NonIdemPotenceEn) begin + req_port_o.kill_req = 1'b1; + req_port_o.tag_valid = 1'b1; + // re-do the request + state_d = WAIT_WB_EMPTY; + end else if (state_q == WAIT_WB_EMPTY && CVA6Cfg.NonIdemPotenceEn && dcache_wbuffer_not_ni_i) begin + // Wait until the write-back buffer is empty in the data cache. + // the write buffer is empty, so lets go and re-do the translation. 
+ state_d = WAIT_TRANSLATION; + end else if(state_q == WAIT_TRANSLATION && (ariane_pkg::MMU_PRESENT || CVA6Cfg.NonIdemPotenceEn)) begin + translation_req_o = 1'b1; + // we've got a hit and we can continue with the request process + if (dtlb_hit_i) state_d = WAIT_GNT; + + // we got an exception + if (ex_i.valid) begin + // the next state will be the idle state + state_d = IDLE; + // pop load - but only if we are not getting an rvalid in here - otherwise we will over-write an incoming transaction + pop_ld_o = ~req_port_i.data_rvalid; + end + end else begin + state_d = IDLE; + end + end + endcase + + // if we just flushed and the queue is not empty or we are getting an rvalid this cycle wait in a extra stage + if (flush_i) begin + state_d = WAIT_FLUSH; + end + end + + // track the load data for later usage + assign ldbuf_w = req_port_o.data_req & req_port_i.data_gnt; + + // --------------- + // Retire Load + // --------------- + assign ldbuf_rindex = (CVA6Cfg.NrLoadBufEntries > 1) ? ldbuf_id_t'(req_port_i.data_rid) : 1'b0, + ldbuf_rdata = ldbuf_q[ldbuf_rindex]; + + // decoupled rvalid process + always_comb begin : rvalid_output + // read the pending load buffer + ldbuf_r = req_port_i.data_rvalid; + trans_id_o = ldbuf_q[ldbuf_rindex].trans_id; + valid_o = 1'b0; + ex_o.valid = 1'b0; + + // we got an rvalid and it's corresponding request was not flushed + if (req_port_i.data_rvalid && !ldbuf_flushed_q[ldbuf_rindex]) begin + // if the response corresponds to the last request, check that we are not killing it + if ((ldbuf_last_id_q != ldbuf_rindex) || !req_port_o.kill_req) valid_o = 1'b1; + // the output is also valid if we got an exception. An exception arrives one cycle after + // dtlb_hit_i is asserted, i.e. when we are in SEND_TAG. Otherwise, the exception + // corresponds to the next request that is already being translated (see below). 
+ if (ex_i.valid && (state_q == SEND_TAG)) begin + valid_o = 1'b1; + ex_o.valid = 1'b1; + end + end + + // an exception occurred during translation + // exceptions can retire out-of-order -> but we need to give priority to non-excepting load and stores + // so we simply check if we got an rvalid if so we prioritize it by not retiring the exception - we simply go for another + // round in the load FSM + if ((ariane_pkg::MMU_PRESENT || CVA6Cfg.NonIdemPotenceEn) && (state_q == WAIT_TRANSLATION) && !req_port_i.data_rvalid && ex_i.valid && valid_i) begin + trans_id_o = lsu_ctrl_i.trans_id; + valid_o = 1'b1; + ex_o.valid = 1'b1; + end + end + + + // latch physical address for the tag cycle (one cycle after applying the index) + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + state_q <= IDLE; + end else begin + state_q <= state_d; + end + end + + // --------------- + // Sign Extend + // --------------- + riscv::xlen_t shifted_data; + + // realign as needed + assign shifted_data = req_port_i.data_rdata >> {ldbuf_rdata.address_offset, 3'b000}; + + /* // result mux (leaner code, but more logic stages. 
+ // can be used instead of the code below (in between //result mux fast) if timing is not so critical) + always_comb begin + unique case (ldbuf_rdata.operation) + LWU: result_o = shifted_data[31:0]; + LHU: result_o = shifted_data[15:0]; + LBU: result_o = shifted_data[7:0]; + LW: result_o = 64'(signed'(shifted_data[31:0])); + LH: result_o = 64'(signed'(shifted_data[15:0])); + LB: result_o = 64'(signed'(shifted_data[ 7:0])); + default: result_o = shifted_data; + endcase + end */ + + // result mux fast + logic [ (riscv::XLEN/8)-1:0] rdata_sign_bits; + logic [riscv::XLEN_ALIGN_BYTES-1:0] rdata_offset; + logic rdata_sign_bit, rdata_is_signed, rdata_is_fp_signed; + + + // prepare these signals for faster selection in the next cycle + assign rdata_is_signed = ldbuf_rdata.operation inside {ariane_pkg::LW, ariane_pkg::LH, ariane_pkg::LB}; + assign rdata_is_fp_signed = ldbuf_rdata.operation inside {ariane_pkg::FLW, ariane_pkg::FLH, ariane_pkg::FLB}; + assign rdata_offset = ((ldbuf_rdata.operation inside {ariane_pkg::LW, ariane_pkg::FLW}) & riscv::IS_XLEN64) ? ldbuf_rdata.address_offset + 3 : + ( ldbuf_rdata.operation inside {ariane_pkg::LH, ariane_pkg::FLH}) ? 
ldbuf_rdata.address_offset + 1 : + ldbuf_rdata.address_offset; + + for (genvar i = 0; i < (riscv::XLEN / 8); i++) begin : gen_sign_bits + assign rdata_sign_bits[i] = req_port_i.data_rdata[(i+1)*8-1]; + end + + + // select correct sign bit in parallel to result shifter above + // pull to 0 if unsigned + assign rdata_sign_bit = rdata_is_signed & rdata_sign_bits[rdata_offset] | rdata_is_fp_signed; + + // result mux + always_comb begin + unique case (ldbuf_rdata.operation) + ariane_pkg::LW, ariane_pkg::LWU: + result_o = {{riscv::XLEN - 32{rdata_sign_bit}}, shifted_data[31:0]}; + ariane_pkg::LH, ariane_pkg::LHU: + result_o = {{riscv::XLEN - 32 + 16{rdata_sign_bit}}, shifted_data[15:0]}; + ariane_pkg::LB, ariane_pkg::LBU: + result_o = {{riscv::XLEN - 32 + 24{rdata_sign_bit}}, shifted_data[7:0]}; + default: begin + // FLW, FLH and FLB have been defined here in default case to improve Code Coverage + if (CVA6Cfg.FpPresent) begin + unique case (ldbuf_rdata.operation) + ariane_pkg::FLW: begin + result_o = {{riscv::XLEN - 32{rdata_sign_bit}}, shifted_data[31:0]}; + end + ariane_pkg::FLH: begin + result_o = {{riscv::XLEN - 32 + 16{rdata_sign_bit}}, shifted_data[15:0]}; + end + ariane_pkg::FLB: begin + result_o = {{riscv::XLEN - 32 + 24{rdata_sign_bit}}, shifted_data[7:0]}; + end + default: begin + result_o = shifted_data[riscv::XLEN-1:0]; + end + endcase + end else begin + result_o = shifted_data[riscv::XLEN-1:0]; + end + end + endcase + end + // end result mux fast + + /////////////////////////////////////////////////////// + // assertions + /////////////////////////////////////////////////////// + + //pragma translate_off +`ifndef VERILATOR + initial + assert (ariane_pkg::DCACHE_TID_WIDTH >= REQ_ID_BITS) + else $fatal(1, "CVA6ConfigDcacheIdWidth parameter is not wide enough to encode pending loads"); + // check invalid offsets, but only issue a warning as these conditions actually trigger a load address misaligned exception + addr_offset0 : + assert property (@(posedge 
clk_i) disable iff (~rst_ni) + ldbuf_w |-> (ldbuf_wdata.operation inside {ariane_pkg::LW, ariane_pkg::LWU}) |-> ldbuf_wdata.address_offset < 5) + else $fatal(1, "invalid address offset used with {LW, LWU}"); + addr_offset1 : + assert property (@(posedge clk_i) disable iff (~rst_ni) + ldbuf_w |-> (ldbuf_wdata.operation inside {ariane_pkg::LH, ariane_pkg::LHU}) |-> ldbuf_wdata.address_offset < 7) + else $fatal(1, "invalid address offset used with {LH, LHU}"); + addr_offset2 : + assert property (@(posedge clk_i) disable iff (~rst_ni) + ldbuf_w |-> (ldbuf_wdata.operation inside {ariane_pkg::LB, ariane_pkg::LBU}) |-> ldbuf_wdata.address_offset < 8) + else $fatal(1, "invalid address offset used with {LB, LBU}"); +`endif + //pragma translate_on + +endmodule diff --git a/test/type_param/core/lsu_bypass.sv b/test/type_param/core/lsu_bypass.sv new file mode 100644 index 00000000..96f6d502 --- /dev/null +++ b/test/type_param/core/lsu_bypass.sv @@ -0,0 +1,122 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 19.04.2017 +// Description: Load Store Unit, handles address calculation and memory interface signals + + +// ------------------ +// LSU Control +// ------------------ +// The LSU consists of two independent block which share a common address translation block. 
+// The one block is the load unit, the other one is the store unit. They will signal their readiness
+// with separate signals. If they are not ready the LSU control should keep the last applied signals stable.
+// Furthermore it can be the case that another request for one of the two units arrives in which case
+// the LSU control should sample it and store it for later application to the units. It does so, by storing it in a
+// two element FIFO. This is necessary as we only know very late in the cycle whether the load/store will succeed (address check,
+// TLB hit mainly). So we better unconditionally allow another request to arrive and store this request in case we need to.
+module lsu_bypass
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
+) (
+    input logic clk_i,  // clock; all state below is registered on its rising edge
+    input logic rst_ni,  // asynchronous reset, active low
+    input logic flush_i,  // clears the storage, the fill counter and both pointers
+
+    input lsu_ctrl_t lsu_req_i,  // incoming request
+    input logic lsu_req_valid_i,  // when set, lsu_req_i is written into the storage
+    input logic pop_ld_i,  // invalidates the entry at the read pointer and advances it
+    input logic pop_st_i,  // same effect as pop_ld_i
+
+    output lsu_ctrl_t lsu_ctrl_o,  // lsu_req_i while empty, otherwise the entry at the read pointer
+    output logic ready_o  // high exactly while the fill counter is zero
+);
+
+  lsu_ctrl_t [1:0] mem_n, mem_q;  // two-entry request storage (next / registered)
+  logic read_pointer_n, read_pointer_q;  // selects the entry driven on lsu_ctrl_o when not empty
+  logic write_pointer_n, write_pointer_q;  // selects the entry written by the next valid request
+  logic [1:0] status_cnt_n, status_cnt_q;  // fill counter: +1 per valid request, -1 per pop
+
+  logic empty;
+  assign empty = (status_cnt_q == 0);
+  assign ready_o = empty;
+
+  always_comb begin  // next-state logic for storage, pointers and fill counter
+    automatic logic [1:0] status_cnt;  // working copies: a push and pops in the same
+    automatic logic write_pointer;  // evaluation all accumulate on these before they
+    automatic logic read_pointer;  // are handed to the *_n signals at the end
+
+    status_cnt = status_cnt_q;
+    write_pointer = write_pointer_q;
+    read_pointer = read_pointer_q;
+
+    mem_n = mem_q;  // hold the stored requests by default
+    // we've got a valid LSU request
+    if (lsu_req_valid_i) begin
+      mem_n[write_pointer_q] = lsu_req_i;
+      write_pointer++;
+      status_cnt++;
+    end
+
+    if (pop_ld_i) begin
+      // invalidate the result
+      mem_n[read_pointer_q].valid = 1'b0;
+      read_pointer++;
+      status_cnt--;
+    end
+
+    if (pop_st_i) begin
+      // invalidate the result
+      mem_n[read_pointer_q].valid = 1'b0;
+      read_pointer++;
+      status_cnt--;
+    end
+
+    if (pop_st_i && pop_ld_i) mem_n = '0;  // both pops in one cycle: clear the whole storage
+
+    if (flush_i) begin
+      status_cnt = '0;
+      write_pointer = '0;
+      read_pointer = '0;
+      mem_n = '0;
+    end
+    // hand the working copies over to the next-state signals
+    read_pointer_n = read_pointer;
+    write_pointer_n = write_pointer;
+    status_cnt_n = status_cnt;
+  end
+
+  // output assignment: pass the incoming request straight through while nothing is queued
+  always_comb begin : output_assignments
+    if (empty) begin
+      lsu_ctrl_o = lsu_req_i;
+    end else begin
+      lsu_ctrl_o = mem_q[read_pointer_q];
+    end
+  end
+
+  // registers (everything resets to zero)
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      mem_q <= '0;
+      status_cnt_q <= '0;
+      write_pointer_q <= '0;
+      read_pointer_q <= '0;
+    end else begin
+      mem_q <= mem_n;
+      status_cnt_q <= status_cnt_n;
+      write_pointer_q <= write_pointer_n;
+      read_pointer_q <= read_pointer_n;
+    end
+  end
+endmodule
+
diff --git a/test/type_param/core/mmu_sv32/cva6_mmu_sv32.sv b/test/type_param/core/mmu_sv32/cva6_mmu_sv32.sv
new file mode 100644
index 00000000..d194306e
--- /dev/null
+++ b/test/type_param/core/mmu_sv32/cva6_mmu_sv32.sv
@@ -0,0 +1,565 @@
+// Copyright (c) 2021 Thales.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Sebastien Jacq Thales Research & Technology
+// Date: 17/07/2021
+//
+// Additional contributions by:
+// Sebastien Jacq - sjthales on github.com
+//
+// Description: Memory Management Unit for CV32A6, contains TLB and
+// address translation unit. Sv32 as defined in RISC-V
+// privilege specification 1.11-WIP.
+// This module is an adaptation of the MMU Sv39 developed
+// by Florian Zaruba to the Sv32 standard.
+//
+// =========================================================================== //
+// Revisions :
+// Date Version Author Description
+// 2020-02-17 0.1 S.Jacq MMU Sv32 for CV32A6
+// =========================================================================== //
+
+module cva6_mmu_sv32
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    parameter int unsigned INSTR_TLB_ENTRIES = 2,
+    parameter int unsigned DATA_TLB_ENTRIES = 2,
+    parameter int unsigned ASID_WIDTH = 1
+) (
+    input logic clk_i,
+    input logic rst_ni,
+    input logic flush_i,  // passed on to the PTW only; the TLBs are flushed by flush_tlb_i
+    input logic enable_translation_i,  // enable virtual memory translation for instruction fetches
+    input logic en_ld_st_translation_i,  // enable virtual memory translation for load/stores
+    // IF interface
+    input icache_arsp_t icache_areq_i,
+    output icache_areq_t icache_areq_o,
+    // LSU interface
+    // this is a more minimalistic interface because the actual addressing logic is handled
+    // in the LSU as we distinguish load and stores, what we do here is simple address translation
+    input exception_t misaligned_ex_i,  // registered, then driven on lsu_exception_o by default
+    input logic lsu_req_i,  // request address translation
+    input logic [riscv::VLEN-1:0] lsu_vaddr_i,  // virtual address in
+    input logic lsu_is_store_i,  // the translation is requested by a store
+    // if we need to walk the page table we can't grant in the same cycle
+    // Cycle 0
+    output logic lsu_dtlb_hit_o,  // sent in the same cycle as the request if translation hits in the DTLB
+    output logic [riscv::PPNW-1:0] lsu_dtlb_ppn_o,  // ppn (send same cycle as hit)
+    // Cycle 1
+    output logic lsu_valid_o,  // translation is valid
+    output logic [riscv::PLEN-1:0] lsu_paddr_o,  // translated address
+    output exception_t lsu_exception_o,  // address translation threw an exception
+    // General control signals
+    input riscv::priv_lvl_t priv_lvl_i,  // used for the fetch permission check and the fetch PMP
+    input riscv::priv_lvl_t ld_st_priv_lvl_i,  // used for the load/store permission check and the data PMP
+    input logic sum_i,  // when clear, an S-mode load/store to a user page sets daccess_err
+    input logic mxr_i,  // passed on to the PTW
+    // input logic flag_mprv_i,
+    input logic [riscv::PPNW-1:0] satp_ppn_i,  // passed on to the PTW
+    input logic [ASID_WIDTH-1:0] asid_i,  // lookup ASID for ITLB, DTLB, shared TLB and PTW
+    input logic [ASID_WIDTH-1:0] asid_to_be_flushed_i,  // passed on to ITLB and DTLB
+    input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i,  // passed on to ITLB and DTLB
+    input logic flush_tlb_i,  // flushes ITLB, DTLB and shared TLB
+    // Performance counters
+    output logic itlb_miss_o,  // driven by the shared TLB
+    output logic dtlb_miss_o,  // driven by the shared TLB
+    // PTW memory interface
+    input dcache_req_o_t req_port_i,
+    output dcache_req_i_t req_port_o,
+    // PMP
+    input riscv::pmpcfg_t [15:0] pmpcfg_i,
+    input logic [15:0][riscv::PLEN-3:0] pmpaddr_i
+);
+
+  logic iaccess_err;  // insufficient privilege to access this instruction page
+  logic daccess_err;  // insufficient privilege to access this data page
+  logic ptw_active;  // PTW is currently walking a page table
+  logic walking_instr;  // PTW is walking because of an ITLB miss
+  logic ptw_error;  // PTW threw an exception
+  logic ptw_access_exception;  // PTW threw an access exception (PMPs)
+  logic [riscv::PLEN-1:0] ptw_bad_paddr;  // PTW PMP exception bad physical addr
+
+  logic [riscv::VLEN-1:0] update_vaddr;
+  tlb_update_sv32_t update_itlb, update_dtlb, update_shared_tlb;
+
+  logic itlb_lu_access;
+  riscv::pte_sv32_t itlb_content;
+  logic itlb_is_4M;
+  logic itlb_lu_hit;
+
+  logic dtlb_lu_access;
+  riscv::pte_sv32_t dtlb_content;
+  logic dtlb_is_4M;
+  logic dtlb_lu_hit;
+
+  logic shared_tlb_access;
+  logic [riscv::VLEN-1:0] shared_tlb_vaddr;
+  logic shared_tlb_hit;
+
+  logic itlb_req;
+
+
+  // Assignments
+  assign itlb_lu_access = icache_areq_i.fetch_req;
+  assign dtlb_lu_access = lsu_req_i;
+
+
+  cva6_tlb_sv32 #(
+      .CVA6Cfg    (CVA6Cfg),
+      .TLB_ENTRIES(INSTR_TLB_ENTRIES),
+      .ASID_WIDTH (ASID_WIDTH)
+  ) i_itlb (
+      .clk_i  (clk_i),
+      .rst_ni (rst_ni),
+      .flush_i(flush_tlb_i),
+
+      .update_i(update_itlb),
+
+      .lu_access_i          (itlb_lu_access),
+      .lu_asid_i            (asid_i),
+      .asid_to_be_flushed_i (asid_to_be_flushed_i),
+      .vaddr_to_be_flushed_i(vaddr_to_be_flushed_i),
+      .lu_vaddr_i           (icache_areq_i.fetch_vaddr),
+      .lu_content_o         (itlb_content),
+
+      .lu_is_4M_o(itlb_is_4M),
+      .lu_hit_o  (itlb_lu_hit)
+  );
+
+  cva6_tlb_sv32 #(
+      .CVA6Cfg    (CVA6Cfg),
+      .TLB_ENTRIES(DATA_TLB_ENTRIES),
+      .ASID_WIDTH (ASID_WIDTH)
+  ) i_dtlb (
+      .clk_i  (clk_i),
+      .rst_ni (rst_ni),
+      .flush_i(flush_tlb_i),
+
+      .update_i(update_dtlb),
+
+      .lu_access_i          (dtlb_lu_access),
+      .lu_asid_i            (asid_i),
+      .asid_to_be_flushed_i (asid_to_be_flushed_i),
+      .vaddr_to_be_flushed_i(vaddr_to_be_flushed_i),
+      .lu_vaddr_i           (lsu_vaddr_i),
+      .lu_content_o         (dtlb_content),
+
+      .lu_is_4M_o(dtlb_is_4M),
+      .lu_hit_o  (dtlb_lu_hit)
+  );
+
+  cva6_shared_tlb_sv32 #(
+      .CVA6Cfg         (CVA6Cfg),
+      .SHARED_TLB_DEPTH(64),
+      .SHARED_TLB_WAYS (2),
+      .ASID_WIDTH      (ASID_WIDTH)
+  ) i_shared_tlb (
+      .clk_i  (clk_i),
+      .rst_ni (rst_ni),
+      .flush_i(flush_tlb_i),
+
+      .enable_translation_i  (enable_translation_i),
+      .en_ld_st_translation_i(en_ld_st_translation_i),
+
+      .asid_i       (asid_i),
+      // from TLBs
+      // did we miss?
+      .itlb_access_i(itlb_lu_access),
+      .itlb_hit_i   (itlb_lu_hit),
+      .itlb_vaddr_i (icache_areq_i.fetch_vaddr),
+
+      .dtlb_access_i(dtlb_lu_access),
+      .dtlb_hit_i   (dtlb_lu_hit),
+      .dtlb_vaddr_i (lsu_vaddr_i),
+
+      // to TLBs, update logic
+      .itlb_update_o(update_itlb),
+      .dtlb_update_o(update_dtlb),
+
+      // Performance counters
+      .itlb_miss_o(itlb_miss_o),
+      .dtlb_miss_o(dtlb_miss_o),
+
+      .shared_tlb_access_o(shared_tlb_access),
+      .shared_tlb_hit_o   (shared_tlb_hit),
+      .shared_tlb_vaddr_o (shared_tlb_vaddr),
+
+      .itlb_req_o         (itlb_req),
+      // to update shared tlb
+      .shared_tlb_update_i(update_shared_tlb)
+  );
+
+  cva6_ptw_sv32 #(
+      .CVA6Cfg   (CVA6Cfg),
+      .ASID_WIDTH(ASID_WIDTH)
+  ) i_ptw (
+      .clk_i  (clk_i),
+      .rst_ni (rst_ni),
+      .flush_i(flush_i),
+
+      .ptw_active_o          (ptw_active),
+      .walking_instr_o       (walking_instr),
+      .ptw_error_o           (ptw_error),
+      .ptw_access_exception_o(ptw_access_exception),
+
+      .lsu_is_store_i(lsu_is_store_i),
+      // PTW memory interface
+      .req_port_i    (req_port_i),
+      .req_port_o    (req_port_o),
+
+      // to Shared TLB, update logic
+      .shared_tlb_update_o(update_shared_tlb),
+
+      .update_vaddr_o(update_vaddr),
+
+      .asid_i(asid_i),
+
+      // from shared TLB
+      // did we miss?
+      .shared_tlb_access_i(shared_tlb_access),
+      .shared_tlb_hit_i   (shared_tlb_hit),
+      .shared_tlb_vaddr_i (shared_tlb_vaddr),
+
+      .itlb_req_i(itlb_req),
+
+      // from CSR file
+      .satp_ppn_i(satp_ppn_i),  // ppn from satp
+      .mxr_i     (mxr_i),
+
+      // Performance counters
+      .shared_tlb_miss_o(),  //open for now
+
+      // PMP
+      .pmpcfg_i   (pmpcfg_i),
+      .pmpaddr_i  (pmpaddr_i),
+      .bad_paddr_o(ptw_bad_paddr)
+
+  );
+
+  // ila_1 i_ila_1 (
+  //     .clk(clk_i), // input wire clk
+  //     .probe0({req_port_o.address_tag, req_port_o.address_index}),
+  //     .probe1(req_port_o.data_req), // input wire [63:0] probe1
+  //     .probe2(req_port_i.data_gnt), // input wire [0:0] probe2
+  //     .probe3(req_port_i.data_rdata), // input wire [0:0] probe3
+  //     .probe4(req_port_i.data_rvalid), // input wire [0:0] probe4
+  //     .probe5(ptw_error), // input wire [1:0] probe5
+  //     .probe6(update_vaddr), // input wire [0:0] probe6
+  //     .probe7(update_itlb.valid), // input wire [0:0] probe7
+  //     .probe8(update_dtlb.valid), // input wire [0:0] probe8
+  //     .probe9(dtlb_lu_access), // input wire [0:0] probe9
+  //     .probe10(lsu_vaddr_i), // input wire [0:0] probe10
+  //     .probe11(dtlb_lu_hit), // input wire [0:0] probe11
+  //     .probe12(itlb_lu_access), // input wire [0:0] probe12
+  //     .probe13(icache_areq_i.fetch_vaddr), // input wire [0:0] probe13
+  //     .probe14(itlb_lu_hit) // input wire [0:0] probe14
+  // );
+
+  //-----------------------
+  // Instruction Interface
+  //-----------------------
+  logic match_any_execute_region;
+  logic pmp_instr_allow;
+
+  // The instruction interface is a simple request response interface
+  always_comb begin : instr_interface
+    // MMU disabled: just pass through
+    icache_areq_o.fetch_valid = icache_areq_i.fetch_req;
+    if (riscv::PLEN > riscv::VLEN)
+      icache_areq_o.fetch_paddr = {
+        {riscv::PLEN - riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr
+      };  // play through in case we disabled address translation
+    else
+      icache_areq_o.fetch_paddr = {
+        2'b00, icache_areq_i.fetch_vaddr[riscv::VLEN-1:0]
+      };  // play through in case we disabled address translation
+    // two potential exception sources:
+    // 1. HPTW threw an exception -> signal with a page fault exception
+    // 2. We got an access error because of insufficient permissions -> throw an access exception
+    icache_areq_o.fetch_exception = '0;
+    // Check whether we are allowed to access this memory region from a fetch perspective
+    iaccess_err = icache_areq_i.fetch_req && (((priv_lvl_i == riscv::PRIV_LVL_U) && ~itlb_content.u)
+                                                 || ((priv_lvl_i == riscv::PRIV_LVL_S) && itlb_content.u));
+
+    // MMU enabled: address from TLB, request delayed until hit. Error when TLB
+    // hit and no access right or TLB hit and translated address not valid (e.g.
+    // AXI decode error), or when PTW performs walk due to ITLB miss and raises
+    // an error.
+    if (enable_translation_i) begin
+      // we work with SV32, so if VM is enabled, check that all bits [riscv::VLEN-1:riscv::SV-1] are equal
+      if (icache_areq_i.fetch_req && !((&icache_areq_i.fetch_vaddr[riscv::VLEN-1:riscv::SV-1]) == 1'b1 || (|icache_areq_i.fetch_vaddr[riscv::VLEN-1:riscv::SV-1]) == 1'b0)) begin
+        icache_areq_o.fetch_exception = {
+          riscv::INSTR_ACCESS_FAULT,
+          {{riscv::XLEN - riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr},
+          1'b1
+        };
+      end
+
+      icache_areq_o.fetch_valid = 1'b0;
+
+      // 4K page
+      icache_areq_o.fetch_paddr = {itlb_content.ppn, icache_areq_i.fetch_vaddr[11:0]};
+      // Mega page
+      if (itlb_is_4M) begin
+        icache_areq_o.fetch_paddr[21:12] = icache_areq_i.fetch_vaddr[21:12];
+      end
+
+
+      // ---------
+      // ITLB Hit
+      // --------
+      // if we hit the ITLB output the request signal immediately
+      if (itlb_lu_hit) begin
+        icache_areq_o.fetch_valid = icache_areq_i.fetch_req;
+        // we got an access error
+        if (iaccess_err) begin
+          // throw a page fault
+          icache_areq_o.fetch_exception = {
+            riscv::INSTR_PAGE_FAULT,
+            {{riscv::XLEN - riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr},
+            1'b1
+          };  //to check on wave --> not connected
+        end else if (!pmp_instr_allow) begin
+          icache_areq_o.fetch_exception = {
+            riscv::INSTR_ACCESS_FAULT, icache_areq_i.fetch_vaddr, 1'b1  // raw vaddr, no XLEN-VLEN padding as in the page-fault case
+          };  //to check on wave --> not connected
+        end
+      end else
+      // ---------
+      // ITLB Miss
+      // ---------
+      // watch out for exceptions happening during walking the page table
+      if (ptw_active && walking_instr) begin
+        icache_areq_o.fetch_valid = ptw_error | ptw_access_exception;
+        if (ptw_error)
+          icache_areq_o.fetch_exception = {
+            riscv::INSTR_PAGE_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, update_vaddr}, 1'b1
+          };  //to check on wave
+        // TODO(moschn,zarubaf): What should the value of tval be in this case?
+        else
+          icache_areq_o.fetch_exception = {
+            riscv::INSTR_ACCESS_FAULT, ptw_bad_paddr[riscv::PLEN-1:2], 1'b1
+          };  //to check on wave --> not connected
+      end
+    end
+    // if it didn't match any execute region throw an `Instruction Access Fault`
+    // or: if we are not translating, check PMPs immediately on the paddr
+    if (!match_any_execute_region || (!enable_translation_i && !pmp_instr_allow)) begin
+      icache_areq_o.fetch_exception = {
+        riscv::INSTR_ACCESS_FAULT, icache_areq_o.fetch_paddr[riscv::PLEN-1:2], 1'b1
+      };  //to check on wave --> not connected
+    end
+  end
+
+  // check for execute flag on memory
+  assign match_any_execute_region = config_pkg::is_inside_execute_regions(
+      CVA6Cfg, {{64 - riscv::PLEN{1'b0}}, icache_areq_o.fetch_paddr}
+  );
+
+  // Instruction fetch
+  pmp #(
+      .PLEN      (riscv::PLEN),
+      .PMP_LEN   (riscv::PLEN - 2),
+      .NR_ENTRIES(CVA6Cfg.NrPMPEntries)
+  ) i_pmp_if (
+      .addr_i       (icache_areq_o.fetch_paddr),
+      .priv_lvl_i,
+      // we will always execute on the instruction fetch port
+      .access_type_i(riscv::ACCESS_EXEC),
+      // Configuration
+      .conf_addr_i  (pmpaddr_i),
+      .conf_i       (pmpcfg_i),
+      .allow_o      (pmp_instr_allow)
+  );
+
+  //-----------------------
+  // Data Interface
+  //-----------------------
+  logic [riscv::VLEN-1:0] lsu_vaddr_n, lsu_vaddr_q;
+  riscv::pte_sv32_t dtlb_pte_n, dtlb_pte_q;
+  exception_t misaligned_ex_n, misaligned_ex_q;
+  logic lsu_req_n, lsu_req_q;
+  logic lsu_is_store_n, lsu_is_store_q;
+  logic dtlb_hit_n, dtlb_hit_q;
+  logic dtlb_is_4M_n, dtlb_is_4M_q;
+
+  // check if we need to do translation or if we are always ready (e.g.: we are not translating anything)
+  assign lsu_dtlb_hit_o = (en_ld_st_translation_i) ? dtlb_lu_hit : 1'b1;
+
+  // Wires to PMP checks
+  riscv::pmp_access_t pmp_access_type;
+  logic pmp_data_allow;
+  localparam PPNWMin = (riscv::PPNW - 1 > 29) ? 29 : riscv::PPNW - 1;  // NOTE(review): not referenced elsewhere in this module
+  // The data interface is simpler and only consists of a request/response interface
+  always_comb begin : data_interface
+    // save request and DTLB response
+    lsu_vaddr_n = lsu_vaddr_i;
+    lsu_req_n = lsu_req_i;
+    misaligned_ex_n = misaligned_ex_i;
+    dtlb_pte_n = dtlb_content;
+    dtlb_hit_n = dtlb_lu_hit;
+    lsu_is_store_n = lsu_is_store_i;
+    dtlb_is_4M_n = dtlb_is_4M;
+
+    if (riscv::PLEN > riscv::VLEN) begin
+      lsu_paddr_o = {{riscv::PLEN - riscv::VLEN{1'b0}}, lsu_vaddr_q};
+      lsu_dtlb_ppn_o = {{riscv::PLEN - riscv::VLEN{1'b0}}, lsu_vaddr_n[riscv::VLEN-1:12]};
+    end else begin
+      lsu_paddr_o = {2'b00, lsu_vaddr_q[riscv::VLEN-1:0]};
+      lsu_dtlb_ppn_o = lsu_vaddr_n[riscv::PPNW-1:0];  // NOTE(review): low vaddr bits, while the branch above slices [VLEN-1:12] - confirm
+    end
+    lsu_valid_o = lsu_req_q;
+    lsu_exception_o = misaligned_ex_q;
+    pmp_access_type = lsu_is_store_q ? riscv::ACCESS_WRITE : riscv::ACCESS_READ;
+
+    // mute misaligned exceptions if there is no request otherwise they will throw accidental exceptions
+    misaligned_ex_n.valid = misaligned_ex_i.valid & lsu_req_i;
+
+    // Check if the User flag is set, then we may only access it in supervisor mode
+    // if SUM is enabled
+    daccess_err = (ld_st_priv_lvl_i == riscv::PRIV_LVL_S && !sum_i && dtlb_pte_q.u) ||  // SUM is not set and we are trying to access a user page in supervisor mode
+        (ld_st_priv_lvl_i == riscv::PRIV_LVL_U && !dtlb_pte_q.u);  // this is not a user page but we are in user mode and trying to access it
+    // translation is enabled and no misaligned exception occurred
+    if (en_ld_st_translation_i && !misaligned_ex_q.valid) begin
+      lsu_valid_o = 1'b0;
+      // 4K page
+      lsu_paddr_o = {dtlb_pte_q.ppn, lsu_vaddr_q[11:0]};
+      lsu_dtlb_ppn_o = dtlb_content.ppn;
+      // Mega page
+      if (dtlb_is_4M_q) begin
+        lsu_paddr_o[21:12] = lsu_vaddr_q[21:12];
+        lsu_dtlb_ppn_o[21:12] = lsu_vaddr_n[21:12];  // NOTE(review): a PPN indexed with address bit positions [21:12] - confirm intent
+      end
+      // ---------
+      // DTLB Hit
+      // --------
+      if (dtlb_hit_q && lsu_req_q) begin
+        lsu_valid_o = 1'b1;
+        // exception priority:
+        // PAGE_FAULTS have higher priority than ACCESS_FAULTS
+        // virtual memory based exceptions are PAGE_FAULTS
+        // physical memory based exceptions are ACCESS_FAULTS (PMA/PMP)
+
+        // this is a store
+        if (lsu_is_store_q) begin
+          // check if the page is write-able and we are not violating privileges
+          // also check if the dirty flag is set
+          if (!dtlb_pte_q.w || daccess_err || !dtlb_pte_q.d) begin
+            lsu_exception_o = {
+              riscv::STORE_PAGE_FAULT,
+              {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, lsu_vaddr_q},
+              1'b1
+            };  //to check on wave
+            // Check if any PMPs are violated
+          end else if (!pmp_data_allow) begin
+            lsu_exception_o = {
+              riscv::ST_ACCESS_FAULT, lsu_paddr_o[riscv::PLEN-1:2], 1'b1
+            };  //only 32 bits on 34b of lsu_paddr_o are returned.
+          end
+
+          // this is a load
+        end else begin
+          // check for sufficient access privileges - throw a page fault if necessary
+          if (daccess_err) begin
+            lsu_exception_o = {
+              riscv::LOAD_PAGE_FAULT,
+              {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, lsu_vaddr_q},
+              1'b1
+            };
+            // Check if any PMPs are violated
+          end else if (!pmp_data_allow) begin
+            lsu_exception_o = {
+              riscv::LD_ACCESS_FAULT, lsu_paddr_o[riscv::PLEN-1:2], 1'b1
+            };  //only 32 bits on 34b of lsu_paddr_o are returned.
+          end
+        end
+      end else
+
+      // ---------
+      // DTLB Miss
+      // ---------
+      // watch out for exceptions
+      if (ptw_active && !walking_instr) begin
+        // page table walker threw an exception
+        if (ptw_error) begin
+          // an error makes the translation valid
+          lsu_valid_o = 1'b1;
+          // the page table walker can only throw page faults
+          if (lsu_is_store_q) begin
+            lsu_exception_o = {
+              riscv::STORE_PAGE_FAULT,
+              {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, update_vaddr},
+              1'b1
+            };
+          end else begin
+            lsu_exception_o = {
+              riscv::LOAD_PAGE_FAULT,
+              {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, update_vaddr},
+              1'b1
+            };
+          end
+        end
+
+        if (ptw_access_exception) begin
+          // an error makes the translation valid
+          lsu_valid_o = 1'b1;
+          // access fault from the walk. NOTE(review): always LD_ACCESS_FAULT, even when lsu_is_store_q is set - confirm
+          lsu_exception_o = {riscv::LD_ACCESS_FAULT, ptw_bad_paddr[riscv::PLEN-1:2], 1'b1};
+        end
+      end
+    end  // If translation is not enabled, check the paddr immediately against PMPs
+    else if (lsu_req_q && !misaligned_ex_q.valid && !pmp_data_allow) begin
+      if (lsu_is_store_q) begin
+        lsu_exception_o = {riscv::ST_ACCESS_FAULT, lsu_paddr_o[riscv::PLEN-1:2], 1'b1};
+      end else begin
+        lsu_exception_o = {riscv::LD_ACCESS_FAULT, lsu_paddr_o[riscv::PLEN-1:2], 1'b1};
+      end
+    end
+  end
+
+  // Load/store PMP check
+  pmp #(
+      .PLEN      (riscv::PLEN),
+      .PMP_LEN   (riscv::PLEN - 2),
+      .NR_ENTRIES(CVA6Cfg.NrPMPEntries)
+  ) i_pmp_data (
+      .addr_i       (lsu_paddr_o),
+      .priv_lvl_i   (ld_st_priv_lvl_i),
+      .access_type_i(pmp_access_type),
+      // Configuration
+      .conf_addr_i  (pmpaddr_i),
+      .conf_i       (pmpcfg_i),
+      .allow_o      (pmp_data_allow)
+  );
+
+  // ----------
+  // Registers
+  // ----------
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      lsu_vaddr_q <= '0;
+      lsu_req_q <= '0;
+      misaligned_ex_q <= '0;
+      dtlb_pte_q <= '0;
+      dtlb_hit_q <= '0;
+      lsu_is_store_q <= '0;
+      dtlb_is_4M_q <= '0;
+    end else begin
+      lsu_vaddr_q <= lsu_vaddr_n;
+      lsu_req_q <= lsu_req_n;
+      misaligned_ex_q <= misaligned_ex_n;
+      dtlb_pte_q <= dtlb_pte_n;
+      dtlb_hit_q <= dtlb_hit_n;
+      lsu_is_store_q <= lsu_is_store_n;
+      dtlb_is_4M_q <= dtlb_is_4M_n;
+    end
+  end
+endmodule
diff --git a/test/type_param/core/mmu_sv32/cva6_ptw_sv32.sv b/test/type_param/core/mmu_sv32/cva6_ptw_sv32.sv
new file mode 100644
index 00000000..4bd736bd
--- /dev/null
+++ b/test/type_param/core/mmu_sv32/cva6_ptw_sv32.sv
@@ -0,0 +1,400 @@
+// Copyright (c) 2021 Thales.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Sebastien Jacq Thales Research & Technology
+// Date: 17/07/2021
+//
+// Additional contributions by:
+// Sebastien Jacq - sjthales on github.com
+//
+// Description: Hardware-PTW (Page-Table-Walker) for MMU Sv32.
+// This module is an adaptation of the Sv39 PTW developed
+// by Florian Zaruba and David Schaffenrath to the Sv32 standard.
+// +// =========================================================================== // +// Revisions : +// Date Version Author Description +// 2020-02-17 0.1 S.Jacq PTW Sv32 for CV32A6 +// =========================================================================== // + +/* verilator lint_off WIDTH */ + +module cva6_ptw_sv32 + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int ASID_WIDTH = 1 +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, // flush everything, we need to do this because + // actually everything we do is speculative at this stage + // e.g.: there could be a CSR instruction that changes everything + output logic ptw_active_o, + output logic walking_instr_o, // set when walking for TLB + output logic ptw_error_o, // set when an error occurred + output logic ptw_access_exception_o, // set when a PMP access exception occurred + + input logic lsu_is_store_i, // this translation was triggered by a store + // PTW memory interface + input dcache_req_o_t req_port_i, + output dcache_req_i_t req_port_o, + + // to Shared TLB, update logic + output tlb_update_sv32_t shared_tlb_update_o, + + output logic [riscv::VLEN-1:0] update_vaddr_o, + + input logic [ASID_WIDTH-1:0] asid_i, + + // from shared TLB + input logic shared_tlb_access_i, + input logic shared_tlb_hit_i, + input logic [riscv::VLEN-1:0] shared_tlb_vaddr_i, + + input logic itlb_req_i, + + // from CSR file + input logic [riscv::PPNW-1:0] satp_ppn_i, // ppn from satp + input logic mxr_i, + + // Performance counters + output logic shared_tlb_miss_o, + + // PMP + input riscv::pmpcfg_t [15:0] pmpcfg_i, + input logic [15:0][riscv::PLEN-3:0] pmpaddr_i, + output logic [riscv::PLEN-1:0] bad_paddr_o + +); + + // input registers + logic data_rvalid_q; + riscv::xlen_t data_rdata_q; + + riscv::pte_sv32_t pte; + assign pte = riscv::pte_sv32_t'(data_rdata_q); + + + enum logic [2:0] { + IDLE, 
WAIT_GRANT, + PTE_LOOKUP, + WAIT_RVALID, + PROPAGATE_ERROR, + PROPAGATE_ACCESS_ERROR, + LATENCY + } + state_q, state_d; + + // SV32 defines two levels of page tables + enum logic { + LVL1, + LVL2 + } + ptw_lvl_q, ptw_lvl_n; + + // is this an instruction page table walk? + logic is_instr_ptw_q, is_instr_ptw_n; + logic global_mapping_q, global_mapping_n; + // latched tag signal + logic tag_valid_n, tag_valid_q; + // register the ASID + logic [ASID_WIDTH-1:0] tlb_update_asid_q, tlb_update_asid_n; + // register the VPN we need to walk, SV32 defines a 32 bit virtual address + logic [riscv::VLEN-1:0] vaddr_q, vaddr_n; + // 4 byte aligned physical pointer + logic [riscv::PLEN-1:0] ptw_pptr_q, ptw_pptr_n; + + // Assignments + assign update_vaddr_o = vaddr_q; + + assign ptw_active_o = (state_q != IDLE); + //assign walking_instr_o = is_instr_ptw_q; + assign walking_instr_o = is_instr_ptw_q; + // directly output the correct physical address + assign req_port_o.address_index = ptw_pptr_q[DCACHE_INDEX_WIDTH-1:0]; + assign req_port_o.address_tag = ptw_pptr_q[DCACHE_INDEX_WIDTH+DCACHE_TAG_WIDTH-1:DCACHE_INDEX_WIDTH]; + // we are never going to kill this request + assign req_port_o.kill_req = '0; + // we are never going to write with the HPTW + assign req_port_o.data_wdata = '0; + // we only issue one single request at a time + assign req_port_o.data_id = '0; + + // ----------- + // Shared TLB Update + // ----------- + assign shared_tlb_update_o.vpn = vaddr_q[riscv::SV-1:12]; + // update the correct page table level + assign shared_tlb_update_o.is_4M = (ptw_lvl_q == LVL1); + // output the correct ASID + assign shared_tlb_update_o.asid = tlb_update_asid_q; + // set the global mapping bit + assign shared_tlb_update_o.content = pte | (global_mapping_q << 5); + + + assign req_port_o.tag_valid = tag_valid_q; + + logic allow_access; + + assign bad_paddr_o = ptw_access_exception_o ? 
ptw_pptr_q : 'b0; + + pmp #( + .CVA6Cfg (CVA6Cfg), + .PLEN (riscv::PLEN), + .PMP_LEN (riscv::PLEN - 2), + .NR_ENTRIES(CVA6Cfg.NrPMPEntries) + ) i_pmp_ptw ( + .addr_i (ptw_pptr_q), + // PTW access are always checked as if in S-Mode... + .priv_lvl_i (riscv::PRIV_LVL_S), + // ...and they are always loads + .access_type_i(riscv::ACCESS_READ), + // Configuration + .conf_addr_i (pmpaddr_i), + .conf_i (pmpcfg_i), + .allow_o (allow_access) + ); + + + assign req_port_o.data_be = be_gen_32(req_port_o.address_index[1:0], req_port_o.data_size); + + //------------------- + // Page table walker + //------------------- + // A virtual address va is translated into a physical address pa as follows: + // 1. Let a be sptbr.ppn × PAGESIZE, and let i = LEVELS-1. (For Sv32, + // PAGESIZE=2^12 and LEVELS=2.) + // 2. Let pte be the value of the PTE at address a+va.vpn[i]×PTESIZE. (For + // Sv32, PTESIZE=4.) + // 3. If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise an access + // exception. + // 4. Otherwise, the PTE is valid. If pte.r = 1 or pte.x = 1, go to step 5. + // Otherwise, this PTE is a pointer to the next level of the page table. + // Let i=i-1. If i < 0, stop and raise an access exception. Otherwise, let + // a = pte.ppn × PAGESIZE and go to step 2. + // 5. A leaf PTE has been found. Determine if the requested memory access + // is allowed by the pte.r, pte.w, and pte.x bits. If not, stop and + // raise an access exception. Otherwise, the translation is successful. + // Set pte.a to 1, and, if the memory access is a store, set pte.d to 1. + // The translated physical address is given as follows: + // - pa.pgoff = va.pgoff. + // - If i > 0, then this is a superpage translation and + // pa.ppn[i-1:0] = va.vpn[i-1:0]. + // - pa.ppn[LEVELS-1:i] = pte.ppn[LEVELS-1:i]. 
+ always_comb begin : ptw + // default assignments + // PTW memory interface + tag_valid_n = 1'b0; + req_port_o.data_req = 1'b0; + req_port_o.data_size = 2'b10; + req_port_o.data_we = 1'b0; + ptw_error_o = 1'b0; + ptw_access_exception_o = 1'b0; + shared_tlb_update_o.valid = 1'b0; + is_instr_ptw_n = is_instr_ptw_q; + ptw_lvl_n = ptw_lvl_q; + ptw_pptr_n = ptw_pptr_q; + state_d = state_q; + global_mapping_n = global_mapping_q; + // input registers + tlb_update_asid_n = tlb_update_asid_q; + vaddr_n = vaddr_q; + + shared_tlb_miss_o = 1'b0; + + case (state_q) + + IDLE: begin + // by default we start with the top-most page table + ptw_lvl_n = LVL1; + global_mapping_n = 1'b0; + is_instr_ptw_n = 1'b0; + // if we got a Shared TLB miss + if (shared_tlb_access_i & ~shared_tlb_hit_i) begin + ptw_pptr_n = { + satp_ppn_i, shared_tlb_vaddr_i[riscv::SV-1:22], 2'b0 + }; // SATP.PPN * PAGESIZE + VPN*PTESIZE = SATP.PPN * 2^(12) + VPN*4 + is_instr_ptw_n = itlb_req_i; + tlb_update_asid_n = asid_i; + vaddr_n = shared_tlb_vaddr_i; + state_d = WAIT_GRANT; + shared_tlb_miss_o = 1'b1; + end + end + + WAIT_GRANT: begin + // send a request out + req_port_o.data_req = 1'b1; + // wait for the grant + if (req_port_i.data_gnt) begin + // send the tag valid signal one cycle later + tag_valid_n = 1'b1; + state_d = PTE_LOOKUP; + end + end + + PTE_LOOKUP: begin + // we wait for the valid signal + if (data_rvalid_q) begin + + // check if the global mapping bit is set + if (pte.g) global_mapping_n = 1'b1; + + // ------------- + // Invalid PTE + // ------------- + // If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise a page-fault exception. 
+ if (!pte.v || (!pte.r && pte.w)) state_d = PROPAGATE_ERROR; + // ----------- + // Valid PTE + // ----------- + else begin + //state_d = IDLE; + state_d = LATENCY; + // it is a valid PTE + // if pte.r = 1 or pte.x = 1 it is a valid PTE + if (pte.r || pte.x) begin + // Valid translation found (either 4M or 4K entry) + if (is_instr_ptw_q) begin + // ------------ + // Update ITLB + // ------------ + // If page is not executable, we can directly raise an error. This + // doesn't put a useless entry into the TLB. The same idea applies + // to the access flag since we let the access flag be managed by SW. + if (!pte.x || !pte.a) state_d = PROPAGATE_ERROR; + else shared_tlb_update_o.valid = 1'b1; + + end else begin + // ------------ + // Update DTLB + // ------------ + // Check if the access flag has been set, otherwise throw a page-fault + // and let the software handle those bits. + // If page is not readable (there are no write-only pages) + // we can directly raise an error. This doesn't put a useless + // entry into the TLB. + if (pte.a && (pte.r || (pte.x && mxr_i))) begin + shared_tlb_update_o.valid = 1'b1; + end else begin + state_d = PROPAGATE_ERROR; + end + // Request is a store: perform some additional checks + // If the request was a store and the page is not write-able, raise an error + // the same applies if the dirty flag is not set + if (lsu_is_store_i && (!pte.w || !pte.d)) begin + shared_tlb_update_o.valid = 1'b0; + state_d = PROPAGATE_ERROR; + end + end + // check if the ppn is correctly aligned: + // 6. If i > 0 and pte.ppn[i − 1 : 0] != 0, this is a misaligned superpage; stop and raise a page-fault + // exception. 
+ if (ptw_lvl_q == LVL1 && pte.ppn[9:0] != '0) begin + state_d = PROPAGATE_ERROR; + shared_tlb_update_o.valid = 1'b0; + end + // this is a pointer to the next page table level + end else begin + // pointer to next level of page table + if (ptw_lvl_q == LVL1) begin + // we are in the second level now + ptw_lvl_n = LVL2; + ptw_pptr_n = {pte.ppn, vaddr_q[21:12], 2'b0}; + end + + state_d = WAIT_GRANT; + + if (ptw_lvl_q == LVL2) begin + // Should already be the last level page table => Error + ptw_lvl_n = LVL2; + state_d = PROPAGATE_ERROR; + end + end + end + + // Check if this access was actually allowed from a PMP perspective + if (!allow_access) begin + shared_tlb_update_o.valid = 1'b0; + // we have to return the failed address in bad_addr + ptw_pptr_n = ptw_pptr_q; + state_d = PROPAGATE_ACCESS_ERROR; + end + end + // we've got a data WAIT_GRANT so tell the cache that the tag is valid + end + // Propagate error to MMU/LSU + PROPAGATE_ERROR: begin + state_d = LATENCY; + ptw_error_o = 1'b1; + end + PROPAGATE_ACCESS_ERROR: begin + state_d = LATENCY; + ptw_access_exception_o = 1'b1; + end + // wait for the rvalid before going back to IDLE + WAIT_RVALID: begin + if (data_rvalid_q) state_d = IDLE; + end + LATENCY: begin + state_d = IDLE; + end + default: begin + state_d = IDLE; + end + endcase + + // ------- + // Flush + // ------- + // should we have flushed before we got an rvalid, wait for it until going back to IDLE + if (flush_i) begin + // on a flush check whether we are + // 1. in the PTE Lookup check whether we still need to wait for an rvalid + // 2. 
waiting for a grant, if so: wait for it + // if not, go back to idle + if (((state_q inside {PTE_LOOKUP, WAIT_RVALID}) && !data_rvalid_q) || + ((state_q == WAIT_GRANT) && req_port_i.data_gnt)) + state_d = WAIT_RVALID; + else state_d = LATENCY; + end + end + + // sequential process + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + state_q <= IDLE; + is_instr_ptw_q <= 1'b0; + ptw_lvl_q <= LVL1; + tag_valid_q <= 1'b0; + tlb_update_asid_q <= '0; + vaddr_q <= '0; + ptw_pptr_q <= '0; + global_mapping_q <= 1'b0; + data_rdata_q <= '0; + data_rvalid_q <= 1'b0; + end else begin + state_q <= state_d; + ptw_pptr_q <= ptw_pptr_n; + is_instr_ptw_q <= is_instr_ptw_n; + ptw_lvl_q <= ptw_lvl_n; + tag_valid_q <= tag_valid_n; + tlb_update_asid_q <= tlb_update_asid_n; + vaddr_q <= vaddr_n; + global_mapping_q <= global_mapping_n; + data_rdata_q <= req_port_i.data_rdata; + data_rvalid_q <= req_port_i.data_rvalid; + end + end + +endmodule +/* verilator lint_on WIDTH */ diff --git a/test/type_param/core/mmu_sv32/cva6_shared_tlb_sv32.sv b/test/type_param/core/mmu_sv32/cva6_shared_tlb_sv32.sv new file mode 100644 index 00000000..98e2a044 --- /dev/null +++ b/test/type_param/core/mmu_sv32/cva6_shared_tlb_sv32.sv @@ -0,0 +1,367 @@ +// Copyright (c) 2023 Thales. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// Author: Sebastien Jacq - Thales Research & Technology +// Date: 08/03/2023 +// +// Description: N-way associative shared TLB; it allows the number +// of ITLB and DTLB entries to be reduced. +// + +/* verilator lint_off WIDTH */ + +module cva6_shared_tlb_sv32 + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int SHARED_TLB_DEPTH = 64, + parameter int SHARED_TLB_WAYS = 2, + parameter int ASID_WIDTH = 1 +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, + + input logic enable_translation_i, // CSRs indicate to enable SV32 + input logic en_ld_st_translation_i, // enable virtual memory translation for load/stores + + input logic [ASID_WIDTH-1:0] asid_i, + + // from TLBs + // did we miss? + input logic itlb_access_i, + input logic itlb_hit_i, + input logic [riscv::VLEN-1:0] itlb_vaddr_i, + + input logic dtlb_access_i, + input logic dtlb_hit_i, + input logic [riscv::VLEN-1:0] dtlb_vaddr_i, + + // to TLBs, update logic + output tlb_update_sv32_t itlb_update_o, + output tlb_update_sv32_t dtlb_update_o, + + // Performance counters + output logic itlb_miss_o, + output logic dtlb_miss_o, + + output logic shared_tlb_access_o, + output logic shared_tlb_hit_o, + output logic [riscv::VLEN-1:0] shared_tlb_vaddr_o, + + output logic itlb_req_o, + + // Update shared TLB in case of miss + input tlb_update_sv32_t shared_tlb_update_i + +); + + function logic [SHARED_TLB_WAYS-1:0] shared_tlb_way_bin2oh(input logic [$clog2(SHARED_TLB_WAYS +)-1:0] in); + logic [SHARED_TLB_WAYS-1:0] out; + out = '0; + out[in] = 1'b1; + return out; + endfunction + + typedef struct packed { + logic [8:0] asid; //9 bits wide + logic [9:0] vpn1; //10 bits wide + logic [9:0] vpn0; //10 bits wide + logic is_4M; + } shared_tag_t; + + shared_tag_t shared_tag_wr; + shared_tag_t [SHARED_TLB_WAYS-1:0] shared_tag_rd; + + logic [SHARED_TLB_DEPTH-1:0][SHARED_TLB_WAYS-1:0] shared_tag_valid_q, 
shared_tag_valid_d; + + logic [ SHARED_TLB_WAYS-1:0] shared_tag_valid; + + logic [ SHARED_TLB_WAYS-1:0] tag_wr_en; + logic [$clog2(SHARED_TLB_DEPTH)-1:0] tag_wr_addr; + logic [ $bits(shared_tag_t)-1:0] tag_wr_data; + + logic [ SHARED_TLB_WAYS-1:0] tag_rd_en; + logic [$clog2(SHARED_TLB_DEPTH)-1:0] tag_rd_addr; + logic [ $bits(shared_tag_t)-1:0] tag_rd_data [SHARED_TLB_WAYS-1:0]; + + logic [ SHARED_TLB_WAYS-1:0] tag_req; + logic [ SHARED_TLB_WAYS-1:0] tag_we; + logic [$clog2(SHARED_TLB_DEPTH)-1:0] tag_addr; + + logic [ SHARED_TLB_WAYS-1:0] pte_wr_en; + logic [$clog2(SHARED_TLB_DEPTH)-1:0] pte_wr_addr; + logic [$bits(riscv::pte_sv32_t)-1:0] pte_wr_data; + + logic [ SHARED_TLB_WAYS-1:0] pte_rd_en; + logic [$clog2(SHARED_TLB_DEPTH)-1:0] pte_rd_addr; + logic [$bits(riscv::pte_sv32_t)-1:0] pte_rd_data [SHARED_TLB_WAYS-1:0]; + + logic [ SHARED_TLB_WAYS-1:0] pte_req; + logic [ SHARED_TLB_WAYS-1:0] pte_we; + logic [$clog2(SHARED_TLB_DEPTH)-1:0] pte_addr; + + logic [9:0] vpn0_d, vpn1_d, vpn0_q, vpn1_q; + + riscv::pte_sv32_t [SHARED_TLB_WAYS-1:0] pte; + + logic [riscv::VLEN-1-12:0] itlb_vpn_q; + logic [riscv::VLEN-1-12:0] dtlb_vpn_q; + + logic [ASID_WIDTH-1:0] tlb_update_asid_q, tlb_update_asid_d; + + logic shared_tlb_access_q, shared_tlb_access_d; + logic shared_tlb_hit_d; + logic [riscv::VLEN-1:0] shared_tlb_vaddr_q, shared_tlb_vaddr_d; + + logic itlb_req_d, itlb_req_q; + logic dtlb_req_d, dtlb_req_q; + + // replacement strategy + logic [SHARED_TLB_WAYS-1:0] way_valid; + logic update_lfsr; // shift the LFSR + logic [$clog2(SHARED_TLB_WAYS)-1:0] inv_way; // first non-valid encountered + logic [$clog2(SHARED_TLB_WAYS)-1:0] rnd_way; // random index for replacement + logic [$clog2(SHARED_TLB_WAYS)-1:0] repl_way; // way to replace + logic [SHARED_TLB_WAYS-1:0] repl_way_oh_d; // way to replace (onehot) + logic all_ways_valid; // we need to switch repl strategy since all are valid + + assign shared_tlb_access_o = shared_tlb_access_q; + assign shared_tlb_hit_o = shared_tlb_hit_d; + 
assign shared_tlb_vaddr_o = shared_tlb_vaddr_q; + + assign itlb_req_o = itlb_req_q; + + /////////////////////////////////////////////////////// + // tag comparison, hit generation + /////////////////////////////////////////////////////// + always_comb begin : itlb_dtlb_miss + itlb_miss_o = 1'b0; + dtlb_miss_o = 1'b0; + vpn0_d = vpn0_q; + vpn1_d = vpn1_q; + + tag_rd_en = '0; + pte_rd_en = '0; + + itlb_req_d = 1'b0; + dtlb_req_d = 1'b0; + + tlb_update_asid_d = tlb_update_asid_q; + + shared_tlb_access_d = '0; + shared_tlb_vaddr_d = shared_tlb_vaddr_q; + + tag_rd_addr = '0; + pte_rd_addr = '0; + + // if we got an ITLB miss + if (enable_translation_i & itlb_access_i & ~itlb_hit_i & ~dtlb_access_i) begin + tag_rd_en = '1; + tag_rd_addr = itlb_vaddr_i[12+:$clog2(SHARED_TLB_DEPTH)]; + pte_rd_en = '1; + pte_rd_addr = itlb_vaddr_i[12+:$clog2(SHARED_TLB_DEPTH)]; + + vpn0_d = itlb_vaddr_i[21:12]; + vpn1_d = itlb_vaddr_i[31:22]; + + itlb_miss_o = 1'b1; + itlb_req_d = 1'b1; + + tlb_update_asid_d = asid_i; + + shared_tlb_access_d = 1'b1; + shared_tlb_vaddr_d = itlb_vaddr_i; + + // we got a DTLB miss + end else if (en_ld_st_translation_i & dtlb_access_i & ~dtlb_hit_i) begin + tag_rd_en = '1; + tag_rd_addr = dtlb_vaddr_i[12+:$clog2(SHARED_TLB_DEPTH)]; + pte_rd_en = '1; + pte_rd_addr = dtlb_vaddr_i[12+:$clog2(SHARED_TLB_DEPTH)]; + + vpn0_d = dtlb_vaddr_i[21:12]; + vpn1_d = dtlb_vaddr_i[31:22]; + + dtlb_miss_o = 1'b1; + dtlb_req_d = 1'b1; + + tlb_update_asid_d = asid_i; + + shared_tlb_access_d = 1'b1; + shared_tlb_vaddr_d = dtlb_vaddr_i; + end + end //itlb_dtlb_miss + + always_comb begin : tag_comparison + shared_tlb_hit_d = 1'b0; + dtlb_update_o = '0; + itlb_update_o = '0; + //number of ways + for (int unsigned i = 0; i < SHARED_TLB_WAYS; i++) begin + if (shared_tag_valid[i] && ((tlb_update_asid_q == shared_tag_rd[i].asid) || pte[i].g) && vpn1_q == shared_tag_rd[i].vpn1) begin + if (shared_tag_rd[i].is_4M || vpn0_q == shared_tag_rd[i].vpn0) begin + shared_tlb_hit_d = 1'b1; + if 
(itlb_req_q) begin + itlb_update_o.valid = 1'b1; + itlb_update_o.vpn = itlb_vpn_q; + itlb_update_o.is_4M = shared_tag_rd[i].is_4M; + itlb_update_o.asid = tlb_update_asid_q; + itlb_update_o.content = pte[i]; + end else if (dtlb_req_q) begin + dtlb_update_o.valid = 1'b1; + dtlb_update_o.vpn = dtlb_vpn_q; + dtlb_update_o.is_4M = shared_tag_rd[i].is_4M; + dtlb_update_o.asid = tlb_update_asid_q; + dtlb_update_o.content = pte[i]; + end + end + end + end + end //tag_comparison + + // sequential process + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + itlb_vpn_q <= '0; + dtlb_vpn_q <= '0; + tlb_update_asid_q <= '0; + shared_tlb_access_q <= '0; + shared_tlb_vaddr_q <= '0; + shared_tag_valid_q <= '0; + vpn0_q <= '0; + vpn1_q <= '0; + itlb_req_q <= '0; + dtlb_req_q <= '0; + shared_tag_valid <= '0; + end else begin + itlb_vpn_q <= itlb_vaddr_i[riscv::SV-1:12]; + dtlb_vpn_q <= dtlb_vaddr_i[riscv::SV-1:12]; + tlb_update_asid_q <= tlb_update_asid_d; + shared_tlb_access_q <= shared_tlb_access_d; + shared_tlb_vaddr_q <= shared_tlb_vaddr_d; + shared_tag_valid_q <= shared_tag_valid_d; + vpn0_q <= vpn0_d; + vpn1_q <= vpn1_d; + itlb_req_q <= itlb_req_d; + dtlb_req_q <= dtlb_req_d; + shared_tag_valid <= shared_tag_valid_q[tag_rd_addr]; + end + end + + // ------------------ + // Update and Flush + // ------------------ + always_comb begin : update_flush + shared_tag_valid_d = shared_tag_valid_q; + tag_wr_en = '0; + pte_wr_en = '0; + + if (flush_i) begin + shared_tag_valid_d = '0; + end else if (shared_tlb_update_i.valid) begin + for (int unsigned i = 0; i < SHARED_TLB_WAYS; i++) begin + if (repl_way_oh_d[i]) begin + shared_tag_valid_d[shared_tlb_update_i.vpn[$clog2(SHARED_TLB_DEPTH)-1:0]][i] = 1'b1; + tag_wr_en[i] = 1'b1; + pte_wr_en[i] = 1'b1; + end + end + end + end //update_flush + + assign shared_tag_wr.asid = shared_tlb_update_i.asid; + assign shared_tag_wr.vpn1 = shared_tlb_update_i.vpn[19:10]; + assign shared_tag_wr.vpn0 = shared_tlb_update_i.vpn[9:0]; + 
assign shared_tag_wr.is_4M = shared_tlb_update_i.is_4M; + + assign tag_wr_addr = shared_tlb_update_i.vpn[$clog2(SHARED_TLB_DEPTH)-1:0]; + assign tag_wr_data = shared_tag_wr; + + assign pte_wr_addr = shared_tlb_update_i.vpn[$clog2(SHARED_TLB_DEPTH)-1:0]; + assign pte_wr_data = shared_tlb_update_i.content; + + assign way_valid = shared_tag_valid_q[shared_tlb_update_i.vpn[$clog2(SHARED_TLB_DEPTH)-1:0]]; + assign repl_way = (all_ways_valid) ? rnd_way : inv_way; + assign update_lfsr = shared_tlb_update_i.valid & all_ways_valid; + assign repl_way_oh_d = (shared_tlb_update_i.valid) ? shared_tlb_way_bin2oh(repl_way) : '0; + + lzc #( + .WIDTH(SHARED_TLB_WAYS) + ) i_lzc ( + .in_i (~way_valid), + .cnt_o (inv_way), + .empty_o(all_ways_valid) + ); + + lfsr #( + .LfsrWidth(8), + .OutWidth ($clog2(SHARED_TLB_WAYS)) + ) i_lfsr ( + .clk_i (clk_i), + .rst_ni(rst_ni), + .en_i (update_lfsr), + .out_o (rnd_way) + ); + + /////////////////////////////////////////////////////// + // memory arrays and regs + /////////////////////////////////////////////////////// + + assign tag_req = tag_wr_en | tag_rd_en; + assign tag_we = tag_wr_en; + assign tag_addr = tag_wr_en ? tag_wr_addr : tag_rd_addr; + + assign pte_req = pte_wr_en | pte_rd_en; + assign pte_we = pte_wr_en; + assign pte_addr = pte_wr_en ? 
pte_wr_addr : pte_rd_addr; + + for (genvar i = 0; i < SHARED_TLB_WAYS; i++) begin : gen_sram + // Tag RAM + sram #( + .DATA_WIDTH($bits(shared_tag_t)), + .NUM_WORDS (SHARED_TLB_DEPTH) + ) tag_sram ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .req_i (tag_req[i]), + .we_i (tag_we[i]), + .addr_i (tag_addr), + .wuser_i('0), + .wdata_i(tag_wr_data), + .be_i ('1), + .ruser_o(), + .rdata_o(tag_rd_data[i]) + ); + + assign shared_tag_rd[i] = shared_tag_t'(tag_rd_data[i]); + + // PTE RAM + sram #( + .DATA_WIDTH($bits(riscv::pte_sv32_t)), + .NUM_WORDS (SHARED_TLB_DEPTH) + ) pte_sram ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .req_i (pte_req[i]), + .we_i (pte_we[i]), + .addr_i (pte_addr), + .wuser_i('0), + .wdata_i(pte_wr_data), + .be_i ('1), + .ruser_o(), + .rdata_o(pte_rd_data[i]) + ); + assign pte[i] = riscv::pte_sv32_t'(pte_rd_data[i]); + end +endmodule + +/* verilator lint_on WIDTH */ diff --git a/test/type_param/core/mmu_sv32/cva6_tlb_sv32.sv b/test/type_param/core/mmu_sv32/cva6_tlb_sv32.sv new file mode 100644 index 00000000..79a7c98d --- /dev/null +++ b/test/type_param/core/mmu_sv32/cva6_tlb_sv32.sv @@ -0,0 +1,281 @@ +// Copyright (c) 2021 Thales. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// Author: Sebastien Jacq Thales Research & Technology +// Date: 17/07/2021 +// +// Additional contributions by: +// Sebastien Jacq - sjthales on github.com +// +// Description: Translation Lookaside Buffer, Sv32, fully associative +// This module is an adaptation of the Sv39 TLB developed +// by Florian Zaruba and David Schaffenrath to the Sv32 standard. +// +// =========================================================================== // +// Revisions : +// Date Version Author Description +// 2020-02-17 0.1 S.Jacq TLB Sv32 for CV32A6 +// =========================================================================== // + +module cva6_tlb_sv32 + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int unsigned TLB_ENTRIES = 4, + parameter int unsigned ASID_WIDTH = 1 +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, // Flush signal + // Update TLB + input tlb_update_sv32_t update_i, + // Lookup signals + input logic lu_access_i, + input logic [ASID_WIDTH-1:0] lu_asid_i, + input logic [riscv::VLEN-1:0] lu_vaddr_i, + output riscv::pte_sv32_t lu_content_o, + input logic [ASID_WIDTH-1:0] asid_to_be_flushed_i, + input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i, + output logic lu_is_4M_o, + output logic lu_hit_o +); + + // Sv32 defines two levels of page tables + struct packed { + logic [8:0] asid; //9 bits wide + logic [9:0] vpn1; //10 bits wide + logic [9:0] vpn0; //10 bits wide + logic is_4M; + logic valid; + } [TLB_ENTRIES-1:0] + tags_q, tags_n; + + riscv::pte_sv32_t [TLB_ENTRIES-1:0] content_q, content_n; + logic [9:0] vpn0, vpn1; + logic [TLB_ENTRIES-1:0] lu_hit; // to replacement logic + logic [TLB_ENTRIES-1:0] replace_en; // replace the following entry, set by replacement strategy + //------------- + // Translation + //------------- + always_comb begin : translation + vpn0 = lu_vaddr_i[21:12]; + vpn1 = lu_vaddr_i[31:22]; + + + // 
default assignment + lu_hit = '{default: 0}; + lu_hit_o = 1'b0; + lu_content_o = '{default: 0}; + lu_is_4M_o = 1'b0; + + for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin + // first level match, this may be a mega page, check the ASID flags as well + // if the entry is associated to a global address, don't match the ASID (ASID is don't care) + if (tags_q[i].valid && ((lu_asid_i == tags_q[i].asid[ASID_WIDTH-1:0]) || content_q[i].g) && vpn1 == tags_q[i].vpn1) begin + if (tags_q[i].is_4M || vpn0 == tags_q[i].vpn0) begin + lu_is_4M_o = tags_q[i].is_4M; + lu_content_o = content_q[i]; + lu_hit_o = 1'b1; + lu_hit[i] = 1'b1; + end + end + end + end + + logic asid_to_be_flushed_is0; // indicates that the ASID provided by SFENCE.VMA (rs2) is 0, active high + logic vaddr_to_be_flushed_is0; // indicates that the VADDR provided by SFENCE.VMA (rs1) is 0, active high + logic [TLB_ENTRIES-1:0] vaddr_vpn0_match; + logic [TLB_ENTRIES-1:0] vaddr_vpn1_match; + + + assign asid_to_be_flushed_is0 = ~(|asid_to_be_flushed_i); + assign vaddr_to_be_flushed_is0 = ~(|vaddr_to_be_flushed_i); + + // ------------------ + // Update and Flush + // ------------------ + always_comb begin : update_flush + tags_n = tags_q; + content_n = content_q; + + for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin + + vaddr_vpn0_match[i] = (vaddr_to_be_flushed_i[21:12] == tags_q[i].vpn0); + vaddr_vpn1_match[i] = (vaddr_to_be_flushed_i[31:22] == tags_q[i].vpn1); + + if (flush_i) begin + // invalidate logic + // flush everything if ASID is 0 and vaddr is 0 ("SFENCE.VMA x0 x0" case) + if (asid_to_be_flushed_is0 && vaddr_to_be_flushed_is0) tags_n[i].valid = 1'b0; + // flush vaddr in all addressing space ("SFENCE.VMA vaddr x0" case), it should happen only for leaf pages + else if (asid_to_be_flushed_is0 && ( (vaddr_vpn0_match[i] && vaddr_vpn1_match[i]) || (vaddr_vpn1_match[i] && tags_q[i].is_4M) ) && (~vaddr_to_be_flushed_is0)) + tags_n[i].valid = 1'b0; + // the entry is flushed if it's not global and asid and vaddr 
both match the entry to be flushed ("SFENCE.VMA vaddr asid" case) + else if ((!content_q[i].g) && ((vaddr_vpn0_match[i] && vaddr_vpn1_match[i]) || (vaddr_vpn1_match[i] && tags_q[i].is_4M)) && (asid_to_be_flushed_i == tags_q[i].asid[ASID_WIDTH-1:0]) && (!vaddr_to_be_flushed_is0) && (!asid_to_be_flushed_is0)) + tags_n[i].valid = 1'b0; + // the entry is flushed if it's not global, and the asid matches and vaddr is 0. ("SFENCE.VMA 0 asid" case) + else if ((!content_q[i].g) && (vaddr_to_be_flushed_is0) && (asid_to_be_flushed_i == tags_q[i].asid[ASID_WIDTH-1:0]) && (!asid_to_be_flushed_is0)) + tags_n[i].valid = 1'b0; + // normal replacement + end else if (update_i.valid & replace_en[i]) begin + // update tag array + tags_n[i] = '{ + asid: update_i.asid, + vpn1: update_i.vpn[19:10], + vpn0: update_i.vpn[9:0], + is_4M: update_i.is_4M, + valid: 1'b1 + }; + // and content as well + content_n[i] = update_i.content; + end + end + end + + // ----------------------------------------------- + // PLRU - Pseudo Least Recently Used Replacement + // ----------------------------------------------- + logic [2*(TLB_ENTRIES-1)-1:0] plru_tree_q, plru_tree_n; + logic en; + int unsigned idx_base, shift, new_index; + always_comb begin : plru_replacement + plru_tree_n = plru_tree_q; + en = '0; + idx_base = '0; + shift = '0; + new_index = '0; + // The PLRU-tree indexing: + // lvl0 0 + // / \ + // / \ + // lvl1 1 2 + // / \ / \ + // lvl2 3 4 5 6 + // / \ /\/\ /\ + // ... ... ... ... + // Just predefine which nodes will be set/cleared + // E.g. 
for a TLB with 8 entries, the for-loop is semantically + // equivalent to the following pseudo-code: + // unique case (1'b1) + // lu_hit[7]: plru_tree_n[0, 2, 6] = {1, 1, 1}; + // lu_hit[6]: plru_tree_n[0, 2, 6] = {1, 1, 0}; + // lu_hit[5]: plru_tree_n[0, 2, 5] = {1, 0, 1}; + // lu_hit[4]: plru_tree_n[0, 2, 5] = {1, 0, 0}; + // lu_hit[3]: plru_tree_n[0, 1, 4] = {0, 1, 1}; + // lu_hit[2]: plru_tree_n[0, 1, 4] = {0, 1, 0}; + // lu_hit[1]: plru_tree_n[0, 1, 3] = {0, 0, 1}; + // lu_hit[0]: plru_tree_n[0, 1, 3] = {0, 0, 0}; + // default: begin /* No hit */ end + // endcase + for ( + int unsigned i = 0; i < TLB_ENTRIES; i++ + ) begin + // we got a hit so update the pointer as it was least recently used + if (lu_hit[i] & lu_access_i) begin + // Set the nodes to the values we would expect + for (int unsigned lvl = 0; lvl < $clog2(TLB_ENTRIES); lvl++) begin + idx_base = $unsigned((2 ** lvl) - 1); + // lvl0 <=> MSB, lvl1 <=> MSB-1, ... + shift = $clog2(TLB_ENTRIES) - lvl; + // to circumvent the 32 bit integer arithmetic assignment + new_index = ~((i >> (shift - 1)) & 32'b1); + plru_tree_n[idx_base+(i>>shift)] = new_index[0]; + end + end + end + // Decode tree to write enable signals + // Next for-loop basically creates the following logic for e.g. an 8 entry + // TLB (note: pseudo-code obviously): + // replace_en[7] = &plru_tree_q[ 6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,1} + // replace_en[6] = &plru_tree_q[~6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,0} + // replace_en[5] = &plru_tree_q[ 5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,1} + // replace_en[4] = &plru_tree_q[~5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,0} + // replace_en[3] = &plru_tree_q[ 4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,1} + // replace_en[2] = &plru_tree_q[~4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,0} + // replace_en[1] = &plru_tree_q[ 3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,1} + // replace_en[0] = &plru_tree_q[~3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,0} + // For each entry traverse the tree. 
If every tree-node matches, + // the corresponding bit of the entry's index, this is + // the next entry to replace. + for (int unsigned i = 0; i < TLB_ENTRIES; i += 1) begin + en = 1'b1; + for (int unsigned lvl = 0; lvl < $clog2(TLB_ENTRIES); lvl++) begin + idx_base = $unsigned((2 ** lvl) - 1); + // lvl0 <=> MSB, lvl1 <=> MSB-1, ... + shift = $clog2(TLB_ENTRIES) - lvl; + + // en &= plru_tree_q[idx_base + (i>>shift)] == ((i >> (shift-1)) & 1'b1); + new_index = (i >> (shift - 1)) & 32'b1; + if (new_index[0]) begin + en &= plru_tree_q[idx_base+(i>>shift)]; + end else begin + en &= ~plru_tree_q[idx_base+(i>>shift)]; + end + end + replace_en[i] = en; + end + end + + // sequential process + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + tags_q <= '{default: 0}; + content_q <= '{default: 0}; + plru_tree_q <= '{default: 0}; + end else begin + tags_q <= tags_n; + content_q <= content_n; + plru_tree_q <= plru_tree_n; + end + end + //-------------- + // Sanity checks + //-------------- + + //pragma translate_off +`ifndef VERILATOR + + initial begin : p_assertions + assert ((TLB_ENTRIES % 2 == 0) && (TLB_ENTRIES > 1)) + else begin + $error("TLB size must be a multiple of 2 and greater than 1"); + $stop(); + end + assert (ASID_WIDTH >= 1) + else begin + $error("ASID width must be at least 1"); + $stop(); + end + end + + // Just for checking + function int countSetBits(logic [TLB_ENTRIES-1:0] vector); + automatic int count = 0; + foreach (vector[idx]) begin + count += vector[idx]; + end + return count; + endfunction + + assert property (@(posedge clk_i) (countSetBits(lu_hit) <= 1)) + else begin + $error("More then one hit in TLB!"); + $stop(); + end + assert property (@(posedge clk_i) (countSetBits(replace_en) <= 1)) + else begin + $error("More then one TLB entry selected for next replace!"); + $stop(); + end + +`endif + //pragma translate_on + +endmodule diff --git a/test/type_param/core/mmu_sv39/mmu.sv b/test/type_param/core/mmu_sv39/mmu.sv new file 
mode 100644
index 00000000..39e9f343
--- /dev/null
+++ b/test/type_param/core/mmu_sv39/mmu.sv
@@ -0,0 +1,519 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 19/04/2017
+// Description: Memory Management Unit for Ariane: instantiates the ITLB, the
+//              DTLB, the shared page table walker and the fetch/data PMP
+//              checkers. SV39 as defined in RISC-V privilege spec 1.11-WIP
+
+
+module mmu
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    parameter int unsigned INSTR_TLB_ENTRIES = 4,
+    parameter int unsigned DATA_TLB_ENTRIES = 4,
+    parameter int unsigned ASID_WIDTH = 1
+) (
+    input logic clk_i,
+    input logic rst_ni,
+    input logic flush_i,  // not used directly here; reaches the PTW through its .* connection
+    input logic enable_translation_i,  // enable virtual memory translation for instruction fetches
+    input logic en_ld_st_translation_i,  // enable virtual memory translation for load/stores
+    // IF interface
+    input icache_arsp_t icache_areq_i,
+    output icache_areq_t icache_areq_o,
+    // LSU interface
+    // this is a more minimalistic interface because the actual addressing logic is handled
+    // in the LSU as we distinguish load and stores, what we do here is simple address translation
+    input exception_t misaligned_ex_i,
+    input logic lsu_req_i,  // request address translation
+    input logic [riscv::VLEN-1:0] lsu_vaddr_i,  // virtual address in
+    input logic lsu_is_store_i,  // the translation is requested by a store
+    // if we need to walk the page table we can't grant in the same cycle
+    // Cycle 0
+    output logic lsu_dtlb_hit_o,  // sent in the same cycle as the request if translation hits in the DTLB
+    output logic [riscv::PPNW-1:0] lsu_dtlb_ppn_o,  // ppn (send same cycle as hit)
+    // Cycle 1
+    output logic lsu_valid_o,  // translation is valid
+    output logic [riscv::PLEN-1:0] lsu_paddr_o,  // translated address
+    output exception_t lsu_exception_o,  // address translation threw an exception
+    // General control signals
+    input riscv::priv_lvl_t priv_lvl_i,  // privilege level used for the fetch-side checks
+    input riscv::priv_lvl_t ld_st_priv_lvl_i,  // privilege level used for the load/store checks
+    input logic sum_i,  // when set, S-mode data accesses to user pages do not raise daccess_err
+    input logic mxr_i,  // not used directly here; reaches the PTW through its .* connection
+    // input logic flag_mprv_i,
+    input logic [riscv::PPNW-1:0] satp_ppn_i,  // reaches the PTW through its .* connection
+    input logic [ASID_WIDTH-1:0] asid_i,  // lookup ASID of both TLBs; also reaches the PTW via .*
+    input logic [ASID_WIDTH-1:0] asid_to_be_flushed_i,
+    input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i,
+    input logic flush_tlb_i,  // drives flush_i of both TLBs
+    // Performance counters
+    output logic itlb_miss_o,  // driven by the PTW via .*
+    output logic dtlb_miss_o,  // driven by the PTW via .*
+    // PTW memory interface
+    input dcache_req_o_t req_port_i,
+    output dcache_req_i_t req_port_o,
+    // PMP
+    input riscv::pmpcfg_t [15:0] pmpcfg_i,
+    input logic [15:0][riscv::PLEN-3:0] pmpaddr_i
+);
+
+  logic iaccess_err;  // insufficient privilege to access this instruction page
+  logic daccess_err;  // insufficient privilege to access this data page
+  logic ptw_active;  // PTW is currently walking a page table
+  logic walking_instr;  // PTW is walking because of an ITLB miss
+  logic ptw_error;  // PTW threw an exception
+  logic ptw_access_exception;  // PTW threw an access exception (PMPs)
+  logic [riscv::PLEN-1:0] ptw_bad_paddr;  // PTW PMP exception bad physical addr (not read in this module)
+
+  logic [riscv::VLEN-1:0] update_vaddr;
+  tlb_update_t update_ptw_itlb, update_ptw_dtlb;
+
+  logic itlb_lu_access;
+  riscv::pte_t itlb_content;
+  logic itlb_is_2M;
+  logic itlb_is_1G;
+  logic itlb_lu_hit;
+
+  logic dtlb_lu_access;
+  riscv::pte_t dtlb_content;
+  logic dtlb_is_2M;
+  logic dtlb_is_1G;
+  logic dtlb_lu_hit;
+
+
+  // Assignments
+  assign itlb_lu_access = icache_areq_i.fetch_req;
+  assign dtlb_lu_access = lsu_req_i;
+
+
+  tlb #(
+      .CVA6Cfg (CVA6Cfg),
+      .TLB_ENTRIES(INSTR_TLB_ENTRIES),
+      .ASID_WIDTH (ASID_WIDTH)
+  ) i_itlb (
+      .clk_i (clk_i),
+      .rst_ni (rst_ni),
+      .flush_i(flush_tlb_i),
+
+      .update_i(update_ptw_itlb),
+
+      .lu_access_i (itlb_lu_access),
+      .lu_asid_i (asid_i),
+      .asid_to_be_flushed_i (asid_to_be_flushed_i),
+      .vaddr_to_be_flushed_i(vaddr_to_be_flushed_i),
+      .lu_vaddr_i (icache_areq_i.fetch_vaddr),
+      .lu_content_o (itlb_content),
+
+      .lu_is_2M_o(itlb_is_2M),
+      .lu_is_1G_o(itlb_is_1G),
+      .lu_hit_o (itlb_lu_hit)
+  );
+
+  tlb #(
+      .CVA6Cfg (CVA6Cfg),
+      .TLB_ENTRIES(DATA_TLB_ENTRIES),
+      .ASID_WIDTH (ASID_WIDTH)
+  ) i_dtlb (
+      .clk_i (clk_i),
+      .rst_ni (rst_ni),
+      .flush_i(flush_tlb_i),
+
+      .update_i(update_ptw_dtlb),
+
+      .lu_access_i (dtlb_lu_access),
+      .lu_asid_i (asid_i),
+      .asid_to_be_flushed_i (asid_to_be_flushed_i),
+      .vaddr_to_be_flushed_i(vaddr_to_be_flushed_i),
+      .lu_vaddr_i (lsu_vaddr_i),
+      .lu_content_o (dtlb_content),
+
+      .lu_is_2M_o(dtlb_is_2M),
+      .lu_is_1G_o(dtlb_is_1G),
+      .lu_hit_o (dtlb_lu_hit)
+  );
+
+
+  ptw #(
+      .CVA6Cfg (CVA6Cfg),
+      .ASID_WIDTH(ASID_WIDTH)
+  ) i_ptw (
+      .clk_i (clk_i),
+      .rst_ni (rst_ni),
+      .ptw_active_o (ptw_active),
+      .walking_instr_o (walking_instr),
+      .ptw_error_o (ptw_error),
+      .ptw_access_exception_o(ptw_access_exception),
+      .enable_translation_i (enable_translation_i),
+
+      .update_vaddr_o(update_vaddr),
+      .itlb_update_o (update_ptw_itlb),
+      .dtlb_update_o (update_ptw_dtlb),
+
+      .itlb_access_i(itlb_lu_access),
+      .itlb_hit_i (itlb_lu_hit),
+      .itlb_vaddr_i (icache_areq_i.fetch_vaddr),
+
+      .dtlb_access_i(dtlb_lu_access),
+      .dtlb_hit_i (dtlb_lu_hit),
+      .dtlb_vaddr_i (lsu_vaddr_i),
+
+      .req_port_i (req_port_i),
+      .req_port_o (req_port_o),
+      .pmpcfg_i,
+      .pmpaddr_i,
+      .bad_paddr_o(ptw_bad_paddr),
+      .*  // every remaining PTW port binds to the identically named signal of this module
+  );
+
+  // ila_1 i_ila_1 (
+  //     .clk(clk_i), // input wire clk
+  //     .probe0({req_port_o.address_tag, req_port_o.address_index}),
+  //     .probe1(req_port_o.data_req), // input wire [63:0] probe1
+  //     .probe2(req_port_i.data_gnt), // input wire [0:0] probe2
+  //     .probe3(req_port_i.data_rdata), // input wire [0:0] probe3
+  //     .probe4(req_port_i.data_rvalid), // input wire [0:0] probe4
+  //     .probe5(ptw_error), // input wire [1:0] probe5
+  //     .probe6(update_vaddr), // input wire [0:0] probe6
+  //     .probe7(update_ptw_itlb.valid), // input wire [0:0] probe7
+  //     .probe8(update_ptw_dtlb.valid), // input wire [0:0] probe8
+  //     .probe9(dtlb_lu_access), // input wire [0:0] probe9
+  //     .probe10(lsu_vaddr_i), // input wire [0:0] probe10
+  //     .probe11(dtlb_lu_hit), // input wire [0:0] probe11
+  //     .probe12(itlb_lu_access), // input wire [0:0] probe12
+  //     .probe13(icache_areq_i.fetch_vaddr), // input wire [0:0] probe13
+  //     .probe14(itlb_lu_hit) // input wire [0:0] probe14
+  // );
+
+  //-----------------------
+  // Instruction Interface
+  //-----------------------
+  logic match_any_execute_region;
+  logic pmp_instr_allow;
+
+  // The instruction interface is a simple request response interface
+  always_comb begin : instr_interface
+    // MMU disabled: just pass through
+    icache_areq_o.fetch_valid = icache_areq_i.fetch_req;
+    icache_areq_o.fetch_paddr = icache_areq_i.fetch_vaddr[riscv::PLEN-1:0]; // play through in case we disabled address translation
+    // two potential exception sources:
+    // 1. HPTW threw an exception -> signal with a page fault exception
+    // 2. We got an access error because of insufficient permissions -> throw an access exception
+    icache_areq_o.fetch_exception = '0;
+    // Check whether we are allowed to access this memory region from a fetch perspective
+    iaccess_err = icache_areq_i.fetch_req && enable_translation_i
+                  && (((priv_lvl_i == riscv::PRIV_LVL_U) && ~itlb_content.u)
+                  || ((priv_lvl_i == riscv::PRIV_LVL_S) && itlb_content.u));
+
+    // MMU enabled: address from TLB, request delayed until hit. Error when TLB
+    // hit and no access right or TLB hit and translated address not valid (e.g.
+    // AXI decode error), or when PTW performs walk due to ITLB miss and raises
+    // an error.
+    if (enable_translation_i) begin
+      // we work with SV39 or SV32, so if VM is enabled, check that all bits [riscv::VLEN-1:riscv::SV-1] are equal
+      if (icache_areq_i.fetch_req && !((&icache_areq_i.fetch_vaddr[riscv::VLEN-1:riscv::SV-1]) == 1'b1 || (|icache_areq_i.fetch_vaddr[riscv::VLEN-1:riscv::SV-1]) == 1'b0)) begin
+        icache_areq_o.fetch_exception = {
+          riscv::INSTR_ACCESS_FAULT,
+          {{riscv::XLEN - riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr},
+          1'b1
+        };
+      end
+
+      icache_areq_o.fetch_valid = 1'b0;
+
+      // 4K page
+      icache_areq_o.fetch_paddr = {itlb_content.ppn, icache_areq_i.fetch_vaddr[11:0]};
+      // Mega page
+      if (itlb_is_2M) begin
+        icache_areq_o.fetch_paddr[20:12] = icache_areq_i.fetch_vaddr[20:12];
+      end
+      // Giga page
+      if (itlb_is_1G) begin
+        icache_areq_o.fetch_paddr[29:12] = icache_areq_i.fetch_vaddr[29:12];
+      end
+
+      // ---------
+      // ITLB Hit
+      // --------
+      // if we hit the ITLB output the request signal immediately
+      if (itlb_lu_hit) begin
+        icache_areq_o.fetch_valid = icache_areq_i.fetch_req;
+        // we got an access error
+        if (iaccess_err) begin
+          // throw a page fault
+          icache_areq_o.fetch_exception = {
+            riscv::INSTR_PAGE_FAULT,
+            {{riscv::XLEN - riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr},
+            1'b1
+          };
+        end else if (!pmp_instr_allow) begin
+          icache_areq_o.fetch_exception = {
+            riscv::INSTR_ACCESS_FAULT,
+            {{riscv::XLEN - riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr},  // pad by VLEN: the operand is the virtual address, as in the branches above
+            1'b1
+          };
+        end
+      end else
+      // ---------
+      // ITLB Miss
+      // ---------
+      // watch out for exceptions happening during walking the page table
+      if (ptw_active && walking_instr) begin
+        icache_areq_o.fetch_valid = ptw_error | ptw_access_exception;
+        if (ptw_error)
+          icache_areq_o.fetch_exception = {
+            riscv::INSTR_PAGE_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, update_vaddr}, 1'b1
+          };
+        else
+          icache_areq_o.fetch_exception = {
+            riscv::INSTR_ACCESS_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, update_vaddr}, 1'b1
+          };
+      end
+    end
+    // if it didn't match any execute region throw an `Instruction Access Fault`
+    // or: if we are not translating, check PMPs immediately on the paddr
+    if ((!match_any_execute_region && !ptw_error) || (!enable_translation_i && !pmp_instr_allow)) begin
+      icache_areq_o.fetch_exception = {
+        riscv::INSTR_ACCESS_FAULT,
+        {{riscv::XLEN - riscv::PLEN{1'b0}}, icache_areq_o.fetch_paddr},
+        1'b1
+      };
+    end
+  end
+
+  // check for execute flag on memory
+  assign match_any_execute_region = config_pkg::is_inside_execute_regions(
+      CVA6Cfg, {{64 - riscv::PLEN{1'b0}}, icache_areq_o.fetch_paddr}
+  );
+
+  // Instruction fetch
+  pmp #(
+      .CVA6Cfg (CVA6Cfg),
+      .PLEN (riscv::PLEN),
+      .PMP_LEN (riscv::PLEN - 2),
+      .NR_ENTRIES(CVA6Cfg.NrPMPEntries)
+  ) i_pmp_if (
+      .addr_i (icache_areq_o.fetch_paddr),
+      .priv_lvl_i,
+      // we will always execute on the instruction fetch port
+      .access_type_i(riscv::ACCESS_EXEC),
+      // Configuration
+      .conf_addr_i (pmpaddr_i),
+      .conf_i (pmpcfg_i),
+      .allow_o (pmp_instr_allow)
+  );
+
+  //-----------------------
+  // Data Interface
+  //-----------------------
+  logic [riscv::VLEN-1:0] lsu_vaddr_n, lsu_vaddr_q;
+  riscv::pte_t dtlb_pte_n, dtlb_pte_q;
+  exception_t misaligned_ex_n, misaligned_ex_q;
+  logic lsu_req_n, lsu_req_q;
+  logic lsu_is_store_n, lsu_is_store_q;
+  logic dtlb_hit_n, dtlb_hit_q;
+  logic dtlb_is_2M_n, dtlb_is_2M_q;
+  logic dtlb_is_1G_n, dtlb_is_1G_q;
+
+  // check if we need to do translation or if we are always ready (e.g.: we are not translating anything)
+  assign lsu_dtlb_hit_o = (en_ld_st_translation_i) ? dtlb_lu_hit : 1'b1;
+
+  // Wires to PMP checks
+  riscv::pmp_access_t pmp_access_type;
+  logic pmp_data_allow;
+  localparam PPNWMin = (riscv::PPNW - 1 > 29) ? 29 : riscv::PPNW - 1;
+  // The data interface is simpler and only consists of a request/response interface
+  always_comb begin : data_interface
+    // save request and DTLB response
+    lsu_vaddr_n = lsu_vaddr_i;
+    lsu_req_n = lsu_req_i;
+    misaligned_ex_n = misaligned_ex_i;
+    dtlb_pte_n = dtlb_content;
+    dtlb_hit_n = dtlb_lu_hit;
+    lsu_is_store_n = lsu_is_store_i;
+    dtlb_is_2M_n = dtlb_is_2M;
+    dtlb_is_1G_n = dtlb_is_1G;
+
+    lsu_paddr_o = lsu_vaddr_q[riscv::PLEN-1:0];
+    lsu_dtlb_ppn_o = lsu_vaddr_n[riscv::PLEN-1:12];
+    lsu_valid_o = lsu_req_q;
+    lsu_exception_o = misaligned_ex_q;
+    pmp_access_type = lsu_is_store_q ? riscv::ACCESS_WRITE : riscv::ACCESS_READ;
+
+    // mute misaligned exceptions if there is no request otherwise they will throw accidental exceptions
+    misaligned_ex_n.valid = misaligned_ex_i.valid & lsu_req_i;
+
+    // Check if the User flag is set, then we may only access it in supervisor mode
+    // if SUM is enabled
+    daccess_err = en_ld_st_translation_i && ((ld_st_priv_lvl_i == riscv::PRIV_LVL_S && !sum_i && dtlb_pte_q.u) || // SUM is not set and we are trying to access a user page in supervisor mode
+    (ld_st_priv_lvl_i == riscv::PRIV_LVL_U && !dtlb_pte_q.u)); // this is not a user page but we are in user mode and trying to access it
+    // translation is enabled and no misaligned exception occurred
+    if (en_ld_st_translation_i && !misaligned_ex_q.valid) begin
+      lsu_valid_o = 1'b0;
+      // 4K page
+      lsu_paddr_o = {dtlb_pte_q.ppn, lsu_vaddr_q[11:0]};
+      lsu_dtlb_ppn_o = dtlb_content.ppn;
+      // Mega page
+      if (dtlb_is_2M_q) begin
+        lsu_paddr_o[20:12] = lsu_vaddr_q[20:12];
+        lsu_dtlb_ppn_o[20:12] = lsu_vaddr_n[20:12];  // NOTE(review): indexes the PPN with address bit positions and mixes _q/_n - confirm intended
+      end
+      // Giga page
+      if (dtlb_is_1G_q) begin
+        lsu_paddr_o[PPNWMin:12] = lsu_vaddr_q[PPNWMin:12];
+        lsu_dtlb_ppn_o[PPNWMin:12] = lsu_vaddr_n[PPNWMin:12];
+      end
+      // ---------
+      // DTLB Hit
+      // --------
+      if (dtlb_hit_q && lsu_req_q) begin
+        lsu_valid_o = 1'b1;
+        // exception priority:
+        // PAGE_FAULTS have higher priority than ACCESS_FAULTS
+        // virtual memory based exceptions are PAGE_FAULTS
+        // physical memory based exceptions are ACCESS_FAULTS (PMA/PMP)
+
+        // this is a store
+        if (lsu_is_store_q) begin
+          // check if the page is write-able and we are not violating privileges
+          // also check if the dirty flag is set
+          if (!dtlb_pte_q.w || daccess_err || !dtlb_pte_q.d) begin
+            lsu_exception_o = {
+              riscv::STORE_PAGE_FAULT,
+              {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, lsu_vaddr_q},
+              1'b1
+            };
+            // Check if any PMPs are violated
+          end else if (!pmp_data_allow) begin
+            lsu_exception_o = {
+              riscv::ST_ACCESS_FAULT,
+              {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, lsu_vaddr_q},
+              1'b1
+            };
+          end
+
+          // this is a load
+        end else begin
+          // check for sufficient access privileges - throw a page fault if necessary
+          if (daccess_err) begin
+            lsu_exception_o = {
+              riscv::LOAD_PAGE_FAULT,
+              {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, lsu_vaddr_q},
+              1'b1
+            };
+            // Check if any PMPs are violated
+          end else if (!pmp_data_allow) begin
+            lsu_exception_o = {
+              riscv::LD_ACCESS_FAULT,
+              {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, lsu_vaddr_q},
+              1'b1
+            };
+          end
+        end
+      end else
+
+      // ---------
+      // DTLB Miss
+      // ---------
+      // watch out for exceptions
+      if (ptw_active && !walking_instr) begin
+        // page table walker threw an exception
+        if (ptw_error) begin
+          // an error makes the translation valid
+          lsu_valid_o = 1'b1;
+          // the page table walker can only throw page faults
+          if (lsu_is_store_q) begin
+            lsu_exception_o = {
+              riscv::STORE_PAGE_FAULT,
+              {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, update_vaddr},  // NOTE(review): sign bit from lsu_vaddr_q, payload from update_vaddr - confirm
+              1'b1
+            };
+          end else begin
+            lsu_exception_o = {
+              riscv::LOAD_PAGE_FAULT,
+              {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, update_vaddr},
+              1'b1
+            };
+          end
+        end
+
+        if (ptw_access_exception) begin
+          // an error makes the translation valid
+          lsu_valid_o = 1'b1;
+          // Any fault of the page table walk should be based of the original access type
+          if (lsu_is_store_q) begin
+            lsu_exception_o = {
+              riscv::ST_ACCESS_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_vaddr_n}, 1'b1
+            };
+          end else begin
+            lsu_exception_o = {
+              riscv::LD_ACCESS_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_vaddr_n}, 1'b1
+            };
+          end
+        end
+      end
+    end // If translation is not enabled, check the paddr immediately against PMPs
+    else if (lsu_req_q && !misaligned_ex_q.valid && !pmp_data_allow) begin
+      if (lsu_is_store_q) begin
+        lsu_exception_o = {
+          riscv::ST_ACCESS_FAULT, {{riscv::XLEN - riscv::PLEN{1'b0}}, lsu_paddr_o}, 1'b1
+        };
+      end else begin
+        lsu_exception_o = {
+          riscv::LD_ACCESS_FAULT, {{riscv::XLEN - riscv::PLEN{1'b0}}, lsu_paddr_o}, 1'b1
+        };
+      end
+    end
+  end
+
+  // Load/store PMP check
+  pmp #(
+      .CVA6Cfg (CVA6Cfg),
+      .PLEN (riscv::PLEN),
+      .PMP_LEN (riscv::PLEN - 2),
+      .NR_ENTRIES(CVA6Cfg.NrPMPEntries)
+  ) i_pmp_data (
+      .addr_i (lsu_paddr_o),
+      .priv_lvl_i (ld_st_priv_lvl_i),
+      .access_type_i(pmp_access_type),
+      // Configuration
+      .conf_addr_i (pmpaddr_i),
+      .conf_i (pmpcfg_i),
+      .allow_o (pmp_data_allow)
+  );
+
+  // ----------
+  // Registers
+  // ----------
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      lsu_vaddr_q <= '0;
+      lsu_req_q <= '0;
+      misaligned_ex_q <= '0;
+      dtlb_pte_q <= '0;
+      dtlb_hit_q <= '0;
+      lsu_is_store_q <= '0;
+      dtlb_is_2M_q <= '0;
+      dtlb_is_1G_q <= '0;
+    end else begin
+      lsu_vaddr_q <= lsu_vaddr_n;
+      lsu_req_q <= lsu_req_n;
+      misaligned_ex_q <= misaligned_ex_n;
+      dtlb_pte_q <= dtlb_pte_n;
+      dtlb_hit_q <= dtlb_hit_n;
+      lsu_is_store_q <= lsu_is_store_n;
+      dtlb_is_2M_q <= dtlb_is_2M_n;
+      dtlb_is_1G_q <= dtlb_is_1G_n;
+    end
+  end
+endmodule
diff --git a/test/type_param/core/mmu_sv39/ptw.sv b/test/type_param/core/mmu_sv39/ptw.sv
new file mode 100644
index 00000000..2d0e3780
--- /dev/null
+++ b/test/type_param/core/mmu_sv39/ptw.sv
@@ -0,0 +1,409 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: David Schaffenrath, TU Graz
+// Author: Florian Zaruba, ETH Zurich
+// Date: 24.4.2017
+// Description: Hardware page table walker (PTW): walks the page table on an ITLB/DTLB miss and emits the TLB update
+
+/* verilator lint_off WIDTH */
+
+module ptw
+  import ariane_pkg::*;
+#(
+    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+    parameter int ASID_WIDTH = 1
+) (
+    input logic clk_i,  // Clock
+    input logic rst_ni,  // Asynchronous reset active low
+    input logic flush_i,  // flush everything, we need to do this because
+    // actually everything we do is speculative at this stage
+    // e.g.: there could be a CSR instruction that changes everything
+    output logic ptw_active_o,  // high whenever the FSM is not in IDLE
+    output logic walking_instr_o,  // set when the current walk was started by an ITLB miss
+    output logic ptw_error_o,  // set when an error occurred
+    output logic ptw_access_exception_o,  // set when a PMP access exception occurred
+    input logic enable_translation_i,  // CSRs indicate to enable SV39
+    input logic en_ld_st_translation_i,  // enable virtual memory translation for load/stores
+
+    input logic lsu_is_store_i,  // this translation was triggered by a store
+    // PTW memory interface
+    input dcache_req_o_t req_port_i,
+    output dcache_req_i_t req_port_o,
+
+
+    // to TLBs, update logic
+    output tlb_update_t itlb_update_o,
+    output tlb_update_t dtlb_update_o,
+
+    output logic [riscv::VLEN-1:0] update_vaddr_o,  // virtual address of the walk in flight (vaddr_q)
+
+    input logic [ ASID_WIDTH-1:0] asid_i,
+    // from TLBs
+    // did we miss?
+    input logic itlb_access_i,
+    input logic itlb_hit_i,
+    input logic [riscv::VLEN-1:0] itlb_vaddr_i,
+
+    input logic dtlb_access_i,
+    input logic dtlb_hit_i,
+    input logic [riscv::VLEN-1:0] dtlb_vaddr_i,
+    // from CSR file
+    input logic [riscv::PPNW-1:0] satp_ppn_i,  // ppn from satp
+    input logic mxr_i,  // when set, an executable page also satisfies the DTLB readability check
+    // Performance counters
+    output logic itlb_miss_o,
+    output logic dtlb_miss_o,
+    // PMP
+
+    input riscv::pmpcfg_t [15:0] pmpcfg_i,
+    input logic [15:0][riscv::PLEN-3:0] pmpaddr_i,
+    output logic [riscv::PLEN-1:0] bad_paddr_o  // ptw_pptr_q while ptw_access_exception_o is set, else 0
+
+);
+
+  // input registers
+  logic data_rvalid_q;
+  logic [63:0] data_rdata_q;
+
+  riscv::pte_t pte;
+  assign pte = riscv::pte_t'(data_rdata_q);
+
+  enum logic [2:0] {
+    IDLE,  // no walk in progress; a new walk may start
+    WAIT_GRANT,  // request issued, waiting for req_port_i.data_gnt
+    PTE_LOOKUP,  // waiting for the registered rvalid, then evaluating the PTE
+    WAIT_RVALID,  // flushed with a response still outstanding: wait for its rvalid
+    PROPAGATE_ERROR,  // asserts ptw_error_o, then returns to IDLE
+    PROPAGATE_ACCESS_ERROR  // asserts ptw_access_exception_o, then returns to IDLE
+  }
+      state_q, state_d;
+
+  // SV39 defines three levels of page tables
+  enum logic [1:0] {
+    LVL1,  // root level; a leaf found here is reported as is_1G
+    LVL2,  // a leaf found here is reported as is_2M
+    LVL3
+  }
+      ptw_lvl_q, ptw_lvl_n;
+
+  // is this an instruction page table walk?
+  logic is_instr_ptw_q, is_instr_ptw_n;
+  logic global_mapping_q, global_mapping_n;
+  // latched tag signal
+  logic tag_valid_n, tag_valid_q;
+  // register the ASID
+  logic [ASID_WIDTH-1:0] tlb_update_asid_q, tlb_update_asid_n;
+  // register the VPN we need to walk, SV39 defines a 39 bit virtual address
+  logic [riscv::VLEN-1:0] vaddr_q, vaddr_n;
+  // 8 byte aligned physical pointer (every pointer below is built with 3'b0 low bits)
+  logic [riscv::PLEN-1:0] ptw_pptr_q, ptw_pptr_n;
+
+  // Assignments
+  assign update_vaddr_o = vaddr_q;
+
+  assign ptw_active_o = (state_q != IDLE);
+  assign walking_instr_o = is_instr_ptw_q;
+  // directly output the correct physical address
+  assign req_port_o.address_index = ptw_pptr_q[DCACHE_INDEX_WIDTH-1:0];
+  assign req_port_o.address_tag = ptw_pptr_q[DCACHE_INDEX_WIDTH+DCACHE_TAG_WIDTH-1:DCACHE_INDEX_WIDTH];
+  // we are never going to kill this request
+  assign req_port_o.kill_req = '0;
+  // we are never going to write with the HPTW
+  assign req_port_o.data_wdata = 64'b0;
+  // we only issue one single request at a time
+  assign req_port_o.data_id = '0;
+  // -----------
+  // TLB Update
+  // -----------
+  assign itlb_update_o.vpn = {{39 - riscv::SV{1'b0}}, vaddr_q[riscv::SV-1:12]};
+  assign dtlb_update_o.vpn = {{39 - riscv::SV{1'b0}}, vaddr_q[riscv::SV-1:12]};
+  // update the correct page table level
+  assign itlb_update_o.is_2M = (ptw_lvl_q == LVL2);
+  assign itlb_update_o.is_1G = (ptw_lvl_q == LVL1);
+  assign dtlb_update_o.is_2M = (ptw_lvl_q == LVL2);
+  assign dtlb_update_o.is_1G = (ptw_lvl_q == LVL1);
+  // output the correct ASID
+  assign itlb_update_o.asid = tlb_update_asid_q;
+  assign dtlb_update_o.asid = tlb_update_asid_q;
+  // set the global mapping bit
+  assign itlb_update_o.content = pte | (global_mapping_q << 5);
+  assign dtlb_update_o.content = pte | (global_mapping_q << 5);
+
+  assign req_port_o.tag_valid = tag_valid_q;
+
+  logic allow_access;
+
+  assign bad_paddr_o = ptw_access_exception_o ? ptw_pptr_q : 'b0;
+
+  pmp #(
+      .CVA6Cfg (CVA6Cfg),
+      .PLEN (riscv::PLEN),
+      .PMP_LEN (riscv::PLEN - 2),
+      .NR_ENTRIES(CVA6Cfg.NrPMPEntries)
+  ) i_pmp_ptw (
+      .addr_i (ptw_pptr_q),
+      // PTW access are always checked as if in S-Mode...
+      .priv_lvl_i (riscv::PRIV_LVL_S),
+      // ...and they are always loads
+      .access_type_i(riscv::ACCESS_READ),
+      // Configuration
+      .conf_addr_i (pmpaddr_i),
+      .conf_i (pmpcfg_i),
+      .allow_o (allow_access)
+  );
+
+  //-------------------
+  // Page table walker
+  //-------------------
+  // A virtual address va is translated into a physical address pa as follows:
+  // 1. Let a be sptbr.ppn × PAGESIZE, and let i = LEVELS-1. (For Sv39,
+  //    PAGESIZE=2^12 and LEVELS=3.)
+  // 2. Let pte be the value of the PTE at address a+va.vpn[i]×PTESIZE. (For
+  //    Sv39, PTESIZE=8, hence the 3'b0 appended to every pointer below.)
+  // 3. If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise a page-fault
+  //    exception (PROPAGATE_ERROR).
+  // 4. Otherwise, the PTE is valid. If pte.r = 1 or pte.x = 1, go to step 5.
+  //    Otherwise, this PTE is a pointer to the next level of the page table.
+  //    Let i=i-1. If i < 0, stop and raise a page-fault exception. Otherwise, let
+  //    a = pte.ppn × PAGESIZE and go to step 2.
+  // 5. A leaf PTE has been found. Determine if the requested memory access
+  //    is allowed by the pte.r, pte.w, and pte.x bits. If not, stop and
+  //    raise a page-fault exception. Otherwise, the translation is successful.
+  //    Set pte.a to 1, and, if the memory access is a store, set pte.d to 1.
+  //    The translated physical address is given as follows:
+  //      - pa.pgoff = va.pgoff.
+  //      - If i > 0, then this is a superpage translation and
+  //        pa.ppn[i-1:0] = va.vpn[i-1:0].
+  //      - pa.ppn[LEVELS-1:i] = pte.ppn[LEVELS-1:i].
+  always_comb begin : ptw
+    // default assignments
+    // PTW memory interface
+    tag_valid_n = 1'b0;
+    req_port_o.data_req = 1'b0;
+    req_port_o.data_be = 8'hFF;
+    req_port_o.data_size = 2'b11;
+    req_port_o.data_we = 1'b0;
+    ptw_error_o = 1'b0;
+    ptw_access_exception_o = 1'b0;
+    itlb_update_o.valid = 1'b0;
+    dtlb_update_o.valid = 1'b0;
+    is_instr_ptw_n = is_instr_ptw_q;
+    ptw_lvl_n = ptw_lvl_q;
+    ptw_pptr_n = ptw_pptr_q;
+    state_d = state_q;
+    global_mapping_n = global_mapping_q;
+    // input registers
+    tlb_update_asid_n = tlb_update_asid_q;
+    vaddr_n = vaddr_q;
+
+    itlb_miss_o = 1'b0;
+    dtlb_miss_o = 1'b0;
+
+    case (state_q)
+
+      IDLE: begin
+        // by default we start with the top-most page table
+        ptw_lvl_n = LVL1;
+        global_mapping_n = 1'b0;
+        is_instr_ptw_n = 1'b0;
+        // if we got an ITLB miss (only taken while there is no DTLB access in the same cycle)
+        if (enable_translation_i & itlb_access_i & ~itlb_hit_i & ~dtlb_access_i) begin
+          ptw_pptr_n = {satp_ppn_i, itlb_vaddr_i[riscv::SV-1:30], 3'b0};
+          is_instr_ptw_n = 1'b1;
+          tlb_update_asid_n = asid_i;
+          vaddr_n = itlb_vaddr_i;
+          state_d = WAIT_GRANT;
+          itlb_miss_o = 1'b1;
+          // we got an DTLB miss
+        end else if (en_ld_st_translation_i & dtlb_access_i & ~dtlb_hit_i) begin
+          ptw_pptr_n = {satp_ppn_i, dtlb_vaddr_i[riscv::SV-1:30], 3'b0};
+          tlb_update_asid_n = asid_i;
+          vaddr_n = dtlb_vaddr_i;
+          state_d = WAIT_GRANT;
+          dtlb_miss_o = 1'b1;
+        end
+      end
+
+      WAIT_GRANT: begin
+        // send a request out
+        req_port_o.data_req = 1'b1;
+        // wait for the grant
+        if (req_port_i.data_gnt) begin
+          // send the tag valid signal one cycle later
+          tag_valid_n = 1'b1;
+          state_d = PTE_LOOKUP;
+        end
+      end
+
+      PTE_LOOKUP: begin
+        // we wait for the valid signal
+        if (data_rvalid_q) begin
+
+          // check if the global mapping bit is set
+          if (pte.g) global_mapping_n = 1'b1;
+
+          // -------------
+          // Invalid PTE
+          // -------------
+          // If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise a page-fault exception.
+          if (!pte.v || (!pte.r && pte.w)) state_d = PROPAGATE_ERROR;
+          // -----------
+          // Valid PTE
+          // -----------
+          else begin
+            state_d = IDLE;
+            // it is a valid PTE
+            // if pte.r = 1 or pte.x = 1 it is a leaf PTE
+            if (pte.r || pte.x) begin
+              // Valid translation found (either 1G, 2M or 4K entry)
+              if (is_instr_ptw_q) begin
+                // ------------
+                // Update ITLB
+                // ------------
+                // If page is not executable, we can directly raise an error. This
+                // doesn't put a useless entry into the TLB. The same idea applies
+                // to the access flag since we let the access flag be managed by SW.
+                if (!pte.x || !pte.a) state_d = PROPAGATE_ERROR;
+                else itlb_update_o.valid = 1'b1;
+
+              end else begin
+                // ------------
+                // Update DTLB
+                // ------------
+                // Check if the access flag has been set, otherwise throw a page-fault
+                // and let the software handle those bits.
+                // If page is not readable (there are no write-only pages)
+                // we can directly raise an error. This doesn't put a useless
+                // entry into the TLB.
+                if (pte.a && (pte.r || (pte.x && mxr_i))) begin
+                  dtlb_update_o.valid = 1'b1;
+                end else begin
+                  state_d = PROPAGATE_ERROR;
+                end
+                // Request is a store: perform some additional checks
+                // If the request was a store and the page is not write-able, raise an error
+                // the same applies if the dirty flag is not set
+                if (lsu_is_store_i && (!pte.w || !pte.d)) begin
+                  dtlb_update_o.valid = 1'b0;
+                  state_d = PROPAGATE_ERROR;
+                end
+              end
+              // check if the ppn is correctly aligned:
+              // 6. If i > 0 and pa.ppn[i − 1 : 0] != 0, this is a misaligned superpage; stop and raise a page-fault
+              // exception.
+              if (ptw_lvl_q == LVL1 && pte.ppn[17:0] != '0) begin
+                state_d = PROPAGATE_ERROR;
+                dtlb_update_o.valid = 1'b0;
+                itlb_update_o.valid = 1'b0;
+              end else if (ptw_lvl_q == LVL2 && pte.ppn[8:0] != '0) begin
+                state_d = PROPAGATE_ERROR;
+                dtlb_update_o.valid = 1'b0;
+                itlb_update_o.valid = 1'b0;
+              end
+              // this is a pointer to the next page table level
+            end else begin
+              // pointer to next level of page table
+              if (ptw_lvl_q == LVL1) begin
+                // we are in the second level now
+                ptw_lvl_n = LVL2;
+                ptw_pptr_n = {pte.ppn, vaddr_q[29:21], 3'b0};
+              end
+
+              if (ptw_lvl_q == LVL2) begin
+                // here we received a pointer to the third level
+                ptw_lvl_n = LVL3;
+                ptw_pptr_n = {pte.ppn, vaddr_q[20:12], 3'b0};
+              end
+
+              state_d = WAIT_GRANT;  // default for a pointer PTE; overridden just below at LVL3
+
+              if (ptw_lvl_q == LVL3) begin
+                // Should already be the last level page table => Error
+                ptw_lvl_n = LVL3;
+                state_d = PROPAGATE_ERROR;
+              end
+            end
+          end
+
+          // Check if this access was actually allowed from a PMP perspective
+          // (placed last so that it overrides every decision taken above)
+          if (!allow_access) begin
+            itlb_update_o.valid = 1'b0;
+            dtlb_update_o.valid = 1'b0;
+            // we have to return the failed address in bad_paddr_o
+            ptw_pptr_n = ptw_pptr_q;
+            state_d = PROPAGATE_ACCESS_ERROR;
+          end
+        end
+        // note: tag_valid_n was already raised in WAIT_GRANT when the grant arrived
+      end
+      // Propagate error to MMU/LSU
+      PROPAGATE_ERROR: begin
+        state_d = IDLE;
+        ptw_error_o = 1'b1;
+      end
+      PROPAGATE_ACCESS_ERROR: begin
+        state_d = IDLE;
+        ptw_access_exception_o = 1'b1;
+      end
+      // wait for the rvalid before going back to IDLE
+      WAIT_RVALID: begin
+        if (data_rvalid_q) state_d = IDLE;
+      end
+      default: begin
+        state_d = IDLE;
+      end
+    endcase
+
+    // -------
+    // Flush
+    // -------
+    // should we have flushed before we got an rvalid, wait for it until going back to IDLE
+    if (flush_i) begin
+      // on a flush check whether we are
+      // 1. in the PTE Lookup check whether we still need to wait for an rvalid
+      // 2. waiting for a grant, if so: wait for it
+      // if not, go back to idle
+      if (((state_q inside {PTE_LOOKUP, WAIT_RVALID}) && !data_rvalid_q) ||
+          ((state_q == WAIT_GRANT) && req_port_i.data_gnt))
+        state_d = WAIT_RVALID;
+      else state_d = IDLE;
+    end
+  end
+
+  // sequential process
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (~rst_ni) begin
+      state_q <= IDLE;
+      is_instr_ptw_q <= 1'b0;
+      ptw_lvl_q <= LVL1;
+      tag_valid_q <= 1'b0;
+      tlb_update_asid_q <= '0;
+      vaddr_q <= '0;
+      ptw_pptr_q <= '0;
+      global_mapping_q <= 1'b0;
+      data_rdata_q <= '0;
+      data_rvalid_q <= 1'b0;
+    end else begin
+      state_q <= state_d;
+      ptw_pptr_q <= ptw_pptr_n;
+      is_instr_ptw_q <= is_instr_ptw_n;
+      ptw_lvl_q <= ptw_lvl_n;
+      tag_valid_q <= tag_valid_n;
+      tlb_update_asid_q <= tlb_update_asid_n;
+      vaddr_q <= vaddr_n;
+      global_mapping_q <= global_mapping_n;
+      data_rdata_q <= req_port_i.data_rdata;
+      data_rvalid_q <= req_port_i.data_rvalid;
+    end
+  end
+
+endmodule
+/* verilator lint_on WIDTH */
diff --git a/test/type_param/core/mmu_sv39/tlb.sv b/test/type_param/core/mmu_sv39/tlb.sv
new file mode 100644
index 00000000..3df2cb01
--- /dev/null
+++ b/test/type_param/core/mmu_sv39/tlb.sv
@@ -0,0 +1,290 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// +// Author: David Schaffenrath, TU Graz +// Author: Florian Zaruba, ETH Zurich +// Date: 21.4.2017 +// Description: Translation Lookaside Buffer, SV39 +// fully set-associative + + +module tlb + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int unsigned TLB_ENTRIES = 4, + parameter int unsigned ASID_WIDTH = 1 +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, // Flush signal + // Update TLB + input tlb_update_t update_i, + // Lookup signals + input logic lu_access_i, + input logic [ ASID_WIDTH-1:0] lu_asid_i, + input logic [riscv::VLEN-1:0] lu_vaddr_i, + output riscv::pte_t lu_content_o, + input logic [ ASID_WIDTH-1:0] asid_to_be_flushed_i, + input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i, + output logic lu_is_2M_o, + output logic lu_is_1G_o, + output logic lu_hit_o +); + + // SV39 defines three levels of page tables + struct packed { + logic [ASID_WIDTH-1:0] asid; + logic [riscv::VPN2:0] vpn2; + logic [8:0] vpn1; + logic [8:0] vpn0; + logic is_2M; + logic is_1G; + logic valid; + } [TLB_ENTRIES-1:0] + tags_q, tags_n; + + riscv::pte_t [TLB_ENTRIES-1:0] content_q, content_n; + logic [8:0] vpn0, vpn1; + logic [ riscv::VPN2:0] vpn2; + logic [TLB_ENTRIES-1:0] lu_hit; // to replacement logic + logic [TLB_ENTRIES-1:0] replace_en; // replace the following entry, set by replacement strategy + //------------- + // Translation + //------------- + always_comb begin : translation + vpn0 = lu_vaddr_i[20:12]; + vpn1 = lu_vaddr_i[29:21]; + vpn2 = lu_vaddr_i[30+riscv::VPN2:30]; + + // default assignment + lu_hit = '{default: 0}; + lu_hit_o = 1'b0; + lu_content_o = '{default: 0}; + lu_is_1G_o = 1'b0; + lu_is_2M_o = 1'b0; + + for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin + // first level match, this may be a giga page, check the ASID flags as well + // if the entry is associated to a global address, don't match the ASID (ASID is don't care) + 
if (tags_q[i].valid && ((lu_asid_i == tags_q[i].asid) || content_q[i].g) && vpn2 == tags_q[i].vpn2) begin + // second level + if (tags_q[i].is_1G) begin + lu_is_1G_o = 1'b1; + lu_content_o = content_q[i]; + lu_hit_o = 1'b1; + lu_hit[i] = 1'b1; + // not a giga page hit so check further + end else if (vpn1 == tags_q[i].vpn1) begin + // this could be a 2 mega page hit or a 4 kB hit + // output accordingly + if (tags_q[i].is_2M || vpn0 == tags_q[i].vpn0) begin + lu_is_2M_o = tags_q[i].is_2M; + lu_content_o = content_q[i]; + lu_hit_o = 1'b1; + lu_hit[i] = 1'b1; + end + end + end + end + end + + + + logic asid_to_be_flushed_is0; // indicates that the ASID provided by SFENCE.VMA (rs2)is 0, active high + logic vaddr_to_be_flushed_is0; // indicates that the VADDR provided by SFENCE.VMA (rs1)is 0, active high + logic [TLB_ENTRIES-1:0] vaddr_vpn0_match; + logic [TLB_ENTRIES-1:0] vaddr_vpn1_match; + logic [TLB_ENTRIES-1:0] vaddr_vpn2_match; + + assign asid_to_be_flushed_is0 = ~(|asid_to_be_flushed_i); + assign vaddr_to_be_flushed_is0 = ~(|vaddr_to_be_flushed_i); + + // ------------------ + // Update and Flush + // ------------------ + always_comb begin : update_flush + tags_n = tags_q; + content_n = content_q; + + for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin + + vaddr_vpn0_match[i] = (vaddr_to_be_flushed_i[20:12] == tags_q[i].vpn0); + vaddr_vpn1_match[i] = (vaddr_to_be_flushed_i[29:21] == tags_q[i].vpn1); + vaddr_vpn2_match[i] = (vaddr_to_be_flushed_i[30+riscv::VPN2:30] == tags_q[i].vpn2); + + if (flush_i) begin + // invalidate logic + // flush everything if ASID is 0 and vaddr is 0 ("SFENCE.VMA x0 x0" case) + if (asid_to_be_flushed_is0 && vaddr_to_be_flushed_is0) tags_n[i].valid = 1'b0; + // flush vaddr in all addressing space ("SFENCE.VMA vaddr x0" case), it should happen only for leaf pages + else if (asid_to_be_flushed_is0 && ((vaddr_vpn0_match[i] && vaddr_vpn1_match[i] && vaddr_vpn2_match[i]) || (vaddr_vpn2_match[i] && tags_q[i].is_1G) || (vaddr_vpn1_match[i] && 
vaddr_vpn2_match[i] && tags_q[i].is_2M) ) && (~vaddr_to_be_flushed_is0)) + tags_n[i].valid = 1'b0; + // the entry is flushed if it's not global and asid and vaddr both matches with the entry to be flushed ("SFENCE.VMA vaddr asid" case) + else if ((!content_q[i].g) && ((vaddr_vpn0_match[i] && vaddr_vpn1_match[i] && vaddr_vpn2_match[i]) || (vaddr_vpn2_match[i] && tags_q[i].is_1G) || (vaddr_vpn1_match[i] && vaddr_vpn2_match[i] && tags_q[i].is_2M)) && (asid_to_be_flushed_i == tags_q[i].asid) && (!vaddr_to_be_flushed_is0) && (!asid_to_be_flushed_is0)) + tags_n[i].valid = 1'b0; + // the entry is flushed if it's not global, and the asid matches and vaddr is 0. ("SFENCE.VMA 0 asid" case) + else if ((!content_q[i].g) && (vaddr_to_be_flushed_is0) && (asid_to_be_flushed_i == tags_q[i].asid) && (!asid_to_be_flushed_is0)) + tags_n[i].valid = 1'b0; + // normal replacement + end else if (update_i.valid & replace_en[i]) begin + // update tag array + tags_n[i] = '{ + asid: update_i.asid, + vpn2: update_i.vpn[18+riscv::VPN2:18], + vpn1: update_i.vpn[17:9], + vpn0: update_i.vpn[8:0], + is_1G: update_i.is_1G, + is_2M: update_i.is_2M, + valid: 1'b1 + }; + // and content as well + content_n[i] = update_i.content; + end + end + end + + // ----------------------------------------------- + // PLRU - Pseudo Least Recently Used Replacement + // ----------------------------------------------- + logic [2*(TLB_ENTRIES-1)-1:0] plru_tree_q, plru_tree_n; + always_comb begin : plru_replacement + plru_tree_n = plru_tree_q; + // The PLRU-tree indexing: + // lvl0 0 + // / \ + // / \ + // lvl1 1 2 + // / \ / \ + // lvl2 3 4 5 6 + // / \ /\/\ /\ + // ... ... ... ... + // Just predefine which nodes will be set/cleared + // E.g. 
for a TLB with 8 entries, the for-loop is semantically + // equivalent to the following pseudo-code: + // unique case (1'b1) + // lu_hit[7]: plru_tree_n[0, 2, 6] = {0, 0, 0}; + // lu_hit[6]: plru_tree_n[0, 2, 6] = {0, 0, 1}; + // lu_hit[5]: plru_tree_n[0, 2, 5] = {0, 1, 0}; + // lu_hit[4]: plru_tree_n[0, 2, 5] = {0, 1, 1}; + // lu_hit[3]: plru_tree_n[0, 1, 4] = {1, 0, 0}; + // lu_hit[2]: plru_tree_n[0, 1, 4] = {1, 0, 1}; + // lu_hit[1]: plru_tree_n[0, 1, 3] = {1, 1, 0}; + // lu_hit[0]: plru_tree_n[0, 1, 3] = {1, 1, 1}; + // default: begin /* No hit */ end + // endcase + for ( + int unsigned i = 0; i < TLB_ENTRIES; i++ + ) begin + automatic int unsigned idx_base, shift, new_index; + // we got a hit, so point the tree away from this entry as it is now the most recently used + if (lu_hit[i] & lu_access_i) begin + // Set the nodes to the values we would expect + for (int unsigned lvl = 0; lvl < $clog2(TLB_ENTRIES); lvl++) begin + idx_base = $unsigned((2 ** lvl) - 1); + // lvl0 <=> MSB, lvl1 <=> MSB-1, ... + shift = $clog2(TLB_ENTRIES) - lvl; + // to circumvent the 32 bit integer arithmetic assignment + new_index = ~((i >> (shift - 1)) & 32'b1); + plru_tree_n[idx_base+(i>>shift)] = new_index[0]; + end + end + end + // Decode tree to write enable signals + // Next for-loop basically creates the following logic for e.g.
an 8 entry + // TLB (note: pseudo-code obviously): + // replace_en[7] = &plru_tree_q[ 6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,1} + // replace_en[6] = &plru_tree_q[~6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,0} + // replace_en[5] = &plru_tree_q[ 5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,1} + // replace_en[4] = &plru_tree_q[~5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,0} + // replace_en[3] = &plru_tree_q[ 4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,1} + // replace_en[2] = &plru_tree_q[~4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,0} + // replace_en[1] = &plru_tree_q[ 3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,1} + // replace_en[0] = &plru_tree_q[~3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,0} + // For each entry traverse the tree. If every tree-node matches, + // the corresponding bit of the entry's index, this is + // the next entry to replace. + for (int unsigned i = 0; i < TLB_ENTRIES; i += 1) begin + automatic logic en; + automatic int unsigned idx_base, shift, new_index; + en = 1'b1; + for (int unsigned lvl = 0; lvl < $clog2(TLB_ENTRIES); lvl++) begin + idx_base = $unsigned((2 ** lvl) - 1); + // lvl0 <=> MSB, lvl1 <=> MSB-1, ... 
+ shift = $clog2(TLB_ENTRIES) - lvl; + + // en &= plru_tree_q[idx_base + (i>>shift)] == ((i >> (shift-1)) & 1'b1); + new_index = (i >> (shift - 1)) & 32'b1; + if (new_index[0]) begin + en &= plru_tree_q[idx_base+(i>>shift)]; + end else begin + en &= ~plru_tree_q[idx_base+(i>>shift)]; + end + end + replace_en[i] = en; + end + end + + // sequential process + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + tags_q <= '{default: 0}; + content_q <= '{default: 0}; + plru_tree_q <= '{default: 0}; + end else begin + tags_q <= tags_n; + content_q <= content_n; + plru_tree_q <= plru_tree_n; + end + end + //-------------- + // Sanity checks + //-------------- + + //pragma translate_off +`ifndef VERILATOR + + initial begin : p_assertions + assert ((TLB_ENTRIES % 2 == 0) && (TLB_ENTRIES > 1)) + else begin + $error("TLB size must be a multiple of 2 and greater than 1"); + $stop(); + end + assert (ASID_WIDTH >= 1) + else begin + $error("ASID width must be at least 1"); + $stop(); + end + end + + // Just for checking + function int countSetBits(logic [TLB_ENTRIES-1:0] vector); + automatic int count = 0; + foreach (vector[idx]) begin + count += vector[idx]; + end + return count; + endfunction + + assert property (@(posedge clk_i) (countSetBits(lu_hit) <= 1)) + else begin + $error("More then one hit in TLB!"); + $stop(); + end + assert property (@(posedge clk_i) (countSetBits(replace_en) <= 1)) + else begin + $error("More then one TLB entry selected for next replace!"); + $stop(); + end + +`endif + //pragma translate_on + +endmodule diff --git a/test/type_param/core/mult.sv b/test/type_param/core/mult.sv new file mode 100644 index 00000000..72703895 --- /dev/null +++ b/test/type_param/core/mult.sv @@ -0,0 +1,149 @@ + + +module mult + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +) ( + input logic clk_i, + input logic rst_ni, + input logic flush_i, + input fu_data_t fu_data_i, + input logic mult_valid_i, + 
output riscv::xlen_t result_o, + output logic mult_valid_o, + output logic mult_ready_o, + output logic [TRANS_ID_BITS-1:0] mult_trans_id_o +); + logic mul_valid; + logic div_valid; + logic div_ready_i; // receiver of division result is able to accept the result + logic [TRANS_ID_BITS-1:0] mul_trans_id; + logic [TRANS_ID_BITS-1:0] div_trans_id; + riscv::xlen_t mul_result; + riscv::xlen_t div_result; + + logic div_valid_op; + logic mul_valid_op; + // Input Arbitration + + assign mul_valid_op = ~flush_i && mult_valid_i && (fu_data_i.operation inside { MUL, MULH, MULHU, MULHSU, MULW, CLMUL, CLMULH, CLMULR }); + + assign div_valid_op = ~flush_i && mult_valid_i && (fu_data_i.operation inside { DIV, DIVU, DIVW, DIVUW, REM, REMU, REMW, REMUW }); + + // --------------------- + // Output Arbitration + // --------------------- + // we give precedence to multiplication as the divider supports stalling and the multiplier is + // just a dumb pipelined multiplier + assign div_ready_i = (mul_valid) ? 1'b0 : 1'b1; + assign mult_trans_id_o = (mul_valid) ? mul_trans_id : div_trans_id; + assign result_o = (mul_valid) ? 
mul_result : div_result; + assign mult_valid_o = div_valid | mul_valid; + // mult_ready_o = division as the multiplication will unconditionally be ready to accept new requests + + // --------------------- + // Multiplication + // --------------------- + multiplier #( + .CVA6Cfg(CVA6Cfg) + ) i_multiplier ( + .clk_i, + .rst_ni, + .trans_id_i (fu_data_i.trans_id), + .operation_i (fu_data_i.operation), + .operand_a_i (fu_data_i.operand_a), + .operand_b_i (fu_data_i.operand_b), + .result_o (mul_result), + .mult_valid_i (mul_valid_op), + .mult_valid_o (mul_valid), + .mult_trans_id_o(mul_trans_id), + .mult_ready_o () // this unit is unconditionally ready + ); + + // --------------------- + // Division + // --------------------- + riscv::xlen_t + operand_b, + operand_a; // input operands after input MUX (input silencing, word operations or full inputs) + riscv::xlen_t result; // result before result mux + + logic div_signed; // signed or unsigned division + logic rem; // is it a remainder (or not a remainder, i.e. a division) + logic word_op_d, word_op_q; // save whether the operation was a word operation or not + + // is this a signed op? + assign div_signed = fu_data_i.operation inside {DIV, DIVW, REM, REMW}; + // is this a modulo? + assign rem = fu_data_i.operation inside {REM, REMU, REMW, REMUW}; + + // prepare the input operands and control divider + always_comb begin + // silence the inputs + operand_a = '0; + operand_b = '0; + // control signals + word_op_d = word_op_q; + + // we've got a new division operation + if (mult_valid_i && fu_data_i.operation inside {DIV, DIVU, DIVW, DIVUW, REM, REMU, REMW, REMUW}) begin + // is this a word operation?
+ if (riscv::IS_XLEN64 && (fu_data_i.operation == DIVW || fu_data_i.operation == DIVUW || fu_data_i.operation == REMW || fu_data_i.operation == REMUW)) begin + // yes, so check if we should sign extend the operands; this is only done for a signed operation + if (div_signed) begin + operand_a = sext32(fu_data_i.operand_a[31:0]); + operand_b = sext32(fu_data_i.operand_b[31:0]); + end else begin + operand_a = fu_data_i.operand_a[31:0]; + operand_b = fu_data_i.operand_b[31:0]; + end + + // save whether we want to sign extend the result or not, this is done for all word operations + word_op_d = 1'b1; + end else begin + // regular op + operand_a = fu_data_i.operand_a; + operand_b = fu_data_i.operand_b; + word_op_d = 1'b0; + end + end + end + + // --------------------- + // Serial Divider + // --------------------- + serdiv #( + .CVA6Cfg(CVA6Cfg), + .WIDTH (riscv::XLEN) + ) i_div ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .id_i (fu_data_i.trans_id), + .op_a_i (operand_a), + .op_b_i (operand_b), + .opcode_i ({rem, div_signed}), // 00: udiv, 10: urem, 01: div, 11: rem + .in_vld_i (div_valid_op), + .in_rdy_o (mult_ready_o), + .flush_i (flush_i), + .out_vld_o(div_valid), + .out_rdy_i(div_ready_i), + .id_o (div_trans_id), + .res_o (result) + ); + + // Result multiplexer + // if it was a word operation (signed or unsigned) the bit will be set and the result will be sign extended accordingly + assign div_result = (riscv::IS_XLEN64 && word_op_q) ? sext32(result) : result; + + // --------------------- + // Registers + // --------------------- + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + word_op_q <= '0; + end else begin + word_op_q <= word_op_d; + end + end +endmodule diff --git a/test/type_param/core/multiplier.sv b/test/type_param/core/multiplier.sv new file mode 100644 index 00000000..e13d6147 --- /dev/null +++ b/test/type_param/core/multiplier.sv @@ -0,0 +1,158 @@ +// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba +// +// Description: Multiplication Unit with one pipeline register +// This unit relies on retiming features of the synthesizer +// + + +module multiplier + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +) ( + input logic clk_i, + input logic rst_ni, + input logic [TRANS_ID_BITS-1:0] trans_id_i, + input logic mult_valid_i, + input fu_op operation_i, + input riscv::xlen_t operand_a_i, + input riscv::xlen_t operand_b_i, + output riscv::xlen_t result_o, + output logic mult_valid_o, + output logic mult_ready_o, + output logic [TRANS_ID_BITS-1:0] mult_trans_id_o +); + // Carry-less multiplication + logic [riscv::XLEN-1:0] + clmul_q, clmul_d, clmulr_q, clmulr_d, operand_a, operand_b, operand_a_rev, operand_b_rev; + logic clmul_rmode, clmul_hmode; + + if (CVA6Cfg.RVB) begin : gen_bitmanip + // checking for clmul_rmode and clmul_hmode + assign clmul_rmode = (operation_i == CLMULR); + assign clmul_hmode = (operation_i == CLMULH); + + // operand_a and b reverse generator + for (genvar i = 0; i < riscv::XLEN; i++) begin + assign operand_a_rev[i] = operand_a_i[(riscv::XLEN-1)-i]; + assign operand_b_rev[i] = operand_b_i[(riscv::XLEN-1)-i]; + end + + // operand_a and operand_b selection + assign operand_a = (clmul_rmode | clmul_hmode) ? 
operand_a_rev : operand_a_i; + assign operand_b = (clmul_rmode | clmul_hmode) ? operand_b_rev : operand_b_i; + + // implementation + always_comb begin + clmul_d = '0; + for (int i = 0; i <= riscv::XLEN; i++) begin + clmul_d = (|((operand_b >> i) & 1)) ? clmul_d ^ (operand_a << i) : clmul_d; + end + end + + // clmulr + clmulh result generator + for (genvar i = 0; i < riscv::XLEN; i++) begin + assign clmulr_d[i] = clmul_d[(riscv::XLEN-1)-i]; + end + end + + // Pipeline register + logic [TRANS_ID_BITS-1:0] trans_id_q; + logic mult_valid_q; + fu_op operator_d, operator_q; + logic [riscv::XLEN*2-1:0] mult_result_d, mult_result_q; + + // control registers + logic sign_a, sign_b; + logic mult_valid; + + // control signals + assign mult_valid_o = mult_valid_q; + assign mult_trans_id_o = trans_id_q; + assign mult_ready_o = 1'b1; + + assign mult_valid = mult_valid_i && (operation_i inside {MUL, MULH, MULHU, MULHSU, MULW, CLMUL, CLMULH, CLMULR}); + + // Sign Select MUX + always_comb begin + sign_a = 1'b0; + sign_b = 1'b0; + + // signed multiplication + if (operation_i == MULH) begin + sign_a = 1'b1; + sign_b = 1'b1; + // signed - unsigned multiplication + end else if (operation_i == MULHSU) begin + sign_a = 1'b1; + // unsigned multiplication + end else begin + sign_a = 1'b0; + sign_b = 1'b0; + end + end + + + // single stage version + assign mult_result_d = $signed( + {operand_a_i[riscv::XLEN-1] & sign_a, operand_a_i} + ) * $signed( + {operand_b_i[riscv::XLEN-1] & sign_b, operand_b_i} + ); + + + assign operator_d = operation_i; + + always_comb begin : p_selmux + unique case (operator_q) + MULH, MULHU, MULHSU: result_o = mult_result_q[riscv::XLEN*2-1:riscv::XLEN]; + CLMUL: result_o = clmul_q; + CLMULH: result_o = clmulr_q >> 1; + CLMULR: result_o = clmulr_q; + // MUL performs an XLEN-bit×XLEN-bit multiplication and places the lower XLEN bits in the destination register + default: begin + if (operator_q == MULW && riscv::IS_XLEN64) result_o = sext32(mult_result_q[31:0]); + else 
result_o = mult_result_q[riscv::XLEN-1:0]; // including MUL + end + endcase + end + if (CVA6Cfg.RVB) begin + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + clmul_q <= '0; + clmulr_q <= '0; + end else begin + clmul_q <= clmul_d; + clmulr_q <= clmulr_d; + end + end + end + // ----------------------- + // Output pipeline register + // ----------------------- + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + mult_valid_q <= '0; + trans_id_q <= '0; + operator_q <= MUL; + mult_result_q <= '0; + end else begin + // Input silencing + trans_id_q <= trans_id_i; + // Output Register + mult_valid_q <= mult_valid; + operator_q <= operator_d; + mult_result_q <= mult_result_d; + end + end +endmodule diff --git a/test/type_param/core/perf_counters.sv b/test/type_param/core/perf_counters.sv new file mode 100644 index 00000000..ff6d0d1e --- /dev/null +++ b/test/type_param/core/perf_counters.sv @@ -0,0 +1,226 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// Author: Florian Zaruba, ETH Zurich +// Date: 06.10.2017 +// Description: Performance counters + + +module perf_counters + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int unsigned NumPorts = 3 // number of miss ports +) ( + input logic clk_i, + input logic rst_ni, + input logic debug_mode_i, // debug mode + // SRAM like interface + input logic [11:0] addr_i, // read/write address (up to 6 counters possible) + input logic we_i, // write enable + input riscv::xlen_t data_i, // data to write + output riscv::xlen_t data_o, // data to read + // from commit stage + input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i, // the instruction we want to commit + input logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i, // acknowledge that we are indeed committing + // from L1 caches + input logic l1_icache_miss_i, + input logic l1_dcache_miss_i, + // from MMU + input logic itlb_miss_i, + input logic dtlb_miss_i, + // from issue stage + input logic sb_full_i, + // from frontend + input logic if_empty_i, + // from PC Gen + input exception_t ex_i, + input logic eret_i, + input bp_resolve_t resolved_branch_i, + // for newly added events + input exception_t branch_exceptions_i, //Branch exceptions->execute unit-> branch_exception_o + input icache_dreq_t l1_icache_access_i, + input dcache_req_i_t [2:0] l1_dcache_access_i, + input logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0]miss_vld_bits_i, //For Cache eviction (3ports-LOAD,STORE,PTW) + input logic i_tlb_flush_i, + input logic stall_issue_i, //stall-read operands + input logic [31:0] mcountinhibit_i +); + + logic [63:0] generic_counter_d[6:1]; + logic [63:0] generic_counter_q[6:1]; + + //internal signal to keep track of exception + logic read_access_exception, update_access_exception; + + logic events[6:1]; + //internal signal for MUX select line input + logic [4:0] mhpmevent_d[6:1]; + logic [4:0] mhpmevent_q[6:1]; + // internal signal to detect event 
on multiple commit ports + logic [CVA6Cfg.NrCommitPorts-1:0] load_event; + logic [CVA6Cfg.NrCommitPorts-1:0] store_event; + logic [CVA6Cfg.NrCommitPorts-1:0] branch_event; + logic [CVA6Cfg.NrCommitPorts-1:0] call_event; + logic [CVA6Cfg.NrCommitPorts-1:0] return_event; + logic [CVA6Cfg.NrCommitPorts-1:0] int_event; + logic [CVA6Cfg.NrCommitPorts-1:0] fp_event; + + //Multiplexer + always_comb begin : Mux + events[6:1] = '{default: 0}; + load_event = '{default: 0}; + store_event = '{default: 0}; + branch_event = '{default: 0}; + call_event = '{default: 0}; + return_event = '{default: 0}; + int_event = '{default: 0}; + fp_event = '{default: 0}; + + for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++) begin + load_event[j] = commit_ack_i[j] & (commit_instr_i[j].fu == LOAD); + store_event[j] = commit_ack_i[j] & (commit_instr_i[j].fu == STORE); + branch_event[j] = commit_ack_i[j] & (commit_instr_i[j].fu == CTRL_FLOW); + call_event[j] = commit_ack_i[j] & (commit_instr_i[j].fu == CTRL_FLOW && (commit_instr_i[j].op == ADD || commit_instr_i[j].op == JALR) && (commit_instr_i[j].rd == 'd1 || commit_instr_i[j].rd == 'd5)); + return_event[j] = commit_ack_i[j] & (commit_instr_i[j].op == JALR && commit_instr_i[j].rd == 'd0); + int_event[j] = commit_ack_i[j] & (commit_instr_i[j].fu == ALU || commit_instr_i[j].fu == MULT); + fp_event[j] = commit_ack_i[j] & (commit_instr_i[j].fu == FPU || commit_instr_i[j].fu == FPU_VEC); + end + + for (int unsigned i = 1; i <= 6; i++) begin + case (mhpmevent_q[i]) + 5'b00000: events[i] = 0; + 5'b00001: events[i] = l1_icache_miss_i; //L1 I-Cache misses + 5'b00010: events[i] = l1_dcache_miss_i; //L1 D-Cache misses + 5'b00011: events[i] = itlb_miss_i; //ITLB misses + 5'b00100: events[i] = dtlb_miss_i; //DTLB misses + 5'b00101: events[i] = |load_event; //Load accesses + 5'b00110: events[i] = |store_event; //Store accesses + 5'b00111: events[i] = ex_i.valid; //Exceptions + 5'b01000: events[i] = eret_i; //Exception handler returns + 5'b01001: 
events[i] = |branch_event; // Branch instructions + 5'b01010: + events[i] = resolved_branch_i.valid && resolved_branch_i.is_mispredict;//Branch mispredicts + 5'b01011: events[i] = branch_exceptions_i.valid; //Branch exceptions + // The standard software calling convention uses register x1 to hold the return address on a call + // the unconditional jump is decoded as ADD op + 5'b01100: events[i] = |call_event; //Call + 5'b01101: events[i] = |return_event; //Return + 5'b01110: events[i] = sb_full_i; //MSB Full + 5'b01111: events[i] = if_empty_i; //Instruction fetch Empty + 5'b10000: events[i] = l1_icache_access_i.req; //L1 I-Cache accesses + 5'b10001: + events[i] = l1_dcache_access_i[0].data_req || l1_dcache_access_i[1].data_req || l1_dcache_access_i[2].data_req;//L1 D-Cache accesses + 5'b10010: + events[i] = (l1_dcache_miss_i && miss_vld_bits_i[0] == 8'hFF) || (l1_dcache_miss_i && miss_vld_bits_i[1] == 8'hFF) || (l1_dcache_miss_i && miss_vld_bits_i[2] == 8'hFF);//eviction + 5'b10011: events[i] = i_tlb_flush_i; //I-TLB flush + 5'b10100: events[i] = |int_event; //Integer instructions + 5'b10101: events[i] = |fp_event; //Floating Point Instructions + 5'b10110: events[i] = stall_issue_i; //Pipeline bubbles + default: events[i] = 0; + endcase + end + + end + + always_comb begin : generic_counter + generic_counter_d = generic_counter_q; + data_o = 'b0; + mhpmevent_d = mhpmevent_q; + read_access_exception = 1'b0; + update_access_exception = 1'b0; + + // Increment the non-inhibited counters with active events + for (int unsigned i = 1; i <= 6; i++) begin + if ((!debug_mode_i) && (!we_i)) begin + if ((events[i]) == 1 && (!mcountinhibit_i[i+2])) begin + generic_counter_d[i] = generic_counter_q[i] + 1'b1; + end + end + end + + //Read + unique case (addr_i) + riscv::CSR_MHPM_COUNTER_3, + riscv::CSR_MHPM_COUNTER_4, + riscv::CSR_MHPM_COUNTER_5, + riscv::CSR_MHPM_COUNTER_6, + riscv::CSR_MHPM_COUNTER_7, + riscv::CSR_MHPM_COUNTER_8 :begin + if (riscv::XLEN == 32) data_o = 
generic_counter_q[addr_i-riscv::CSR_MHPM_COUNTER_3+1][31:0]; + else data_o = generic_counter_q[addr_i-riscv::CSR_MHPM_COUNTER_3+1]; + end + riscv::CSR_MHPM_COUNTER_3H, + riscv::CSR_MHPM_COUNTER_4H, + riscv::CSR_MHPM_COUNTER_5H, + riscv::CSR_MHPM_COUNTER_6H, + riscv::CSR_MHPM_COUNTER_7H, + riscv::CSR_MHPM_COUNTER_8H :begin + if (riscv::XLEN == 32) + data_o = generic_counter_q[addr_i-riscv::CSR_MHPM_COUNTER_3H+1][63:32]; + else read_access_exception = 1'b1; + end + riscv::CSR_MHPM_EVENT_3, + riscv::CSR_MHPM_EVENT_4, + riscv::CSR_MHPM_EVENT_5, + riscv::CSR_MHPM_EVENT_6, + riscv::CSR_MHPM_EVENT_7, + riscv::CSR_MHPM_EVENT_8 : + data_o = mhpmevent_q[addr_i-riscv::CSR_MHPM_EVENT_3+1]; + default: data_o = 'b0; + endcase + + //Write + if (we_i) begin + unique case (addr_i) + riscv::CSR_MHPM_COUNTER_3, + riscv::CSR_MHPM_COUNTER_4, + riscv::CSR_MHPM_COUNTER_5, + riscv::CSR_MHPM_COUNTER_6, + riscv::CSR_MHPM_COUNTER_7, + riscv::CSR_MHPM_COUNTER_8 :begin + if (riscv::XLEN == 32) + generic_counter_d[addr_i-riscv::CSR_MHPM_COUNTER_3+1][31:0] = data_i; + else generic_counter_d[addr_i-riscv::CSR_MHPM_COUNTER_3+1] = data_i; + end + riscv::CSR_MHPM_COUNTER_3H, + riscv::CSR_MHPM_COUNTER_4H, + riscv::CSR_MHPM_COUNTER_5H, + riscv::CSR_MHPM_COUNTER_6H, + riscv::CSR_MHPM_COUNTER_7H, + riscv::CSR_MHPM_COUNTER_8H :begin + if (riscv::XLEN == 32) + generic_counter_d[addr_i-riscv::CSR_MHPM_COUNTER_3H+1][63:32] = data_i; + else update_access_exception = 1'b1; + end + riscv::CSR_MHPM_EVENT_3, + riscv::CSR_MHPM_EVENT_4, + riscv::CSR_MHPM_EVENT_5, + riscv::CSR_MHPM_EVENT_6, + riscv::CSR_MHPM_EVENT_7, + riscv::CSR_MHPM_EVENT_8 : + mhpmevent_d[addr_i-riscv::CSR_MHPM_EVENT_3+1] = data_i; + default: update_access_exception = 1'b1; + endcase + end + end + + //Registers + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + generic_counter_q <= '{default: 0}; + mhpmevent_q <= '{default: 0}; + end else begin + generic_counter_q <= generic_counter_d; + mhpmevent_q <= mhpmevent_d; + end 
+ end + +endmodule diff --git a/test/type_param/core/pmp/src/pmp.sv b/test/type_param/core/pmp/src/pmp.sv new file mode 100644 index 00000000..a3adbb90 --- /dev/null +++ b/test/type_param/core/pmp/src/pmp.sv @@ -0,0 +1,94 @@ +// Copyright 2019 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Moritz Schneider, ETH Zurich +// Date: 2.10.2019 +// Description: purely combinatorial PMP unit (with extraction for more complex configs such as NAPOT) + +module pmp #( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int unsigned PLEN = 34, // rv64: 56 + parameter int unsigned PMP_LEN = 32, // rv64: 54 + parameter int unsigned NR_ENTRIES = 4 +) ( + // Input + input logic [PLEN-1:0] addr_i, + input riscv::pmp_access_t access_type_i, + input riscv::priv_lvl_t priv_lvl_i, + // Configuration + input logic [15:0][PMP_LEN-1:0] conf_addr_i, + input riscv::pmpcfg_t [15:0] conf_i, + // Output + output logic allow_o +); + // if there are no PMPs we can always grant the access. + if (NR_ENTRIES > 0) begin : gen_pmp + logic [NR_ENTRIES-1:0] match; + + for (genvar i = 0; i < NR_ENTRIES; i++) begin + logic [PMP_LEN-1:0] conf_addr_prev; + + assign conf_addr_prev = (i == 0) ? 
'0 : conf_addr_i[i-1]; + + pmp_entry #( + .CVA6Cfg(CVA6Cfg), + .PLEN (PLEN), + .PMP_LEN(PMP_LEN) + ) i_pmp_entry ( + .addr_i (addr_i), + .conf_addr_i (conf_addr_i[i]), + .conf_addr_prev_i(conf_addr_prev), + .conf_addr_mode_i(conf_i[i].addr_mode), + .match_o (match[i]) + ); + end + + always_comb begin + int i; + + allow_o = 1'b0; + for (i = 0; i < NR_ENTRIES; i++) begin + // either we are in S or U mode or the config is locked in which + // case it also applies in M mode + if (priv_lvl_i != riscv::PRIV_LVL_M || conf_i[i].locked) begin + if (match[i]) begin + if ((access_type_i & conf_i[i].access_type) != access_type_i) allow_o = 1'b0; + else allow_o = 1'b1; + break; + end + end + end + if (i == NR_ENTRIES) begin // no PMP entry matched the address + // allow all accesses from M-mode for no pmp match + if (priv_lvl_i == riscv::PRIV_LVL_M) allow_o = 1'b1; + // disallow accesses for all other modes + else + allow_o = 1'b0; + end + end + end else assign allow_o = 1'b1; + + // synthesis translate_off + always_comb begin + logic no_locked; + no_locked = 1'b0; + if (priv_lvl_i == riscv::PRIV_LVL_M) begin + no_locked = 1'b1; + for (int i = 0; i < NR_ENTRIES; i++) begin + if (conf_i[i].locked && conf_i[i].addr_mode != riscv::OFF) begin + no_locked &= 1'b0; + end else no_locked &= 1'b1; + end + if (no_locked == 1'b1) assert (allow_o == 1'b1); + end + end + // synthesis translate_on + +endmodule diff --git a/test/type_param/core/pmp/src/pmp_entry.sv b/test/type_param/core/pmp/src/pmp_entry.sv new file mode 100644 index 00000000..667ae189 --- /dev/null +++ b/test/type_param/core/pmp/src/pmp_entry.sv @@ -0,0 +1,125 @@ +// Copyright 2019 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Moritz Schneider, ETH Zurich +// Date: 2.10.2019 +// Description: single PMP entry + +module pmp_entry #( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int unsigned PLEN = 56, + parameter int unsigned PMP_LEN = 54 +) ( + // Input + input logic [PLEN-1:0] addr_i, + + // Configuration + input logic [PMP_LEN-1:0] conf_addr_i, + input logic [PMP_LEN-1:0] conf_addr_prev_i, + input riscv::pmp_addr_mode_t conf_addr_mode_i, + + // Output + output logic match_o +); + logic [PLEN-1:0] conf_addr_n; + logic [$clog2(PLEN)-1:0] trail_ones; + logic [PLEN-1:0] base; + logic [PLEN-1:0] mask; + int unsigned size; + assign conf_addr_n = {2'b11, ~conf_addr_i}; + lzc #( + .WIDTH(PLEN), + .MODE (1'b0) + ) i_lzc ( + .in_i (conf_addr_n), + .cnt_o (trail_ones), + .empty_o() + ); + + always_comb begin + case (conf_addr_mode_i) + riscv::TOR: begin + base = '0; + mask = '0; + size = '0; + // check that the requested address is in between the two + // configuration addresses + if (addr_i >= ({2'b0, conf_addr_prev_i} << 2) && addr_i < ({2'b0, conf_addr_i} << 2)) begin + match_o = 1'b1; + end else match_o = 1'b0; + + // synthesis translate_off + if (match_o == 0) begin + assert (addr_i >= ({2'b0, conf_addr_i} << 2) || addr_i < ({2'b0, conf_addr_prev_i} << 2)); + end else begin + assert (addr_i < ({2'b0, conf_addr_i} << 2) && addr_i >= ({2'b0, conf_addr_prev_i} << 2)); + end + // synthesis translate_on + + end + riscv::NA4, riscv::NAPOT: begin + + if (conf_addr_mode_i == riscv::NA4) size = 2; + else begin + // use the extracted trailing ones + size = {{(32 - $clog2(PLEN)) {1'b0}}, trail_ones} + 3; + 
end + + mask = '1 << size; + base = ({2'b0, conf_addr_i} << 2) & mask; + match_o = (addr_i & mask) == base ? 1'b1 : 1'b0; + + // synthesis translate_off + // size extract checks + assert (size >= 2); + if (conf_addr_mode_i == riscv::NAPOT) begin + assert (size > 2); + if (size < PMP_LEN) assert (conf_addr_i[size-3] == 0); + for (int i = 0; i < PMP_LEN; i++) begin + if (size > 3 && i <= size - 4) begin + assert (conf_addr_i[i] == 1); // check that all the rest are ones + end + end + end + + if (size < PLEN - 1) begin + if (base + 2 ** size > base) begin // check for overflow + if (match_o == 0) begin + assert (addr_i >= base + 2 ** size || addr_i < base); + end else begin + assert (addr_i < base + 2 ** size && addr_i >= base); + end + end else begin + if (match_o == 0) begin + assert (addr_i - 2 ** size >= base || addr_i < base); + end else begin + assert (addr_i - 2 ** size < base && addr_i >= base); + end + end + end + // synthesis translate_on + + end + riscv::OFF: begin + match_o = 1'b0; + base = '0; + mask = '0; + size = '0; + end + default: begin + match_o = 0; + base = '0; + mask = '0; + size = '0; + end + endcase + end + +endmodule diff --git a/test/type_param/core/scoreboard.sv b/test/type_param/core/scoreboard.sv new file mode 100644 index 00000000..5ea29cdb --- /dev/null +++ b/test/type_param/core/scoreboard.sv @@ -0,0 +1,452 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. 
See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 08.04.2017
+// Description: Scoreboard - keeps track of all decoded, issued and committed instructions
+
+module scoreboard #(
+  parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+  parameter type rs3_len_t = logic  // type of the rs3_o operand output
+) (
+  input logic clk_i,  // Clock
+  input logic rst_ni,  // Asynchronous reset active low
+  output logic sb_full_o,  // mirrors issue_full: the issue queue has no free entry
+  input logic flush_unissued_instr_i,  // flush only un-issued instructions
+  input logic flush_i,  // flush whole scoreboard
+  input logic unresolved_branch_i,  // we have an unresolved branch
+  // list of clobbered registers to issue stage
+  output ariane_pkg::fu_t [2**ariane_pkg::REG_ADDR_SIZE-1:0] rd_clobber_gpr_o,
+  output ariane_pkg::fu_t [2**ariane_pkg::REG_ADDR_SIZE-1:0] rd_clobber_fpr_o,
+
+  // regfile like interface to operand read stage
+  input logic [ariane_pkg::REG_ADDR_SIZE-1:0] rs1_i,
+  output riscv::xlen_t rs1_o,
+  output logic rs1_valid_o,
+
+  input logic [ariane_pkg::REG_ADDR_SIZE-1:0] rs2_i,
+  output riscv::xlen_t rs2_o,
+  output logic rs2_valid_o,
+
+  input logic [ariane_pkg::REG_ADDR_SIZE-1:0] rs3_i,
+  output rs3_len_t rs3_o,
+  output logic rs3_valid_o,
+
+  // advertise instruction to commit stage, if commit_ack_i is asserted advance the commit pointer
+  output ariane_pkg::scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_o,
+  input logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i,
+
+  // instruction to put on top of scoreboard e.g.: top pointer
+  // we can always put this instruction to the top unless we signal with asserted full_o
+  input ariane_pkg::scoreboard_entry_t decoded_instr_i,
+  input logic decoded_instr_valid_i,
+  output logic decoded_instr_ack_o,
+
+  // instruction to issue logic, if issue_instr_valid and issue_ready is asserted, advance the issue pointer
+  output ariane_pkg::scoreboard_entry_t issue_instr_o,
+  output logic issue_instr_valid_o,
+  input logic issue_ack_i,
+
+  // write-back port
+  input ariane_pkg::bp_resolve_t resolved_branch_i,
+  input logic [CVA6Cfg.NrWbPorts-1:0][ariane_pkg::TRANS_ID_BITS-1:0] trans_id_i,  // transaction ID at which to write the result back
+  input logic [CVA6Cfg.NrWbPorts-1:0][riscv::XLEN-1:0] wbdata_i,  // write data in
+  input ariane_pkg::exception_t [CVA6Cfg.NrWbPorts-1:0] ex_i,  // exception from a functional unit (e.g.: ld/st exception)
+  input logic [CVA6Cfg.NrWbPorts-1:0] wt_valid_i,  // data in is valid
+  input logic x_we_i,  // cvxif we for writeback
+
+  // RVFI
+  output logic [ariane_pkg::TRANS_ID_BITS-1:0] rvfi_issue_pointer_o,
+  output logic [CVA6Cfg.NrCommitPorts-1:0][ariane_pkg::TRANS_ID_BITS-1:0] rvfi_commit_pointer_o
+);
+
+  // this is the FIFO struct of the issue queue
+  typedef struct packed {
+    logic issued;  // this bit indicates whether we issued this instruction e.g.: if it is valid
+    logic is_rd_fpr_flag;  // redundant meta info, added for speed
+    ariane_pkg::scoreboard_entry_t sbe;  // this is the score board entry we will send to ex
+  } sb_mem_t;
+  sb_mem_t [ariane_pkg::NR_SB_ENTRIES-1:0] mem_q, mem_n;  // registered queue contents and their next-state value
+
+  logic issue_full, issue_en;
+  logic [ariane_pkg::TRANS_ID_BITS:0] issue_cnt_n, issue_cnt_q;  // one bit wider than the pointers; the extra MSB is the full flag
+  logic [ariane_pkg::TRANS_ID_BITS-1:0] issue_pointer_n, issue_pointer_q;
+  logic [CVA6Cfg.NrCommitPorts-1:0][ariane_pkg::TRANS_ID_BITS-1:0]
+      commit_pointer_n, commit_pointer_q;
+  logic [$clog2(CVA6Cfg.NrCommitPorts):0] num_commit;  // sum of the commit_ack_i bits in this cycle
+
+  // the issue queue is full don't issue any new instructions
+  // works since aligned to power of 2
+  assign issue_full = (issue_cnt_q[ariane_pkg::TRANS_ID_BITS] == 1'b1);
+
+  assign sb_full_o = issue_full;
+
+  ariane_pkg::scoreboard_entry_t decoded_instr;  // plain copy of decoded_instr_i, stored into the queue below
+  always_comb begin
+    decoded_instr = decoded_instr_i;
+  end
+
+  // output commit instruction directly
+  always_comb begin : commit_ports
+    for (int unsigned i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin
+      commit_instr_o[i] = mem_q[commit_pointer_q[i]].sbe;
+      commit_instr_o[i].trans_id = commit_pointer_q[i];  // the stored trans_id is overridden with the entry index
+    end
+  end
+
+  // an instruction is ready for issue if we have place in the issue FIFO and if the decoder says it is valid
+  always_comb begin
+    issue_instr_o = decoded_instr_i;
+    // make sure we assign the correct trans ID
+    issue_instr_o.trans_id = issue_pointer_q;
+    // we are ready if we are not full and don't have any unresolved branches, but it can be
+    // the case that we have an unresolved branch which is cleared in that cycle (resolved_branch_i == 1)
+    issue_instr_valid_o = decoded_instr_valid_i & ~unresolved_branch_i & ~issue_full;
+    decoded_instr_ack_o = issue_ack_i & ~issue_full;
+  end
+
+  // maintain a FIFO with issued instructions
+  // keep track of all issued instructions
+  always_comb begin : issue_fifo
+    // default assignment
+    mem_n = mem_q;
+    issue_en = 1'b0;
+
+    // if we got an acknowledge from the issue stage, put this scoreboard entry in the queue
+    if (decoded_instr_valid_i && decoded_instr_ack_o && !flush_unissued_instr_i) begin
+      // the decoded instruction we put in there is valid (1st bit)
+      // increase the issue counter and advance issue pointer
+      issue_en = 1'b1;
+      mem_n[issue_pointer_q] = {
+        1'b1,  // valid bit
+        (CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
+            decoded_instr_i.op
+        )),  // whether rd goes to the fpr
+        decoded_instr  // decoded instruction record
+      };
+    end
+
+    // ------------
+    // FU NONE
+    // ------------
+    for (int unsigned i = 0; i < ariane_pkg::NR_SB_ENTRIES; i++) begin
+      // The FU is NONE -> this instruction is valid immediately
+      if (mem_q[i].sbe.fu == ariane_pkg::NONE && mem_q[i].issued) mem_n[i].sbe.valid = 1'b1;
+    end
+
+    // ------------
+    // Write Back
+    // ------------
+    for (int unsigned i = 0; i < CVA6Cfg.NrWbPorts; i++) begin
+      // check if this instruction was issued (e.g.: it could happen after a flush that there is still
+      // something in the pipeline e.g. an incomplete memory operation)
+      if (wt_valid_i[i] && mem_q[trans_id_i[i]].issued) begin
+        mem_n[trans_id_i[i]].sbe.valid = 1'b1;
+        mem_n[trans_id_i[i]].sbe.result = wbdata_i[i];
+        // save the target address of a branch (needed for debug in commit stage)
+        if (CVA6Cfg.DebugEn) begin
+          mem_n[trans_id_i[i]].sbe.bp.predict_address = resolved_branch_i.target_address;
+        end
+        if (mem_n[trans_id_i[i]].sbe.fu == ariane_pkg::CVXIF && ~x_we_i) begin  // CVXIF entry written back while x_we_i is low
+          mem_n[trans_id_i[i]].sbe.rd = 5'b0;  // rd is forced to register 0
+        end
+        // write the exception back if it is valid
+        if (ex_i[i].valid) mem_n[trans_id_i[i]].sbe.ex = ex_i[i];
+        // write the fflags back from the FPU (exception valid is never set), leave tval intact
+        else if(CVA6Cfg.FpPresent && (mem_q[trans_id_i[i]].sbe.fu == ariane_pkg::FPU || mem_q[trans_id_i[i]].sbe.fu == ariane_pkg::FPU_VEC)) begin
+          mem_n[trans_id_i[i]].sbe.ex.cause = ex_i[i].cause;
+        end
+      end
+    end
+
+    // ------------
+    // Commit Port
+    // ------------
+    // we've got an acknowledge from commit
+    for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin
+      if (commit_ack_i[i]) begin
+        // this instruction is no longer in issue e.g.: it is considered finished
+        mem_n[commit_pointer_q[i]].issued = 1'b0;
+        mem_n[commit_pointer_q[i]].sbe.valid = 1'b0;
+      end
+    end
+
+    // ------
+    // Flush
+    // ------
+    if (flush_i) begin  // evaluated last, so it overrides the updates made above in this process
+      for (int unsigned i = 0; i < ariane_pkg::NR_SB_ENTRIES; i++) begin
+        // set all valid flags for all entries to zero
+        mem_n[i].issued = 1'b0;
+        mem_n[i].sbe.valid = 1'b0;
+        mem_n[i].sbe.ex.valid = 1'b0;
+      end
+    end
+  end
+
+  // FIFO counter updates
+  if (CVA6Cfg.NrCommitPorts == 2) begin : gen_commit_ports
+    assign num_commit = commit_ack_i[1] + commit_ack_i[0];
+  end else begin : gen_one_commit_port
+    assign num_commit = commit_ack_i[0];
+  end
+
+  assign issue_cnt_n = (flush_i) ? '0 : issue_cnt_q - {{ariane_pkg::TRANS_ID_BITS - $clog2(
+      CVA6Cfg.NrCommitPorts
+  ) {1'b0}}, num_commit} + {{ariane_pkg::TRANS_ID_BITS - 1{1'b0}}, issue_en};
+  assign commit_pointer_n[0] = (flush_i) ? '0 : commit_pointer_q[0] + num_commit;
+  assign issue_pointer_n = (flush_i) ? '0 : issue_pointer_q + issue_en;
+
+  // precompute offsets for commit slots
+  for (genvar k = 1; k < CVA6Cfg.NrCommitPorts; k++) begin : gen_cnt_incr
+    assign commit_pointer_n[k] = (flush_i) ? '0 : commit_pointer_n[0] + unsigned'(k);
+  end
+
+  // -------------------
+  // RD clobber process
+  // -------------------
+  // rd_clobber output: output currently clobbered destination registers
+  logic [2**ariane_pkg::REG_ADDR_SIZE-1:0][ariane_pkg::NR_SB_ENTRIES:0] gpr_clobber_vld;
+  logic [2**ariane_pkg::REG_ADDR_SIZE-1:0][ariane_pkg::NR_SB_ENTRIES:0] fpr_clobber_vld;
+  ariane_pkg::fu_t [ ariane_pkg::NR_SB_ENTRIES:0] clobber_fu;
+
+  always_comb begin : clobber_assign
+    gpr_clobber_vld = '0;
+    fpr_clobber_vld = '0;
+
+    // default (highest entry has lowest prio in arbiter tree below)
+    clobber_fu[ariane_pkg::NR_SB_ENTRIES] = ariane_pkg::NONE;
+    for (int unsigned i = 0; i < 2 ** ariane_pkg::REG_ADDR_SIZE; i++) begin
+      gpr_clobber_vld[i][ariane_pkg::NR_SB_ENTRIES] = 1'b1;
+      fpr_clobber_vld[i][ariane_pkg::NR_SB_ENTRIES] = 1'b1;
+    end
+
+    // check for all valid entries and set the clobber accordingly
+    for (int unsigned i = 0; i < ariane_pkg::NR_SB_ENTRIES; i++) begin
+      gpr_clobber_vld[mem_q[i].sbe.rd][i] = mem_q[i].issued & ~mem_q[i].is_rd_fpr_flag;
+      fpr_clobber_vld[mem_q[i].sbe.rd][i] = mem_q[i].issued & mem_q[i].is_rd_fpr_flag;
+      clobber_fu[i] = mem_q[i].sbe.fu;
+    end
+
+    // GPR[0] is always free
+    gpr_clobber_vld[0] = '0;
+  end
+
+  for (genvar k = 0; k < 2 ** ariane_pkg::REG_ADDR_SIZE; k++) begin : gen_sel_clobbers
+    // get fu that is going to clobber this register (there should be only one)
+    rr_arb_tree #(
+      .NumIn(ariane_pkg::NR_SB_ENTRIES + 1),
+      .DataType(ariane_pkg::fu_t),
+      .ExtPrio(1'b1),
+      .AxiVldRdy(1'b1)
+    ) i_sel_gpr_clobbers (
+      .clk_i (clk_i),
+      .rst_ni (rst_ni),
+      .flush_i(1'b0),
+      .rr_i ('0),
+      .req_i (gpr_clobber_vld[k]),
+      .gnt_o (),
+      .data_i (clobber_fu),
+      .gnt_i (1'b1),
+      .req_o (),
+      .data_o (rd_clobber_gpr_o[k]),
+      .idx_o ()
+    );
+    if (CVA6Cfg.FpPresent) begin  // the FPR selector is only instantiated when FpPresent is set
+      rr_arb_tree #(
+        .NumIn(ariane_pkg::NR_SB_ENTRIES + 1),
+        .DataType(ariane_pkg::fu_t),
+        .ExtPrio(1'b1),
+        .AxiVldRdy(1'b1)
+      ) i_sel_fpr_clobbers (
+        .clk_i (clk_i),
+        .rst_ni (rst_ni),
+        .flush_i(1'b0),
+        .rr_i ('0),
+        .req_i (fpr_clobber_vld[k]),
+        .gnt_o (),
+        .data_i (clobber_fu),
+        .gnt_i (1'b1),
+        .req_o (),
+        .data_o (rd_clobber_fpr_o[k]),
+        .idx_o ()
+      );
+    end
+  end
+
+  // ----------------------------------
+  // Read Operands (a.k.a forwarding)
+  // ----------------------------------
+  // read operand interface: same logic as register file
+  logic [ariane_pkg::NR_SB_ENTRIES+CVA6Cfg.NrWbPorts-1:0] rs1_fwd_req, rs2_fwd_req, rs3_fwd_req;
+  logic [ariane_pkg::NR_SB_ENTRIES+CVA6Cfg.NrWbPorts-1:0][riscv::XLEN-1:0] rs_data;  // indices [0, NrWbPorts) hold write-back data, the rest hold stored results
+  logic rs1_valid, rs2_valid, rs3_valid;
+
+  // WB ports have higher prio than entries
+  for (genvar k = 0; unsigned'(k) < CVA6Cfg.NrWbPorts; k++) begin : gen_rs_wb
+    assign rs1_fwd_req[k] = (mem_q[trans_id_i[k]].sbe.rd == rs1_i) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr(
+        issue_instr_o.op
+    )));
+    assign rs2_fwd_req[k] = (mem_q[trans_id_i[k]].sbe.rd == rs2_i) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr(
+        issue_instr_o.op
+    )));
+    assign rs3_fwd_req[k] = (mem_q[trans_id_i[k]].sbe.rd == rs3_i) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr(
+        issue_instr_o.op
+    )));
+    assign rs_data[k] = wbdata_i[k];
+  end
+  for (genvar k = 0; unsigned'(k) < ariane_pkg::NR_SB_ENTRIES; k++) begin : gen_rs_entries
+    assign rs1_fwd_req[k+CVA6Cfg.NrWbPorts] = (mem_q[k].sbe.rd == rs1_i) & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr(
+        issue_instr_o.op
+    )));
+    assign rs2_fwd_req[k+CVA6Cfg.NrWbPorts] = (mem_q[k].sbe.rd == rs2_i) & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr(
+        issue_instr_o.op
+    )));
+    assign rs3_fwd_req[k+CVA6Cfg.NrWbPorts] = (mem_q[k].sbe.rd == rs3_i) & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr(
+        issue_instr_o.op
+    )));
+    assign rs_data[k+CVA6Cfg.NrWbPorts] = mem_q[k].sbe.result;
+  end
+
+  // check whether we are accessing GPR[0]
+  assign rs1_valid_o = rs1_valid & ((|rs1_i) | (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr(
+      issue_instr_o.op
+  )));
+  assign rs2_valid_o = rs2_valid & ((|rs2_i) | (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr(
+      issue_instr_o.op
+  )));
+  assign rs3_valid_o = CVA6Cfg.NrRgprPorts == 3 ? rs3_valid & ((|rs3_i) | (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr(
+      issue_instr_o.op
+  ))) : rs3_valid;
+
+  // use fixed prio here
+  // this implicitly gives higher prio to WB ports
+  rr_arb_tree #(
+    .NumIn(ariane_pkg::NR_SB_ENTRIES + CVA6Cfg.NrWbPorts),
+    .DataWidth(riscv::XLEN),
+    .ExtPrio(1'b1),
+    .AxiVldRdy(1'b1)
+  ) i_sel_rs1 (
+    .clk_i (clk_i),
+    .rst_ni (rst_ni),
+    .flush_i(1'b0),
+    .rr_i ('0),
+    .req_i (rs1_fwd_req),
+    .gnt_o (),
+    .data_i (rs_data),
+    .gnt_i (1'b1),
+    .req_o (rs1_valid),
+    .data_o (rs1_o),
+    .idx_o ()
+  );
+
+  rr_arb_tree #(
+    .NumIn(ariane_pkg::NR_SB_ENTRIES + CVA6Cfg.NrWbPorts),
+    .DataWidth(riscv::XLEN),
+    .ExtPrio(1'b1),
+    .AxiVldRdy(1'b1)
+  ) i_sel_rs2 (
+    .clk_i (clk_i),
+    .rst_ni (rst_ni),
+    .flush_i(1'b0),
+    .rr_i ('0),
+    .req_i (rs2_fwd_req),
+    .gnt_o (),
+    .data_i (rs_data),
+    .gnt_i (1'b1),
+    .req_o (rs2_valid),
+    .data_o (rs2_o),
+    .idx_o ()
+  );
+
+  riscv::xlen_t rs3;  // XLEN-wide selector output, sliced to rs3_o below
+
+  rr_arb_tree #(
+    .NumIn(ariane_pkg::NR_SB_ENTRIES + CVA6Cfg.NrWbPorts),
+    .DataWidth(riscv::XLEN),
+    .ExtPrio(1'b1),
+    .AxiVldRdy(1'b1)
+  ) i_sel_rs3 (
+    .clk_i (clk_i),
+    .rst_ni (rst_ni),
+    .flush_i(1'b0),
+    .rr_i ('0),
+    .req_i (rs3_fwd_req),
+    .gnt_o (),
+    .data_i (rs_data),
+    .gnt_i (1'b1),
+    .req_o (rs3_valid),
+    .data_o (rs3),
+    .idx_o ()
+  );
+
+  if (CVA6Cfg.NrRgprPorts == 3) begin : gen_gp_three_port
+    assign rs3_o = rs3[riscv::XLEN-1:0];  // three GPR read ports: rs3_o takes the low XLEN bits
+  end else begin : gen_fp_three_port
+    assign rs3_o = rs3[CVA6Cfg.FLen-1:0];  // otherwise rs3_o takes the low CVA6Cfg.FLen bits
+  end
+
+
+  // sequential process
+  always_ff @(posedge clk_i or negedge rst_ni) begin : regs
+    if (!rst_ni) begin
+      mem_q <= '{default: sb_mem_t'(0)};
+      issue_cnt_q <= '0;
+      commit_pointer_q <= '0;
+      issue_pointer_q <= '0;
+    end else begin
+      issue_cnt_q <= issue_cnt_n;
+      issue_pointer_q <= issue_pointer_n;
+      mem_q <= mem_n;
+      commit_pointer_q <= commit_pointer_n;
+    end
+  end
+
+  //RVFI
+  assign rvfi_issue_pointer_o = issue_pointer_q;
+  assign rvfi_commit_pointer_o = commit_pointer_q;
+
+  //pragma translate_off
+  initial begin
+    assert (ariane_pkg::NR_SB_ENTRIES == 2 ** ariane_pkg::TRANS_ID_BITS)
+    else $fatal(1, "Scoreboard size needs to be a power of two.");
+  end
+
+  // assert that zero is never set
+  assert property (@(posedge clk_i) disable iff (!rst_ni) (rd_clobber_gpr_o[0] == ariane_pkg::NONE))
+  else $fatal(1, "RD 0 should not bet set");
+  // assert that we never acknowledge a commit if the instruction is not valid
+  assert property (
+    @(posedge clk_i) disable iff (!rst_ni) commit_ack_i[0] |-> commit_instr_o[0].valid)
+  else $fatal(1, "Commit acknowledged but instruction is not valid");
+  if (CVA6Cfg.NrCommitPorts == 2) begin : gen_two_commit_ports
+    assert property (
+      @(posedge clk_i) disable iff (!rst_ni) commit_ack_i[1] |-> commit_instr_o[1].valid)
+    else $fatal(1, "Commit acknowledged but instruction is not valid");
+  end
+  // assert that we never give an issue ack signal if the instruction is not valid
+  assert property (@(posedge clk_i) disable iff (!rst_ni) issue_ack_i |-> issue_instr_valid_o)
+  else $fatal(1, "Issue acknowledged but instruction is not valid");
+
+  // there should never be more than one instruction writing the same destination register (except x0)
+  // check that no functional unit is retiring with the same transaction id
+  for (genvar i = 0; i < CVA6Cfg.NrWbPorts; i++) begin
+    for (genvar j = 0; j < CVA6Cfg.NrWbPorts; j++) begin
+      assert property (
+        @(posedge clk_i) disable iff (!rst_ni) wt_valid_i[i] && wt_valid_i[j] && (i != j) |-> (trans_id_i[i] != trans_id_i[j]))
+      else
+        $fatal(
+            1,
+            "Two or more functional units are retiring instructions with the same transaction id!"
+        );
+    end
+  end
+  //pragma translate_on
+endmodule
diff --git a/test/type_param/core/serdiv.sv b/test/type_param/core/serdiv.sv
new file mode 100644
index 00000000..244ee975
--- /dev/null
+++ b/test/type_param/core/serdiv.sv
@@ -0,0 +1,269 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Michael Schaffner , ETH Zurich
+// Andreas Traber , ETH Zurich
+//
+// Date: 18.10.2018
+// Description: simple 64bit serial divider (operand width set by WIDTH); one operation at a time, sequenced by an IDLE -> DIVIDE -> FINISH state machine
+
+
+module serdiv
+  import ariane_pkg::*;
+#(
+  parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+  parameter WIDTH = 64,  // bit width of op_a_i, op_b_i and res_o
+  parameter STABLE_HANDSHAKE = 0 // Guarantee a stable in_rdy_o during the input handshake. Keep it at 0 in CVA6
+) (
+  input logic clk_i,
+  input logic rst_ni,
+  // input IF
+  input logic [TRANS_ID_BITS-1:0] id_i,
+  input logic [WIDTH-1:0] op_a_i,
+  input logic [WIDTH-1:0] op_b_i,
+  input logic [1:0] opcode_i,  // 0: udiv, 2: urem, 1: div, 3: rem
+  // handshake
+  input logic in_vld_i,  // there is a cycle delay from in_rdy_o->in_vld_i, see issue_read_operands.sv stage
+  output logic in_rdy_o,
+  input logic flush_i,
+  // output IF
+  output logic out_vld_o,
+  input logic out_rdy_i,
+  output logic [TRANS_ID_BITS-1:0] id_o,
+  output logic [WIDTH-1:0] res_o
+);
+
+  /////////////////////////////////////
+  // signal declarations
+  /////////////////////////////////////
+
+  enum logic [1:0] {
+    IDLE,
+    DIVIDE,
+    FINISH
+  }
+      state_d, state_q;
+
+  logic [WIDTH-1:0] res_q, res_d;
+  logic [WIDTH-1:0] op_a_q, op_a_d;
+  logic [WIDTH-1:0] op_b_q, op_b_d;
+  logic op_a_sign, op_b_sign;
+  logic op_b_zero, op_b_zero_q, op_b_zero_d;
+  logic op_b_neg_one, op_b_neg_one_q, op_b_neg_one_d;
+
+  logic [TRANS_ID_BITS-1:0] id_q, id_d;
+
+  logic rem_sel_d, rem_sel_q;
+  logic comp_inv_d, comp_inv_q;
+  logic res_inv_d, res_inv_q;
+
+  logic [WIDTH-1:0] add_mux;
+  logic [WIDTH-1:0] add_out;
+  logic [WIDTH-1:0] add_tmp;
+  logic [WIDTH-1:0] b_mux;
+  logic [WIDTH-1:0] out_mux;
+
+  logic [$clog2(WIDTH)-1:0] cnt_q, cnt_d;
+  logic cnt_zero;
+
+  logic [WIDTH-1:0] lzc_a_input, lzc_b_input, op_b;
+  logic [$clog2(WIDTH)-1:0] lzc_a_result, lzc_b_result;
+  logic [$clog2(WIDTH+1)-1:0] shift_a;
+  logic [ $clog2(WIDTH+1):0] div_shift;
+
+  logic a_reg_en, b_reg_en, res_reg_en, ab_comp, pm_sel, load_en;
+  logic lzc_a_no_one, lzc_b_no_one;
+  logic div_res_zero_d, div_res_zero_q;
+
+
+  /////////////////////////////////////
+  // align the input operands
+  // for faster division
+  /////////////////////////////////////
+
+  assign op_a_sign = op_a_i[$high(op_a_i)];
+  assign op_b_sign = op_b_i[$high(op_b_i)];
+  assign op_b_zero = lzc_b_no_one & ~op_b_sign;
+  assign op_b_neg_one = lzc_b_no_one & op_b_sign;
+
+  assign lzc_a_input = (opcode_i[0] & op_a_sign) ? {~op_a_i[$high(op_a_i)-1:0], 1'b1} : op_a_i;  // opcode_i[0] is set for the signed opcodes (1: div, 3: rem)
+  assign lzc_b_input = (opcode_i[0] & op_b_sign) ? ~op_b_i : op_b_i;
+
+  lzc #(
+    .MODE (1),  // count leading zeros
+    .WIDTH(WIDTH)
+  ) i_lzc_a (
+    .in_i (lzc_a_input),
+    .cnt_o (lzc_a_result),
+    .empty_o(lzc_a_no_one)
+  );
+
+  lzc #(
+    .MODE (1),  // count leading zeros
+    .WIDTH(WIDTH)
+  ) i_lzc_b (
+    .in_i (lzc_b_input),
+    .cnt_o (lzc_b_result),
+    .empty_o(lzc_b_no_one)
+  );
+
+  assign shift_a = (lzc_a_no_one) ? WIDTH : {1'b0, lzc_a_result};
+  assign div_shift = {1'b0, lzc_b_result} - shift_a;  // one bit wider than shift_a; its MSB is read as the "result is zero" flag below
+
+  assign op_b = op_b_i <<< $unsigned(div_shift);
+
+  // the division is zero if |opB| > |opA| and can be terminated
+  assign div_res_zero_d = (load_en) ? div_shift[$high(div_shift)] : div_res_zero_q;
+
+  /////////////////////////////////////
+  // Datapath
+  /////////////////////////////////////
+
+  assign pm_sel = load_en & ~(opcode_i[0] & (op_a_sign ^ op_b_sign));  // 1 selects the add path of the main adder, 0 the subtract path
+
+  // muxes
+  assign add_mux = (load_en) ? op_a_i : op_b_q;
+
+  // attention: logical shift by one in case of negative operand B!
+  assign b_mux = (load_en) ? op_b : {comp_inv_q, (op_b_q[$high(op_b_q):1])};
+
+  // in case of bad timing, we could output from regs -> needs a cycle more in the FSM
+  assign out_mux = (rem_sel_q) ? (op_b_neg_one_q ? '0 : op_a_q) : (op_b_zero_q ? '1 : (op_b_neg_one_q ? op_a_q : res_q));  // remainder path: 0 when op_b_neg_one_q, else op_a_q; quotient path: all ones when op_b_zero_q, op_a_q when op_b_neg_one_q, else res_q
+
+  // invert if necessary
+  assign res_o = (res_inv_q) ? -$signed(out_mux) : out_mux;
+
+  // main comparator
+  assign ab_comp = ((op_a_q == op_b_q) | ((op_a_q > op_b_q) ^ comp_inv_q)) & ((|op_a_q) | op_b_zero_q);
+
+  // main adder
+  assign add_tmp = (load_en) ? 0 : op_a_q;
+  assign add_out = (pm_sel) ? add_tmp + add_mux : add_tmp - $signed(add_mux);
+
+  /////////////////////////////////////
+  // FSM, counter
+  /////////////////////////////////////
+
+  assign cnt_zero = (cnt_q == 0);
+  assign cnt_d = (load_en) ? div_shift[$clog2(WIDTH)-1:0] : (~cnt_zero) ? cnt_q - 1 : cnt_q;  // loaded from div_shift on load_en, then counts down and holds at 0
+
+  always_comb begin : p_fsm
+    // default
+    state_d = state_q;
+    in_rdy_o = 1'b0;
+    out_vld_o = 1'b0;
+    load_en = 1'b0;
+    a_reg_en = 1'b0;
+    b_reg_en = 1'b0;
+    res_reg_en = 1'b0;
+
+    unique case (state_q)
+      IDLE: begin
+        in_rdy_o = 1'b1;
+
+        if (in_vld_i) begin
+          // CVA6: there is a cycle delay until the valid signal is asserted by the id stage
+          // Ara: we need a stable handshake
+          in_rdy_o = (STABLE_HANDSHAKE) ? 1'b1 : 1'b0;
+          a_reg_en = 1'b1;
+          b_reg_en = 1'b1;
+          load_en = 1'b1;
+          state_d = DIVIDE;
+        end
+      end
+      DIVIDE: begin
+        if (~(div_res_zero_q | op_b_zero_q | op_b_neg_one_q)) begin
+          a_reg_en = ab_comp;
+          b_reg_en = 1'b1;
+          res_reg_en = 1'b1;
+        end
+        // can end the division immediately if the result is known
+        if (div_res_zero_q | op_b_zero_q | op_b_neg_one_q) begin
+          out_vld_o = 1'b1;
+          state_d = FINISH;
+          if (out_rdy_i) begin
+            // in_rdy_o = 1'b1;// there is a cycle delay until the valid signal is asserted by the id stage
+            state_d = IDLE;
+          end
+        end else if (cnt_zero) begin
+          state_d = FINISH;
+        end
+      end
+      FINISH: begin
+        out_vld_o = 1'b1;
+
+        if (out_rdy_i) begin
+          // in_rdy_o = 1'b1;// there is a cycle delay until the valid signal is asserted by the id stage
+          state_d = IDLE;
+        end
+      end
+      default: state_d = IDLE;
+    endcase
+
+    if (flush_i) begin  // evaluated after the case statement, so it overrides the decisions made there
+      in_rdy_o = 1'b0;
+      out_vld_o = 1'b0;
+      a_reg_en = 1'b0;
+      b_reg_en = 1'b0;
+      load_en = 1'b0;
+      state_d = IDLE;
+    end
+  end
+
+  /////////////////////////////////////
+  // regs, flags
+  /////////////////////////////////////
+
+  // get flags
+  assign rem_sel_d = (load_en) ? opcode_i[1] : rem_sel_q;  // opcode_i[1] is set for the remainder opcodes (2: urem, 3: rem)
+  assign comp_inv_d = (load_en) ? opcode_i[0] & op_b_sign : comp_inv_q;
+  assign op_b_zero_d = (load_en) ? op_b_zero : op_b_zero_q;
+  assign op_b_neg_one_d = (load_en) ? op_b_neg_one : op_b_neg_one_q;
+  assign res_inv_d = (load_en) ? (~op_b_zero | opcode_i[1]) & opcode_i[0] & (op_a_sign ^ op_b_sign ^ op_b_neg_one) : res_inv_q;
+
+  // transaction id
+  assign id_d = (load_en) ? id_i : id_q;
+  assign id_o = id_q;
+
+  assign op_a_d = (a_reg_en) ? add_out : op_a_q;
+  assign op_b_d = (b_reg_en) ? b_mux : op_b_q;
+  assign res_d = (load_en) ? '0 : (res_reg_en) ? {res_q[$high(res_q)-1:0], ab_comp} : res_q;  // cleared on load, otherwise shifts ab_comp in at the LSB while res_reg_en is set
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+    if (~rst_ni) begin
+      state_q <= IDLE;
+      op_a_q <= '0;
+      op_b_q <= '0;
+      res_q <= '0;
+      cnt_q <= '0;
+      id_q <= '0;
+      rem_sel_q <= 1'b0;
+      comp_inv_q <= 1'b0;
+      res_inv_q <= 1'b0;
+      op_b_zero_q <= 1'b0;
+      op_b_neg_one_q <= 1'b0;
+      div_res_zero_q <= 1'b0;
+    end else begin
+      state_q <= state_d;
+      op_a_q <= op_a_d;
+      op_b_q <= op_b_d;
+      res_q <= res_d;
+      cnt_q <= cnt_d;
+      id_q <= id_d;
+      rem_sel_q <= rem_sel_d;
+      comp_inv_q <= comp_inv_d;
+      res_inv_q <= res_inv_d;
+      op_b_zero_q <= op_b_zero_d;
+      op_b_neg_one_q <= op_b_neg_one_d;
+      div_res_zero_q <= div_res_zero_d;
+    end
+  end
+
+endmodule
diff --git a/test/type_param/core/store_buffer.sv b/test/type_param/core/store_buffer.sv
new file mode 100644
index 00000000..d41551d5
--- /dev/null
+++ b/test/type_param/core/store_buffer.sv
@@ -0,0 +1,291 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// +// Author: Florian Zaruba, ETH Zurich +// Date: 25.04.2017 +// Description: Store queue persists store requests and pushes them to memory +// if they are no longer speculative + + +module store_buffer + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, // if we flush we need to pause the transactions on the memory + // otherwise we will run in a deadlock with the memory arbiter + input logic stall_st_pending_i, // Stall issuing non-speculative request + output logic no_st_pending_o, // non-speculative queue is empty (e.g.: everything is committed to the memory hierarchy) + output logic store_buffer_empty_o, // there is no store pending in neither the speculative unit or the non-speculative queue + + input logic [11:0] page_offset_i, // check for the page offset (the last 12 bit if the current load matches them) + output logic page_offset_matches_o, // the above input page offset matches -> let the store buffer drain + + input logic commit_i, // commit the instruction which was placed there most recently + output logic commit_ready_o, // commit queue is ready to accept another commit request + output logic ready_o, // the store queue is ready to accept a new request + // it is only ready if it can unconditionally commit the instruction, e.g.: + // the commit buffer needs to be empty + input logic valid_i, // this is a valid store + input logic valid_without_flush_i, // just tell if the address is valid which we are current putting and do not take any further action + + input logic [riscv::PLEN-1:0] paddr_i, // physical address of store which needs to be placed in the queue + output [riscv::PLEN-1:0] rvfi_mem_paddr_o, + input riscv::xlen_t data_i, // data which is placed in the queue + input logic [(riscv::XLEN/8)-1:0] be_i, // byte enable in + input logic [1:0] data_size_i, // type of request we are 
making (e.g.: bytes to write) + + // D$ interface + input dcache_req_o_t req_port_i, + output dcache_req_i_t req_port_o +); + + // the store queue has two parts: + // 1. Speculative queue + // 2. Commit queue which is non-speculative, e.g.: the store will definitely happen. + struct packed { + logic [riscv::PLEN-1:0] address; + riscv::xlen_t data; + logic [(riscv::XLEN/8)-1:0] be; + logic [1:0] data_size; + logic valid; // this entry is valid, we need this for checking if the address offset matches + } + speculative_queue_n[DEPTH_SPEC-1:0], + speculative_queue_q[DEPTH_SPEC-1:0], + commit_queue_n[DEPTH_COMMIT-1:0], + commit_queue_q[DEPTH_COMMIT-1:0]; + + // keep a status count for both buffers + logic [$clog2(DEPTH_SPEC):0] speculative_status_cnt_n, speculative_status_cnt_q; + logic [$clog2(DEPTH_COMMIT):0] commit_status_cnt_n, commit_status_cnt_q; + // Speculative queue + logic [$clog2(DEPTH_SPEC)-1:0] speculative_read_pointer_n, speculative_read_pointer_q; + logic [$clog2(DEPTH_SPEC)-1:0] speculative_write_pointer_n, speculative_write_pointer_q; + // Commit Queue + logic [$clog2(DEPTH_COMMIT)-1:0] commit_read_pointer_n, commit_read_pointer_q; + logic [$clog2(DEPTH_COMMIT)-1:0] commit_write_pointer_n, commit_write_pointer_q; + + assign store_buffer_empty_o = (speculative_status_cnt_q == 0) & no_st_pending_o; + // ---------------------------------------- + // Speculative Queue - Core Interface + // ---------------------------------------- + always_comb begin : core_if + automatic logic [$clog2(DEPTH_SPEC):0] speculative_status_cnt; + speculative_status_cnt = speculative_status_cnt_q; + + // default assignments + speculative_status_cnt_n = speculative_status_cnt_q; + speculative_read_pointer_n = speculative_read_pointer_q; + speculative_write_pointer_n = speculative_write_pointer_q; + speculative_queue_n = speculative_queue_q; + // LSU interface + // we are ready to accept a new entry and the input data is valid + if (valid_i) begin + 
speculative_queue_n[speculative_write_pointer_q].address = paddr_i; + speculative_queue_n[speculative_write_pointer_q].data = data_i; + speculative_queue_n[speculative_write_pointer_q].be = be_i; + speculative_queue_n[speculative_write_pointer_q].data_size = data_size_i; + speculative_queue_n[speculative_write_pointer_q].valid = 1'b1; + // advance the write pointer + speculative_write_pointer_n = speculative_write_pointer_q + 1'b1; + speculative_status_cnt++; + end + + // evict the current entry out of this queue, the commit queue will thankfully take it and commit it + // to the memory hierarchy + if (commit_i) begin + // invalidate + speculative_queue_n[speculative_read_pointer_q].valid = 1'b0; + // advance the read pointer + speculative_read_pointer_n = speculative_read_pointer_q + 1'b1; + speculative_status_cnt--; + end + + speculative_status_cnt_n = speculative_status_cnt; + + // when we flush evict the speculative stores + if (flush_i) begin + // reset all valid flags + for (int unsigned i = 0; i < DEPTH_SPEC; i++) speculative_queue_n[i].valid = 1'b0; + + speculative_write_pointer_n = speculative_read_pointer_q; + // also reset the status count + speculative_status_cnt_n = 'b0; + end + + // we are ready if the speculative and the commit queue have a space left + ready_o = (speculative_status_cnt_n < (DEPTH_SPEC)) || commit_i; + end + + // ---------------------------------------- + // Commit Queue - Memory Interface + // ---------------------------------------- + + // we will never kill a request in the store buffer since we already know that the translation is valid + // e.g.: a kill request will only be necessary if we are not sure if the requested memory address will result in a TLB fault + assign req_port_o.kill_req = 1'b0; + assign req_port_o.data_we = 1'b1; // we will always write in the store queue + assign req_port_o.tag_valid = 1'b0; + + // we do not require an acknowledgement for writes, thus we do not need to identify uniquely the responses + assign 
req_port_o.data_id = '0; + // those signals can directly be output to the memory + assign req_port_o.address_index = commit_queue_q[commit_read_pointer_q].address[ariane_pkg::DCACHE_INDEX_WIDTH-1:0]; + // if we got a new request we already saved the tag from the previous cycle + assign req_port_o.address_tag = commit_queue_q[commit_read_pointer_q].address[ariane_pkg::DCACHE_TAG_WIDTH + + ariane_pkg::DCACHE_INDEX_WIDTH-1 : + ariane_pkg::DCACHE_INDEX_WIDTH]; + assign req_port_o.data_wdata = commit_queue_q[commit_read_pointer_q].data; + assign req_port_o.data_be = commit_queue_q[commit_read_pointer_q].be; + assign req_port_o.data_size = commit_queue_q[commit_read_pointer_q].data_size; + + assign rvfi_mem_paddr_o = commit_queue_n[commit_read_pointer_n].address; + + always_comb begin : store_if + automatic logic [$clog2(DEPTH_COMMIT):0] commit_status_cnt; + commit_status_cnt = commit_status_cnt_q; + + commit_ready_o = (commit_status_cnt_q < DEPTH_COMMIT); + // no store is pending if we don't have any element in the commit queue e.g.: it is empty + no_st_pending_o = (commit_status_cnt_q == 0); + // default assignments + commit_read_pointer_n = commit_read_pointer_q; + commit_write_pointer_n = commit_write_pointer_q; + + commit_queue_n = commit_queue_q; + + req_port_o.data_req = 1'b0; + + // there should be no commit when we are flushing + // if the entry in the commit queue is valid and not speculative anymore we can issue this instruction + if (commit_queue_q[commit_read_pointer_q].valid && !stall_st_pending_i) begin + req_port_o.data_req = 1'b1; + if (req_port_i.data_gnt) begin + // we can evict it from the commit buffer + commit_queue_n[commit_read_pointer_q].valid = 1'b0; + // advance the read_pointer + commit_read_pointer_n = commit_read_pointer_q + 1'b1; + commit_status_cnt--; + end + end + // we ignore the rvalid signal for now as we assume that the store + // happened if we got a grant + + // shift the store request from the speculative buffer to the 
non-speculative + if (commit_i) begin + commit_queue_n[commit_write_pointer_q] = speculative_queue_q[speculative_read_pointer_q]; + commit_write_pointer_n = commit_write_pointer_n + 1'b1; + commit_status_cnt++; + end + + commit_status_cnt_n = commit_status_cnt; + end + + // ------------------ + // Address Checker + // ------------------ + // The load should return the data stored by the most recent store to the + // same physical address. The most direct way to implement this is to + // maintain physical addresses in the store buffer. + + // Of course, there are other micro-architectural techniques to accomplish + // the same thing: you can interlock and wait for the store buffer to + // drain if the load VA matches any store VA modulo the page size (i.e. + // bits 11:0). As a special case, it is correct to bypass if the full VA + // matches, and no younger stores' VAs match in bits 11:0. + // + // checks if the requested load is in the store buffer + // page offsets are virtually and physically the same + always_comb begin : address_checker + page_offset_matches_o = 1'b0; + + // check if the LSBs are identical and the entry is valid + for (int unsigned i = 0; i < DEPTH_COMMIT; i++) begin + // Check if the page offset matches and whether the entry is valid, for the commit queue + if ((page_offset_i[11:3] == commit_queue_q[i].address[11:3]) && commit_queue_q[i].valid) begin + page_offset_matches_o = 1'b1; + break; + end + end + + for (int unsigned i = 0; i < DEPTH_SPEC; i++) begin + // do the same for the speculative queue + if ((page_offset_i[11:3] == speculative_queue_q[i].address[11:3]) && speculative_queue_q[i].valid) begin + page_offset_matches_o = 1'b1; + break; + end + end + // or it matches with the entry we are currently putting into the queue + if ((page_offset_i[11:3] == paddr_i[11:3]) && valid_without_flush_i) begin + page_offset_matches_o = 1'b1; + end + end + + + // registers + always_ff @(posedge clk_i or negedge rst_ni) begin : p_spec + if (~rst_ni) 
begin + speculative_queue_q <= '{default: 0}; + speculative_read_pointer_q <= '0; + speculative_write_pointer_q <= '0; + speculative_status_cnt_q <= '0; + end else begin + speculative_queue_q <= speculative_queue_n; + speculative_read_pointer_q <= speculative_read_pointer_n; + speculative_write_pointer_q <= speculative_write_pointer_n; + speculative_status_cnt_q <= speculative_status_cnt_n; + end + end + + // registers + always_ff @(posedge clk_i or negedge rst_ni) begin : p_commit + if (~rst_ni) begin + commit_queue_q <= '{default: 0}; + commit_read_pointer_q <= '0; + commit_write_pointer_q <= '0; + commit_status_cnt_q <= '0; + end else begin + commit_queue_q <= commit_queue_n; + commit_read_pointer_q <= commit_read_pointer_n; + commit_write_pointer_q <= commit_write_pointer_n; + commit_status_cnt_q <= commit_status_cnt_n; + end + end + + /////////////////////////////////////////////////////// + // assertions + /////////////////////////////////////////////////////// + + //pragma translate_off + // assert that commit is never set when we are flushing this would be counter intuitive + // as flush and commit is decided in the same stage + commit_and_flush : + assert property (@(posedge clk_i) rst_ni && flush_i |-> !commit_i) + else $error("[Commit Queue] You are trying to commit and flush in the same cycle"); + + speculative_buffer_overflow : + assert property (@(posedge clk_i) rst_ni && (speculative_status_cnt_q == DEPTH_SPEC) |-> !valid_i) + else + $error("[Speculative Queue] You are trying to push new data although the buffer is not ready"); + + speculative_buffer_underflow : + assert property (@(posedge clk_i) rst_ni && (speculative_status_cnt_q == 0) |-> !commit_i) + else $error("[Speculative Queue] You are committing although there are no stores to commit"); + + commit_buffer_overflow : + assert property (@(posedge clk_i) rst_ni && (commit_status_cnt_q == DEPTH_COMMIT) |-> !commit_i) + else $error("[Commit Queue] You are trying to commit a store although the 
buffer is full"); + //pragma translate_on +endmodule + + + diff --git a/test/type_param/core/store_unit.sv b/test/type_param/core/store_unit.sv new file mode 100644 index 00000000..fb93818c --- /dev/null +++ b/test/type_param/core/store_unit.sv @@ -0,0 +1,300 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 22.05.2017 +// Description: Store Unit, takes care of all store requests and atomic memory operations (AMOs) + + +module store_unit + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, // flush: returns the FSM to IDLE and blocks posting the store to the buffers + input logic stall_st_pending_i, + output logic no_st_pending_o, + output logic store_buffer_empty_o, + // store unit input port + input logic valid_i, + input lsu_ctrl_t lsu_ctrl_i, + output logic pop_st_o, // high when the pending request is taken (accepted, or dropped on an exception) + input logic commit_i, + output logic commit_ready_o, + input logic amo_valid_commit_i, + // store unit output port + output logic valid_o, + output logic [TRANS_ID_BITS-1:0] trans_id_o, + output riscv::xlen_t result_o, + output exception_t ex_o, + // MMU -> Address Translation + output logic translation_req_o, // request address translation + output logic [riscv::VLEN-1:0] vaddr_o, // virtual address out + output [riscv::PLEN-1:0] rvfi_mem_paddr_o, 
+ input logic [riscv::PLEN-1:0] paddr_i, // physical address in + input exception_t ex_i, + input logic dtlb_hit_i, // will be one in the same cycle translation_req was asserted if it hits + // address checker + input logic [11:0] page_offset_i, + output logic page_offset_matches_o, + // D$ interface + output amo_req_t amo_req_o, + input amo_resp_t amo_resp_i, + input dcache_req_o_t req_port_i, + output dcache_req_i_t req_port_o +); + // it doesn't matter what we are writing back as stores don't return anything + assign result_o = lsu_ctrl_i.data; + + enum logic [1:0] { + IDLE, + VALID_STORE, + WAIT_TRANSLATION, + WAIT_STORE_READY + } + state_d, state_q; + + // store buffer control signals + logic st_ready; + logic st_valid; + logic st_valid_without_flush; + logic instr_is_amo; + assign instr_is_amo = is_amo(lsu_ctrl_i.operation); + // keep the data and the byte enable for the second cycle (after address translation) + riscv::xlen_t st_data_n, st_data_q; + logic [(riscv::XLEN/8)-1:0] st_be_n, st_be_q; + logic [1:0] st_data_size_n, st_data_size_q; + amo_t amo_op_d, amo_op_q; + + logic [TRANS_ID_BITS-1:0] trans_id_n, trans_id_q; + + // output assignments + assign vaddr_o = lsu_ctrl_i.vaddr; // virtual address + assign trans_id_o = trans_id_q; // transaction id from previous cycle + + always_comb begin : store_control + translation_req_o = 1'b0; + valid_o = 1'b0; + st_valid = 1'b0; + st_valid_without_flush = 1'b0; + pop_st_o = 1'b0; + ex_o = ex_i; + trans_id_n = lsu_ctrl_i.trans_id; + state_d = state_q; + + case (state_q) + // we got a valid store + IDLE: begin + if (valid_i) begin + state_d = VALID_STORE; + translation_req_o = 1'b1; + pop_st_o = 1'b1; + // check if translation was valid and we have space in the store buffer + // otherwise simply stall + if (ariane_pkg::MMU_PRESENT && !dtlb_hit_i) begin + state_d = WAIT_TRANSLATION; + pop_st_o = 1'b0; + end + + if (!st_ready) begin + state_d = WAIT_STORE_READY; + pop_st_o = 1'b0; + end + end + end + + VALID_STORE: 
begin + valid_o = 1'b1; + // post this store to the store buffer if we are not flushing + if (!flush_i) st_valid = 1'b1; + + st_valid_without_flush = 1'b1; + + // we have another request and it's not an AMO (the AMO buffer only has depth 1) + if ((valid_i && CVA6Cfg.RVA && !instr_is_amo) || (valid_i && !CVA6Cfg.RVA)) begin + + translation_req_o = 1'b1; + state_d = VALID_STORE; + pop_st_o = 1'b1; + + if (ariane_pkg::MMU_PRESENT && !dtlb_hit_i) begin + state_d = WAIT_TRANSLATION; + pop_st_o = 1'b0; + end + + if (!st_ready) begin + state_d = WAIT_STORE_READY; + pop_st_o = 1'b0; + end + // if we do not have another request go back to idle + end else begin + state_d = IDLE; + end + end + + // the store buffer or the AMO buffer could not accept the request (st_ready was low) + WAIT_STORE_READY: begin + // keep the translation request high + translation_req_o = 1'b1; + + if (st_ready && dtlb_hit_i) begin + state_d = IDLE; + end + end + + default: begin + // we didn't receive a valid translation, wait for one + // but we know that the store queue is not full as we could only have landed here if + // it wasn't full + if (state_q == WAIT_TRANSLATION && ariane_pkg::MMU_PRESENT) begin + translation_req_o = 1'b1; + + if (dtlb_hit_i) begin + state_d = IDLE; + end + end + end + endcase + + // ----------------- + // Access Exception + // ----------------- + // we got an address translation exception (access rights, misaligned or page fault) + if (ex_i.valid && (state_q != IDLE)) begin + // the only difference is that we do not want to store this request + pop_st_o = 1'b1; + st_valid = 1'b0; + state_d = IDLE; + valid_o = 1'b1; + end + + if (flush_i) state_d = IDLE; + end + + // ----------- + // Re-aligner + // ----------- + // re-align the write data to comply with the address offset + always_comb begin + st_be_n = lsu_ctrl_i.be; + // don't shift the data if we are going to perform an AMO as we still need to operate on this data + st_data_n = (CVA6Cfg.RVA && instr_is_amo) ? 
lsu_ctrl_i.data[riscv::XLEN-1:0] : + data_align(lsu_ctrl_i.vaddr[2:0], {{64 - riscv::XLEN{1'b0}}, lsu_ctrl_i.data}); + st_data_size_n = extract_transfer_size(lsu_ctrl_i.operation); + // save AMO op for next cycle + if (CVA6Cfg.RVA) begin + case (lsu_ctrl_i.operation) + AMO_LRW, AMO_LRD: amo_op_d = AMO_LR; + AMO_SCW, AMO_SCD: amo_op_d = AMO_SC; + AMO_SWAPW, AMO_SWAPD: amo_op_d = AMO_SWAP; + AMO_ADDW, AMO_ADDD: amo_op_d = AMO_ADD; + AMO_ANDW, AMO_ANDD: amo_op_d = AMO_AND; + AMO_ORW, AMO_ORD: amo_op_d = AMO_OR; + AMO_XORW, AMO_XORD: amo_op_d = AMO_XOR; + AMO_MAXW, AMO_MAXD: amo_op_d = AMO_MAX; + AMO_MAXWU, AMO_MAXDU: amo_op_d = AMO_MAXU; + AMO_MINW, AMO_MIND: amo_op_d = AMO_MIN; + AMO_MINWU, AMO_MINDU: amo_op_d = AMO_MINU; + default: amo_op_d = AMO_NONE; + endcase + end else begin + amo_op_d = AMO_NONE; + end + end + + logic store_buffer_valid, amo_buffer_valid; + logic store_buffer_ready, amo_buffer_ready; + + // route the request to the store buffer or the AMO buffer, selected by the latched AMO operation + assign store_buffer_valid = st_valid & (amo_op_q == AMO_NONE); + assign amo_buffer_valid = st_valid & (amo_op_q != AMO_NONE); + + assign st_ready = store_buffer_ready & amo_buffer_ready; // both buffers must be ready before a request is taken + + // --------------- + // Store Queue + // --------------- + store_buffer #( + .CVA6Cfg(CVA6Cfg) + ) store_buffer_i ( + .clk_i, + .rst_ni, + .flush_i, + .stall_st_pending_i, + .no_st_pending_o, + .store_buffer_empty_o, + .page_offset_i, + .page_offset_matches_o, + .commit_i, + .commit_ready_o, + .ready_o (store_buffer_ready), + .valid_i (store_buffer_valid), + // the flush signal can be critical and we need this valid + // signal to check whether the page_offset matches or not, + // functionally it doesn't make a difference whether we use + // the correct valid signal or not as we are flushing + // the whole pipeline anyway + .valid_without_flush_i(st_valid_without_flush), + .paddr_i, + .rvfi_mem_paddr_o (rvfi_mem_paddr_o), + .data_i (st_data_q), + .be_i (st_be_q), + .data_size_i (st_data_size_q), + .req_port_i (req_port_i), + 
.req_port_o (req_port_o) + ); + + if (CVA6Cfg.RVA) begin + amo_buffer #( + .CVA6Cfg(CVA6Cfg) + ) i_amo_buffer ( + .clk_i, + .rst_ni, + .flush_i, + .valid_i (amo_buffer_valid), + .ready_o (amo_buffer_ready), + .paddr_i (paddr_i), + .amo_op_i (amo_op_q), + .data_i (st_data_q), + .data_size_i (st_data_size_q), + .amo_req_o (amo_req_o), + .amo_resp_i (amo_resp_i), + .amo_valid_commit_i(amo_valid_commit_i), + .no_st_pending_i (no_st_pending_o) + ); + end else begin + assign amo_buffer_ready = '1; // no AMO buffer is instantiated when CVA6Cfg.RVA is not set, so it never stalls + assign amo_req_o = '0; + end + + // --------------- + // Registers + // --------------- + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + state_q <= IDLE; + st_be_q <= '0; + st_data_q <= '0; + st_data_size_q <= '0; + trans_id_q <= '0; + amo_op_q <= AMO_NONE; + end else begin + state_q <= state_d; + st_be_q <= st_be_n; + st_data_q <= st_data_n; + trans_id_q <= trans_id_n; + st_data_size_q <= st_data_size_n; + amo_op_q <= amo_op_d; + end + end + +endmodule diff --git a/test/type_param/corev_apu/axi_mem_if/src/axi2mem.sv b/test/type_param/corev_apu/axi_mem_if/src/axi2mem.sv new file mode 100644 index 00000000..1575595b --- /dev/null +++ b/test/type_param/corev_apu/axi_mem_if/src/axi2mem.sv @@ -0,0 +1,301 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// ---------------------------- +// AXI to SRAM Adapter +// ---------------------------- +// Author: Florian Zaruba (zarubaf@iis.ee.ethz.ch) +// +// Description: Manages AXI transactions +// Supports all burst accesses but only on aligned addresses and with full data width. +// Assertions should guide you if there is something unsupported happening. +// +module axi2mem #( + parameter int unsigned AXI_ID_WIDTH = 10, + parameter int unsigned AXI_ADDR_WIDTH = 64, + parameter int unsigned AXI_DATA_WIDTH = 64, + parameter int unsigned AXI_USER_WIDTH = 10 +)( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + AXI_BUS.Slave slave, + output logic req_o, + output logic we_o, + output logic [AXI_ADDR_WIDTH-1:0] addr_o, + output logic [AXI_DATA_WIDTH/8-1:0] be_o, + output logic [AXI_USER_WIDTH-1:0] user_o, + output logic [AXI_DATA_WIDTH-1:0] data_o, + input logic [AXI_USER_WIDTH-1:0] user_i, + input logic [AXI_DATA_WIDTH-1:0] data_i +); + + // AXI has the following rules governing the use of bursts: + // - for wrapping bursts, the burst length must be 2, 4, 8, or 16 + // - a burst must not cross a 4KB address boundary + // - early termination of bursts is not supported. 
+ typedef enum logic [1:0] { FIXED = 2'b00, INCR = 2'b01, WRAP = 2'b10} axi_burst_t; + + localparam LOG_NR_BYTES = $clog2(AXI_DATA_WIDTH/8); // log2 of the number of bytes in one full-width beat + + typedef struct packed { + logic [AXI_ID_WIDTH-1:0] id; + logic [AXI_ADDR_WIDTH-1:0] addr; + logic [7:0] len; + logic [2:0] size; + axi_burst_t burst; + } ax_req_t; + + // Registers + enum logic [2:0] { IDLE, READ, WRITE, SEND_B, WAIT_WVALID } state_d, state_q; + ax_req_t ax_req_d, ax_req_q; + logic [AXI_ADDR_WIDTH-1:0] req_addr_d, req_addr_q; + logic [7:0] cnt_d, cnt_q; // index of the next beat to request within the current burst + + function automatic logic [AXI_ADDR_WIDTH-1:0] get_wrap_boundary (input logic [AXI_ADDR_WIDTH-1:0] unaligned_address, input logic [7:0] len); + logic [AXI_ADDR_WIDTH-1:0] warp_address = '0; + // for wrapping transfers ax_len can only be of size 1, 3, 7 or 15; the boundary is the address with its low log2(len+1)+LOG_NR_BYTES bits cleared (any other len returns 0) + if (len == 4'b1) + warp_address[AXI_ADDR_WIDTH-1:1+LOG_NR_BYTES] = unaligned_address[AXI_ADDR_WIDTH-1:1+LOG_NR_BYTES]; + else if (len == 4'b11) + warp_address[AXI_ADDR_WIDTH-1:2+LOG_NR_BYTES] = unaligned_address[AXI_ADDR_WIDTH-1:2+LOG_NR_BYTES]; + else if (len == 4'b111) + warp_address[AXI_ADDR_WIDTH-1:3+LOG_NR_BYTES] = unaligned_address[AXI_ADDR_WIDTH-1:3+LOG_NR_BYTES]; + else if (len == 4'b1111) + warp_address[AXI_ADDR_WIDTH-1:4+LOG_NR_BYTES] = unaligned_address[AXI_ADDR_WIDTH-1:4+LOG_NR_BYTES]; + + return warp_address; + endfunction + + logic [AXI_ADDR_WIDTH-1:0] aligned_address; + logic [AXI_ADDR_WIDTH-1:0] wrap_boundary; + logic [AXI_ADDR_WIDTH-1:0] upper_wrap_boundary; + logic [AXI_ADDR_WIDTH-1:0] cons_addr; + + always_comb begin + // address generation + aligned_address = {ax_req_q.addr[AXI_ADDR_WIDTH-1:LOG_NR_BYTES], {{LOG_NR_BYTES}{1'b0}}}; + wrap_boundary = get_wrap_boundary(ax_req_q.addr, ax_req_q.len); + // this will overflow + upper_wrap_boundary = wrap_boundary + ((ax_req_q.len + 1) << LOG_NR_BYTES); + // calculate consecutive address + cons_addr = aligned_address + (cnt_q << LOG_NR_BYTES); + + // Transaction attributes + // default assignments + state_d = state_q; + ax_req_d = 
ax_req_q; + req_addr_d = req_addr_q; + cnt_d = cnt_q; + // Memory default assignments + data_o = slave.w_data; + user_o = slave.w_user; + be_o = slave.w_strb; + we_o = 1'b0; + req_o = 1'b0; + addr_o = '0; + // AXI assignments + // request + slave.aw_ready = 1'b0; + slave.ar_ready = 1'b0; + // read response channel + slave.r_valid = 1'b0; + slave.r_data = data_i; + slave.r_resp = '0; + slave.r_last = '0; + slave.r_id = ax_req_q.id; + slave.r_user = user_i; + // slave write data channel + slave.w_ready = 1'b0; + // write response channel + slave.b_valid = 1'b0; + slave.b_resp = 1'b0; + slave.b_id = 1'b0; + slave.b_user = 1'b0; + + case (state_q) + + IDLE: begin + // Wait for a read or write + // ------------ + // Read + // ------------ + if (slave.ar_valid) begin + slave.ar_ready = 1'b1; + // sample ax + ax_req_d = {slave.ar_id, slave.ar_addr, slave.ar_len, slave.ar_size, slave.ar_burst}; + state_d = READ; + // we can request the first address, this saves us time + req_o = 1'b1; + addr_o = slave.ar_addr; + // save the address + req_addr_d = slave.ar_addr; + // beat 0 has just been requested, so the next beat to request is beat 1 + cnt_d = 1; + // ------------ + // Write + // ------------ + end else if (slave.aw_valid) begin + slave.aw_ready = 1'b1; + slave.w_ready = 1'b1; + addr_o = slave.aw_addr; + // sample ax + ax_req_d = {slave.aw_id, slave.aw_addr, slave.aw_len, slave.aw_size, slave.aw_burst}; + // we've got our first w_valid so start the write process + if (slave.w_valid) begin + req_o = 1'b1; + we_o = 1'b1; + state_d = (slave.w_last) ? SEND_B : WRITE; + cnt_d = 1; + // we still have to wait for the first w_valid to arrive + end else + state_d = WAIT_WVALID; + end + end + + // ~> we are still missing a w_valid + WAIT_WVALID: begin + slave.w_ready = 1'b1; + addr_o = ax_req_q.addr; + // we can now make our first request + if (slave.w_valid) begin + req_o = 1'b1; + we_o = 1'b1; + state_d = (slave.w_last) ? 
SEND_B : WRITE; + cnt_d = 1; + end + end + + READ: begin + // keep request to memory high + req_o = 1'b1; + addr_o = req_addr_q; + // send the response + slave.r_valid = 1'b1; + slave.r_data = data_i; + slave.r_user = user_i; + slave.r_id = ax_req_q.id; + slave.r_last = (cnt_q == ax_req_q.len + 1); + + // check that the master is ready, the slave must not wait on this + if (slave.r_ready) begin + // ---------------------------- + // Next address generation + // ---------------------------- + // handle the correct burst type + case (ax_req_q.burst) + FIXED, INCR: addr_o = cons_addr; // note: FIXED bursts are advanced exactly like INCR here + WRAP: begin + // check if the address reached the upper wrap boundary + if (cons_addr == upper_wrap_boundary) begin + addr_o = wrap_boundary; + // address wrapped beyond the boundary: continue from the lower boundary by the overshoot + end else if (cons_addr > upper_wrap_boundary) begin + addr_o = wrap_boundary + (cons_addr - upper_wrap_boundary); + // we are still in the incremental regime + end else begin + addr_o = cons_addr; + end + end + endcase + // we need to change the address here for the upcoming request + // we sent the last beat -> go back to idle + if (slave.r_last) begin + state_d = IDLE; + // we already got everything + req_o = 1'b0; + end + // save the request address for the next cycle + req_addr_d = addr_o; + // advance the beat counter as the master has consumed the read data + cnt_d = cnt_q + 1; + // TODO: configure correct byte-lane + end + end + // ~> we already wrote the first word here + WRITE: begin + + slave.w_ready = 1'b1; + + // consume a word here + if (slave.w_valid) begin + req_o = 1'b1; + we_o = 1'b1; + // ---------------------------- + // Next address generation + // ---------------------------- + // handle the correct burst type + case (ax_req_q.burst) + + FIXED, INCR: addr_o = cons_addr; // note: FIXED bursts are advanced exactly like INCR here + WRAP: begin + // check if the address reached the upper wrap boundary + if (cons_addr == upper_wrap_boundary) begin + addr_o = wrap_boundary; + // address wrapped beyond the boundary: continue from the lower boundary by the overshoot + end else if (cons_addr > upper_wrap_boundary) 
begin + addr_o = wrap_boundary + (cons_addr - upper_wrap_boundary); + // we are still in the incremental regime + end else begin + addr_o = cons_addr; + end + end + endcase + // save the request address for the next cycle + req_addr_d = addr_o; + // advance the beat counter as this write beat has been passed on to the memory + cnt_d = cnt_q + 1; + + if (slave.w_last) + state_d = SEND_B; + end + end + // ~> send a write acknowledge back + SEND_B: begin + slave.b_valid = 1'b1; + slave.b_id = ax_req_q.id; + if (slave.b_ready) + state_d = IDLE; + end + + endcase + end + + `ifndef SYNTHESIS + `ifndef VERILATOR + // assert that only full data lane transfers allowed + // assert property ( + // @(posedge clk_i) slave.aw_valid |-> (slave.aw_size == LOG_NR_BYTES)) else $fatal ("Only full data lane transfers allowed"); + // assert property ( + // @(posedge clk_i) slave.ar_valid |-> (slave.ar_size == LOG_NR_BYTES)) else $fatal ("Only full data lane transfers allowed"); + // assert property ( + // @(posedge clk_i) slave.aw_valid |-> (slave.aw_addr[LOG_NR_BYTES-1:0] == '0)) else $fatal ("Unaligned accesses are not allowed at the moment"); + // assert property ( + // @(posedge clk_i) slave.ar_valid |-> (slave.ar_addr[LOG_NR_BYTES-1:0] == '0)) else $fatal ("Unaligned accesses are not allowed at the moment"); + `endif + `endif + // -------------- + // Registers + // -------------- + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + state_q <= IDLE; + ax_req_q <= '0; + req_addr_q <= '0; + cnt_q <= '0; + end else begin + state_q <= state_d; + ax_req_q <= ax_req_d; + req_addr_q <= req_addr_d; + cnt_q <= cnt_d; + end + end +endmodule + + diff --git a/test/type_param/corev_apu/bootrom/bootrom.sv b/test/type_param/corev_apu/bootrom/bootrom.sv new file mode 100644 index 00000000..58ba804b --- /dev/null +++ b/test/type_param/corev_apu/bootrom/bootrom.sv @@ -0,0 +1,225 @@ +/* Copyright 2018 ETH Zurich and University of Bologna. 
+ * Copyright and related rights are licensed under the Solderpad Hardware + * License, Version 0.51 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law + * or agreed to in writing, software, hardware and materials distributed under + * this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * + * File: $filename.v + * + * Description: Auto-generated bootrom + */ + +// Auto-generated code +module bootrom ( + input logic clk_i, + input logic req_i, + input logic [63:0] addr_i, + output logic [63:0] rdata_o +); + localparam int RomSize = 186; + + const logic [RomSize-1:0][63:0] mem = { + 64'h00000000_00000068, + 64'h74646977_2d6f692d, + 64'h67657200_74666968, + 64'h732d6765_72007374, + 64'h70757272_65746e69, + 64'h00646565_70732d74, + 64'h6e657272_75630073, + 64'h656d616e_2d676572, + 64'h00646564_6e657478, + 64'h652d7374_70757272, + 64'h65746e69_00736567, + 64'h6e617200_656c646e, + 64'h61687000_72656c6c, + 64'h6f72746e_6f632d74, + 64'h70757272_65746e69, + 64'h00736c6c_65632d74, + 64'h70757272_65746e69, + 64'h23007469_6c70732d, + 64'h626c7400_65707974, + 64'h2d756d6d_00617369, + 64'h2c766373_69720073, + 64'h75746174_73006765, + 64'h72006570_79745f65, + 64'h63697665_64007963, + 64'h6e657571_6572662d, + 64'h6b636f6c_63007963, + 64'h6e657571_6572662d, + 64'h65736162_656d6974, + 64'h006c6564_6f6d0065, + 64'h6c626974_61706d6f, + 64'h6300736c_6c65632d, + 64'h657a6973_2300736c, + 64'h6c65632d_73736572, + 64'h64646123_09000000, + 64'h02000000_02000000, + 64'h02000000_006c6f72, + 64'h746e6f63_cc000000, + 64'h08000000_03000000, + 64'h00100000_00000000, + 64'h00000018_00000000, + 64'h5b000000_10000000, + 64'h03000000_07000000, + 64'h06000000_05000000, + 
64'h04000000_e4000000, + 64'h10000000_03000000, + 64'h00007265_6d69745f, + 64'h6270612c_706c7570, + 64'h1b000000_0f000000, + 64'h03000000_00003030, + 64'h30303030_38314072, + 64'h656d6974_01000000, + 64'h02000000_04000000, + 64'hf9000000_04000000, + 64'h03000000_02000000, + 64'hef000000_04000000, + 64'h03000000_01000000, + 64'he4000000_04000000, + 64'h03000000_00c20100, + 64'hd6000000_04000000, + 64'h03000000_80f0fa02, + 64'h3f000000_04000000, + 64'h03000000_00100000, + 64'h00000000_00000010, + 64'h00000000_5b000000, + 64'h10000000_03000000, + 64'h00000000_61303535, + 64'h3631736e_1b000000, + 64'h09000000_03000000, + 64'h00000030_30303030, + 64'h30303140_74726175, + 64'h01000000_02000000, + 64'h006c6f72_746e6f63, + 64'hcc000000_08000000, + 64'h03000000_00000c00, + 64'h00000000_00000002, + 64'h00000000_5b000000, + 64'h10000000_03000000, + 64'h07000000_01000000, + 64'h03000000_01000000, + 64'hb8000000_10000000, + 64'h03000000_00000000, + 64'h30746e69_6c632c76, + 64'h63736972_1b000000, + 64'h0d000000_03000000, + 64'h00000030_30303030, + 64'h30324074_6e696c63, + 64'h01000000_b1000000, + 64'h00000000_03000000, + 64'h00007375_622d656c, + 64'h706d6973_00636f73, + 64'h2d657261_622d656e, + 64'h61697261_2c687465, + 64'h1b000000_1f000000, + 64'h03000000_02000000, + 64'h0f000000_04000000, + 64'h03000000_02000000, + 64'h00000000_04000000, + 64'h03000000_00636f73, + 64'h01000000_02000000, + 64'h00000010_00000000, + 64'h00000080_00000000, + 64'h5b000000_10000000, + 64'h03000000_00007972, + 64'h6f6d656d_4f000000, + 64'h07000000_03000000, + 64'h00303030_30303030, + 64'h38407972_6f6d656d, + 64'h01000000_02000000, + 64'h02000000_02000000, + 64'h01000000_a9000000, + 64'h04000000_03000000, + 64'h00006374_6e692d75, + 64'h70632c76_63736972, + 64'h1b000000_0f000000, + 64'h03000000_94000000, + 64'h00000000_03000000, + 64'h01000000_83000000, + 64'h04000000_03000000, + 64'h00000000_72656c6c, + 64'h6f72746e_6f632d74, + 64'h70757272_65746e69, + 64'h01000000_79000000, + 64'h00000000_03000000, + 
64'h00003933_76732c76, + 64'h63736972_70000000, + 64'h0b000000_03000000, + 64'h00006364_66616d69, + 64'h34367672_66000000, + 64'h0b000000_03000000, + 64'h00000076_63736972, + 64'h00656e61_69726120, + 64'h2c687465_1b000000, + 64'h12000000_03000000, + 64'h00000000_79616b6f, + 64'h5f000000_05000000, + 64'h03000000_00000000, + 64'h5b000000_04000000, + 64'h03000000_00757063, + 64'h4f000000_04000000, + 64'h03000000_80f0fa02, + 64'h3f000000_04000000, + 64'h03000000_00000030, + 64'h40757063_01000000, + 64'h00800000_2c000000, + 64'h04000000_03000000, + 64'h00000000_0f000000, + 64'h04000000_03000000, + 64'h01000000_00000000, + 64'h04000000_03000000, + 64'h00000000_73757063, + 64'h01000000_00657261, + 64'h622d656e_61697261, + 64'h2c687465_26000000, + 64'h10000000_03000000, + 64'h00766564_2d657261, + 64'h622d656e_61697261, + 64'h2c687465_1b000000, + 64'h14000000_03000000, + 64'h02000000_0f000000, + 64'h04000000_03000000, + 64'h02000000_00000000, + 64'h04000000_03000000, + 64'h00000000_01000000, + 64'h00000000_00000000, + 64'h00000000_00000000, + 64'h0c040000_06010000, + 64'h00000000_10000000, + 64'h11000000_28000000, + 64'h44040000_38000000, + 64'h4a050000_edfe0dd0, + 64'h00000000_00000000, + 64'h00000000_00000000, + 64'h00000000_00000000, + 64'h00000000_00000000, + 64'h00000000_00000000, + 64'h00000000_ffdff06f, + 64'h10500073_03c58593, + 64'h00000597_f1402573, + 64'h00000000_00000000, + 64'h00000000_00000000, + 64'h00000000_00000000, + 64'h00000000_00000000, + 64'h00000000_00000000, + 64'h00040067_07458593, + 64'h00000597_f1402573, + 64'h01f41413_00100413 + }; + + logic [$clog2(RomSize)-1:0] addr_q; + + always_ff @(posedge clk_i) begin + if (req_i) begin + addr_q <= addr_i[$clog2(RomSize)-1+3:3]; + end + end + + // this prevents spurious Xes from propagating into + // the speculative fetch stage of the core + assign rdata_o = (addr_q < RomSize) ? 
mem[addr_q] : '0; +endmodule diff --git a/test/type_param/corev_apu/clint/axi_lite_interface.sv b/test/type_param/corev_apu/clint/axi_lite_interface.sv new file mode 100644 index 00000000..c431dc02 --- /dev/null +++ b/test/type_param/corev_apu/clint/axi_lite_interface.sv @@ -0,0 +1,170 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the “License”); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 17/07/2017 +// Description: AXI Lite compatible interface +// + +module axi_lite_interface #( + parameter int unsigned AXI_ADDR_WIDTH = 64, + parameter int unsigned AXI_DATA_WIDTH = 64, + parameter int unsigned AXI_ID_WIDTH = 10, + parameter type axi_req_t = ariane_axi::req_t, + parameter type axi_resp_t = ariane_axi::resp_t +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + + input axi_req_t axi_req_i, + output axi_resp_t axi_resp_o, + + output logic [AXI_ADDR_WIDTH-1:0] address_o, + output logic en_o, // transaction is valid + output logic we_o, // write + output logic [(AXI_DATA_WIDTH/8)-1:0] be_o, // byte enable write + input logic [AXI_DATA_WIDTH-1:0] data_i, // data + output logic [AXI_DATA_WIDTH-1:0] data_o +); + + // The RLAST signal is not required, and is considered asserted for every transfer on the read data channel. 
+ enum logic [1:0] { IDLE, READ, WRITE, WRITE_B } state_q, state_d; + // save the trans id, we will need it for reflection otherwise we are not plug compatible to the AXI standard + logic [AXI_ID_WIDTH-1:0] trans_id_n, trans_id_q; + // address register + logic [AXI_ADDR_WIDTH-1:0] address_n, address_q; + + // pass through read data on the read data channel + assign axi_resp_o.r.data = data_i; + // send back the transaction id we've latched + assign axi_resp_o.r.id = trans_id_q; + assign axi_resp_o.b.id = trans_id_q; + // set r_last to one as defined by the AXI4 - Lite standard + assign axi_resp_o.r.last = 1'b1; + // we do not support any errors so set response flag to all zeros + assign axi_resp_o.b.resp = 2'b0; + assign axi_resp_o.r.resp = 2'b0; + // output data which we want to write to the slave + assign data_o = axi_req_i.w.data; + assign be_o = axi_req_i.w.strb; + // ------------------------ + // AXI4-Lite State Machine + // ------------------------ + always_comb begin + // default signal assignment + state_d = state_q; + address_n = address_q; + trans_id_n = trans_id_q; + + // we'll answer a write request only if we got address and data + axi_resp_o.aw_ready = 1'b0; + axi_resp_o.w_ready = 1'b0; + axi_resp_o.b_valid = 1'b0; + + axi_resp_o.ar_ready = 1'b0; + axi_resp_o.r_valid = 1'b0; + + address_o = '0; + we_o = 1'b0; + en_o = 1'b0; + + case (state_q) + // we are ready to accept a new request + IDLE: begin + // we've got a valid write request, we also know that we have asserted the aw_ready + if (axi_req_i.aw_valid) begin + axi_resp_o.aw_ready = 1'b1; + // this costs performance but the interconnect does not obey the AXI standard + // e.g.: we could wait for aw_valid && w_valid to do the transaction. 
+ state_d = WRITE; + // save address + address_n = axi_req_i.aw.addr; + // save the transaction id for reflection + trans_id_n = axi_req_i.aw.id; + + // we've got a valid read request, we also know that we have asserted the ar_ready + end else if (axi_req_i.ar_valid) begin + axi_resp_o.ar_ready = 1'b1; + state_d = READ; + // save address + address_n = axi_req_i.ar.addr; + // save the transaction id for reflection + trans_id_n = axi_req_i.ar.id; + + end + end + // We've got a read request at least one cycle earlier + // so data_i will already contain the data we'd like to read + READ: begin + // enable the ram-like + en_o = 1'b1; + // further assert the correct address + address_o = address_q; + // the read is valid + axi_resp_o.r_valid = 1'b1; + // check if we got a valid r_ready and go back to IDLE + if (axi_req_i.r_ready) + state_d = IDLE; + end + // We've got a write request at least one cycle earlier + // wait here for the data + WRITE: begin + if (axi_req_i.w_valid) begin + axi_resp_o.w_ready = 1'b1; + // use the latched address + address_o = address_q; + en_o = 1'b1; + we_o = 1'b1; + // the write has been issued, send the write response next + state_d = WRITE_B; + end + end + + WRITE_B: begin + axi_resp_o.b_valid = 1'b1; + // we've already performed the write here so wait for the ready signal + if (axi_req_i.b_ready) + state_d = IDLE; + end + default:; + + endcase + end + + // ------------------------ + // Registers + // ------------------------ + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + state_q <= IDLE; + address_q <= '0; + trans_id_q <= '0; + end else begin + state_q <= state_d; + address_q <= address_n; + trans_id_q <= trans_id_n; + end + end + + // ------------------------ + // Assertions + // ------------------------ + // Listen for illegal transactions + //pragma translate_off + `ifndef VERILATOR + // check that burst length is just one + assert property (@(posedge clk_i) axi_req_i.ar_valid |-> ((axi_req_i.ar.len == 8'b0))) + else begin $error("AXI Lite 
does not support bursts larger than 1 or byte length unequal to the native bus size"); $stop(); end + // do the same for the write channel + assert property (@(posedge clk_i) axi_req_i.aw_valid |-> ((axi_req_i.aw.len == 8'b0))) + else begin $error("AXI Lite does not support bursts larger than 1 or byte length unequal to the native bus size"); $stop(); end + `endif + //pragma translate_on +endmodule diff --git a/test/type_param/corev_apu/clint/clint.sv b/test/type_param/corev_apu/clint/clint.sv new file mode 100644 index 00000000..e76f96d8 --- /dev/null +++ b/test/type_param/corev_apu/clint/clint.sv @@ -0,0 +1,294 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the “License”); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 15/07/2017 +// Description: A RISC-V privilege spec 1.11 (WIP) compatible CLINT (core local interrupt controller) +// + +// Platforms provide a real-time counter, exposed as a memory-mapped machine-mode register, mtime. mtime must run at +// constant frequency, and the platform must provide a mechanism for determining the timebase of mtime (device tree).
+// clint: memory-mapped msip/mtimecmp/mtime registers behind an axi_lite_interface; drives per-core timer and software interrupts.
+module clint #(
+    parameter int unsigned AXI_ADDR_WIDTH = 64,
+    parameter int unsigned AXI_DATA_WIDTH = 64,
+    parameter int unsigned AXI_ID_WIDTH = 10,
+    parameter int unsigned NR_CORES = 1, // Number of cores therefore also the number of timecmp registers and timer interrupts
+    parameter type axi_req_t = ariane_axi::req_t,
+    parameter type axi_resp_t = ariane_axi::resp_t
+) (
+    input logic clk_i, // Clock
+    input logic rst_ni, // Asynchronous reset active low
+    input logic testmode_i, // not referenced anywhere inside this module
+    input axi_req_t axi_req_i, // forwarded unchanged to axi_lite_interface_i
+    output axi_resp_t axi_resp_o, // driven by axi_lite_interface_i
+    input logic rtc_i, // Real-time clock in (usually 32.768 kHz)
+    output logic [NR_CORES-1:0] timer_irq_o, // Timer interrupts
+    output logic [NR_CORES-1:0] ipi_o // software interrupt (a.k.a inter-process-interrupt)
+);
+    // register offset
+    localparam logic [15:0] MSIP_BASE = 16'h0;
+    localparam logic [15:0] MTIMECMP_BASE = 16'h4000;
+    localparam logic [15:0] MTIME_BASE = 16'hbff8;
+
+    localparam AddrSelWidth = (NR_CORES == 1) ? 1 : $clog2(NR_CORES); // width of the core-select address slice, at least 1 bit
+
+    // signals from AXI 4 Lite
+    logic [AXI_ADDR_WIDTH-1:0] address;
+    logic en;
+    logic we;
+    logic [7:0] be; // fixed 8-bit/64-bit widths; the assertion at the end of the module requires AXI_DATA_WIDTH == 64
+    logic [63:0] wdata;
+    logic [63:0] rdata;
+
+
+    // the low 16 address bits select the register; they are matched against the *_BASE offsets above
+    logic [15:0] register_address;
+    assign register_address = address[15:0];
+    // actual registers
+    logic [63:0] mtime_n, mtime_q;
+    logic [NR_CORES-1:0][63:0] mtimecmp_n, mtimecmp_q;
+    logic [NR_CORES-1:0] msip_n, msip_q;
+    // increase the timer
+    logic increase_timer;
+
+    // -----------------------------
+    // AXI Interface Logic
+    // -----------------------------
+    axi_lite_interface #(
+        .AXI_ADDR_WIDTH ( AXI_ADDR_WIDTH ),
+        .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ),
+        .AXI_ID_WIDTH ( AXI_ID_WIDTH ),
+        .axi_req_t ( axi_req_t ),
+        .axi_resp_t ( axi_resp_t )
+    ) axi_lite_interface_i (
+        .clk_i ( clk_i ),
+        .rst_ni ( rst_ni ),
+        .axi_req_i ( axi_req_i ),
+        .axi_resp_o ( axi_resp_o ),
+        .address_o ( address ),
+        .en_o ( en ),
+        .we_o ( we ),
+        .be_o ( be ),
+        .data_i ( rdata ),
+        .data_o ( wdata )
+    );
+
+    // -----------------------------
+    // Register Update Logic
+    // -----------------------------
+    // register write logic (requests arrive through axi_lite_interface_i)
+    always_comb begin
+        mtime_n = mtime_q;
+        mtimecmp_n = mtimecmp_q;
+        msip_n = msip_q;
+        // RTC says we should increase the timer
+        if (increase_timer)
+            mtime_n = mtime_q + 1;
+
+        // written from the bus - assigned last, so it overrides the RTC increment above
+        if (en && we) begin
+            case (register_address) inside
+                [MSIP_BASE:MSIP_BASE+4*NR_CORES]: begin // NOTE(review): inside ranges are inclusive, so this also matches the address one past the last msip word - confirm intended
+                    msip_n[$unsigned(address[AddrSelWidth-1+2:2])] = wdata[32*address[2]]; // takes wdata bit 0 or bit 32 depending on address[2]
+                end
+
+                [MTIMECMP_BASE:MTIMECMP_BASE+8*NR_CORES]: begin
+                    if (riscv::XLEN == 32) begin
+                        if (be[3:0] == 4'hf) // all four low byte enables set: write the lower word
+                            mtimecmp_n[$unsigned(address[AddrSelWidth-1+3:3])][31:0] = wdata[31:0];
+                        else
+                            mtimecmp_n[$unsigned(address[AddrSelWidth-1+3:3])][63:32] = wdata[63:32];
+
+                    end else begin
+                        mtimecmp_n[$unsigned(address[AddrSelWidth-1+3:3])] = wdata;
+                    end
+                end
+
+                [MTIME_BASE:MTIME_BASE+4]: begin
+                    if (riscv::XLEN == 32) begin
+                        if (address[2:0] == 3'h0)
+                            mtime_n[31:0] = wdata[31:0];
+                        else begin
+                            if (address[2:0] == 3'h4)
+                                mtime_n[63:32] = wdata[63:32];
+                        end
+                    end else begin
+                        mtime_n = wdata;
+                    end
+                end
+                default:;
+            endcase
+        end
+    end
+
+    // register read logic (rdata is returned through axi_lite_interface_i; zero when no read is active)
+    always_comb begin
+        rdata = 'b0;
+
+        if (en && !we) begin
+            case (register_address) inside
+                [MSIP_BASE:MSIP_BASE+4*NR_CORES]: begin
+                    if (riscv::XLEN == 32)
+                        rdata[31:0] = msip_q[$unsigned(address[AddrSelWidth-1+2:2])];
+                    else
+                        rdata = msip_q[$unsigned(address[AddrSelWidth-1+2:2])];
+                end
+
+                [MTIMECMP_BASE:MTIMECMP_BASE+8*NR_CORES]: begin
+                    if (riscv::XLEN == 32) begin
+                        if (address[2:0] == 3'h0)
+                            rdata[31:0] = mtimecmp_q[$unsigned(address[AddrSelWidth-1+3:3])][31:0];
+                        else begin
+                            if (address[2:0] == 3'h4)
+                                rdata[63:32] = mtimecmp_q[$unsigned(address[AddrSelWidth-1+3:3])][63:32];
+                        end
+
+                    end else begin
+                        rdata = mtimecmp_q[$unsigned(address[AddrSelWidth-1+3:3])];
+                    end
+                end
+
+                [MTIME_BASE:MTIME_BASE+4]: begin
+                    if (riscv::XLEN == 32) begin
+                        if (address[2:0] == 3'h0)
+                            rdata[31:0] = mtime_q[31:0];
+                        else begin
+                            if (address[2:0] == 3'h4)
+                                rdata[63:32] = mtime_q[63:32];
+                        end
+                    end else begin
+                        rdata = mtime_q;
+                    end
+                end
+                default:;
+            endcase
+        end
+    end
+
+    // -----------------------------
+    // IRQ Generation
+    // -----------------------------
+    // The mtime register has a 64-bit precision on all RV32, RV64, and RV128 systems. Platforms provide a 64-bit
+    // memory-mapped machine-mode timer compare register (mtimecmp), which causes a timer interrupt to be posted when the
+    // mtime register contains a value greater than or equal (mtime >= mtimecmp) to the value in the mtimecmp register.
+    // The interrupt remains posted until it is cleared by writing the mtimecmp register. The interrupt will only be taken
+    // if interrupts are enabled and the MTIE bit is set in the mie register.
+    always_comb begin : irq_gen
+        // raise the timer interrupt of every core whose compare value has been reached
+        for (int unsigned i = 0; i < NR_CORES; i++) begin
+            if (mtime_q >= mtimecmp_q[i]) begin
+                timer_irq_o[i] = 1'b1;
+            end else begin
+                timer_irq_o[i] = 1'b0;
+            end
+        end
+    end
+
+    // -----------------------------
+    // RTC time tracking facilities
+    // -----------------------------
+    // 1. Put the RTC input through a classic two stage edge-triggered synchronizer to filter out any
+    //    metastability effects (or at least make them unlikely :-))
+    clint_sync_wedge i_sync_edge (
+        .clk_i,
+        .rst_ni,
+        .serial_i ( rtc_i ),
+        .r_edge_o ( increase_timer ),
+        .f_edge_o ( ), // left open
+        .serial_o ( ) // left open
+    );
+
+    // Registers
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (~rst_ni) begin
+            mtime_q <= 64'b0;
+            mtimecmp_q <= 'b0;
+            msip_q <= '0;
+        end else begin
+            mtime_q <= mtime_n;
+            mtimecmp_q <= mtimecmp_n;
+            msip_q <= msip_n;
+        end
+    end
+
+    assign ipi_o = msip_q;
+
+    // -------------
+    // Assertions
+    // --------------
+    //pragma translate_off
+    `ifndef VERILATOR
+    // Static assertion check for appropriate bus width
+    initial begin
+        assert (AXI_DATA_WIDTH == 64) else $fatal(1, "Timer needs to interface with a 64 bit bus, everything else is not supported");
+    end
+    `endif
+    //pragma translate_on
+
+endmodule
+// clint_sync_wedge: synchronizes serial_i through clint_sync and flags rising/falling edges of the synchronized value.
+// TODO(zarubaf): Replace by common-cells 2.0
+module clint_sync_wedge #(
+    parameter int unsigned STAGES = 2
+) (
+    input logic clk_i,
+    input logic rst_ni,
+    input logic serial_i,
+    output logic r_edge_o, // high while the synchronized input is 1 and its previous-cycle value is 0
+    output logic f_edge_o, // high while the synchronized input is 0 and its previous-cycle value is 1
+    output logic serial_o // synchronized input delayed by one further register
+);
+    logic serial, serial_q;
+
+    assign serial_o = serial_q;
+    assign f_edge_o = (~serial) & serial_q;
+    assign r_edge_o = serial & (~serial_q);
+
+    clint_sync #(
+        .STAGES (STAGES)
+    ) i_sync (
+        .clk_i,
+        .rst_ni,
+        .serial_i,
+        .serial_o ( serial )
+    );
+
+    always_ff @(posedge clk_i, negedge rst_ni) begin
+        if (!rst_ni) begin
+            serial_q <= 1'b0;
+        end else begin
+            serial_q <= serial;
+        end
+    end
+endmodule
+// clint_sync: STAGES-deep shift register on serial_i; serial_o is the oldest stage.
+module clint_sync #(
+    parameter int unsigned STAGES = 2
+) (
+    input logic clk_i,
+    input logic rst_ni,
+    input logic serial_i,
+    output logic serial_o
+);
+
+    logic [STAGES-1:0] reg_q;
+
+    always_ff @(posedge clk_i, negedge rst_ni) begin
+        if (!rst_ni) begin
+            reg_q <= 'h0;
+        end else begin
+            reg_q <= {reg_q[STAGES-2:0], serial_i}; // NOTE(review): the [STAGES-2:0] slice presumes STAGES >= 2
+        end
+    end
+
+    assign serial_o = reg_q[STAGES-1];
+
+endmodule
diff --git a/test/type_param/corev_apu/fpga/src/apb_timer/apb_timer.sv b/test/type_param/corev_apu/fpga/src/apb_timer/apb_timer.sv
new file mode 100644
index 00000000..90134cae
--- /dev/null
+++ b/test/type_param/corev_apu/fpga/src/apb_timer/apb_timer.sv
@@ -0,0 +1,88 @@
+// Copyright 2015 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+`define REGS_MAX_ADR 2'd2
+// apb_timer: instantiates TIMER_CNT timer blocks on one APB port and routes PSEL and the read-back signals by address.
+module apb_timer
+#(
+    parameter APB_ADDR_WIDTH = 12, //APB slaves are 4KB by default
+    parameter TIMER_CNT = 2 // how many timers should be instantiated
+)
+(
+    input logic HCLK,
+    input logic HRESETn,
+    input logic [APB_ADDR_WIDTH-1:0] PADDR,
+    input logic [31:0] PWDATA,
+    input logic PWRITE,
+    input logic PSEL,
+    input logic PENABLE,
+    output logic [31:0] PRDATA,
+    output logic PREADY,
+    output logic PSLVERR,
+
+    output logic [(TIMER_CNT * 2) - 1:0] irq_o // overflow and cmp interrupt
+);
+
+    logic [TIMER_CNT-1:0] psel_int, pready, pslverr;
+    logic [$clog2(TIMER_CNT) - 1:0] slave_address_int;
+    logic [TIMER_CNT-1:0] [31:0] prdata;
+
+    assign slave_address_int = PADDR[$clog2(TIMER_CNT)+ `REGS_MAX_ADR + 1:`REGS_MAX_ADR + 2]; // with REGS_MAX_ADR = 2 this is PADDR[$clog2(TIMER_CNT)+3:4]
+
+    always_comb
+    begin
+        psel_int = '0;
+        psel_int[slave_address_int] = PSEL; // forward PSEL only to the addressed timer
+    end
+
+    // output mux
+    always_comb
+    begin
+
+        if (psel_int != '0)
+        begin
+            PRDATA = prdata[slave_address_int];
+            PREADY = pready[slave_address_int];
+            PSLVERR = pslverr[slave_address_int];
+        end
+        else
+        begin
+            PRDATA = '0;
+            PREADY = 1'b1;
+            PSLVERR = 1'b0;
+        end
+    end
+
+
+    genvar k;
+
+    generate
+    for(k = 0; k < TIMER_CNT; k++)
+    begin : TIMER_GEN
+        timer #(
+            .APB_ADDR_WIDTH ( APB_ADDR_WIDTH )
+        ) timer_i (
+            .HCLK ( HCLK ),
+            .HRESETn ( HRESETn ),
+
+            .PADDR ( PADDR ),
+            .PWDATA ( PWDATA ),
+            .PWRITE ( PWRITE ),
+            .PSEL ( psel_int[k] ),
+            .PENABLE ( PENABLE ),
+            .PRDATA ( prdata[k] ),
+            .PREADY ( pready[k] ),
+            .PSLVERR ( pslverr[k] ),
+
+            .irq_o ( irq_o[2*k+1 : 2*k] )
+        );
+    end
+endgenerate
+endmodule
diff --git a/test/type_param/corev_apu/fpga/src/apb_timer/timer.sv b/test/type_param/corev_apu/fpga/src/apb_timer/timer.sv
new file mode 100644
index 00000000..1a3a4f18
--- /dev/null
+++ b/test/type_param/corev_apu/fpga/src/apb_timer/timer.sv
@@ -0,0 +1,145 @@
+// Copyright 2015 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// define three registers per timer - timer, cmp and prescaler registers
+`define REGS_MAX_IDX 'd2
+`define REG_TIMER 2'b00
+`define REG_TIMER_CTRL 2'b01
+`define REG_CMP 2'b10
+`define PRESCALER_STARTBIT 'd3
+`define PRESCALER_STOPBIT 'd5
+`define ENABLE_BIT 'd0
+// timer: one 32-bit APB timer with a control register (enable bit 0, prescaler bits 5:3) and a compare register.
+module timer
+#(
+    parameter APB_ADDR_WIDTH = 12 //APB slaves are 4KB by default
+)
+(
+    input logic HCLK,
+    input logic HRESETn,
+    input logic [APB_ADDR_WIDTH-1:0] PADDR,
+    input logic [31:0] PWDATA,
+    input logic PWRITE,
+    input logic PSEL,
+    input logic PENABLE,
+    output logic [31:0] PRDATA,
+    output logic PREADY,
+    output logic PSLVERR,
+
+    output logic [1:0] irq_o // overflow and cmp interrupt
+);
+
+    // APB register interface
+    logic [`REGS_MAX_IDX-1:0] register_adr;
+    assign register_adr = PADDR[`REGS_MAX_IDX + 2:2]; // NOTE(review): 3-bit slice PADDR[4:2] assigned to the 2-bit register_adr; the top bit is dropped
+    // APB logic: we are always ready to capture the data into our regs
+    // not supporting transfer failure
+    assign PREADY = 1'b1;
+    assign PSLVERR = 1'b0;
+    // registers
+    logic [0:`REGS_MAX_IDX] [31:0] regs_q, regs_n;
+    logic [31:0] cycle_counter_n, cycle_counter_q;
+
+    logic [2:0] prescaler_int;
+
+    //irq logic
+    always_comb
+    begin
+        irq_o = 2'b0;
+
+        // overflow irq
+        if (regs_q[`REG_TIMER] == 32'hffff_ffff)
+            irq_o[0] = 1'b1;
+
+        // compare match irq if compare reg is set
+        if (regs_q[`REG_CMP] != 'b0 && regs_q[`REG_TIMER] == regs_q[`REG_CMP])
+            irq_o[1] = 1'b1;
+
+    end
+
+    assign prescaler_int = regs_q[`REG_TIMER_CTRL][`PRESCALER_STOPBIT:`PRESCALER_STARTBIT];
+    // register write logic
+    always_comb
+    begin
+        regs_n = regs_q;
+        cycle_counter_n = cycle_counter_q + 1;
+
+        // reset timer after cmp or overflow
+        if (irq_o[0] == 1'b1 || irq_o[1] == 1'b1)
+            regs_n[`REG_TIMER] = 1'b0;
+        else if(regs_q[`REG_TIMER_CTRL][`ENABLE_BIT] && prescaler_int != 'b0 && prescaler_int == cycle_counter_q) // prescaler
+        begin
+            regs_n[`REG_TIMER] = regs_q[`REG_TIMER] + 1; //prescaler mode
+        end
+        else if (regs_q[`REG_TIMER_CTRL][`ENABLE_BIT] && regs_q[`REG_TIMER_CTRL][`PRESCALER_STOPBIT:`PRESCALER_STARTBIT] == 'b0) // normal count mode
+            regs_n[`REG_TIMER] = regs_q[`REG_TIMER] + 1;
+
+        // reset prescaler cycle counter
+        if (cycle_counter_q >= regs_q[`REG_TIMER_CTRL]) // NOTE(review): compares against the whole control register, not only prescaler_int - confirm intended
+            cycle_counter_n = 32'b0;
+
+        // written from APB bus - gets priority
+        if (PSEL && PENABLE && PWRITE)
+        begin
+
+            case (register_adr) // no default item: register_adr == 2'b11 leaves regs_n as computed above
+                `REG_TIMER:
+                    regs_n[`REG_TIMER] = PWDATA;
+
+                `REG_TIMER_CTRL:
+                    regs_n[`REG_TIMER_CTRL] = PWDATA;
+
+                `REG_CMP:
+                begin
+                    regs_n[`REG_CMP] = PWDATA;
+                    regs_n[`REG_TIMER] = 32'b0; // reset timer if compare register is written
+                end
+            endcase
+        end
+    end
+
+    // APB register read logic
+    always_comb
+    begin
+        PRDATA = 'b0;
+
+        if (PSEL && PENABLE && !PWRITE)
+        begin
+
+            case (register_adr) // no default item: register_adr == 2'b11 reads back as zero
+                `REG_TIMER:
+                    PRDATA = regs_q[`REG_TIMER];
+
+                `REG_TIMER_CTRL:
+                    PRDATA = regs_q[`REG_TIMER_CTRL];
+
+                `REG_CMP:
+                    PRDATA = regs_q[`REG_CMP];
+            endcase
+
+        end
+    end
+    // synchronous part
+    always_ff @(posedge HCLK, negedge HRESETn)
+    begin
+        if(~HRESETn)
+        begin
+            regs_q <= '{default: 32'b0};
+            cycle_counter_q <= 32'b0;
+        end
+        else
+        begin
+            regs_q <= regs_n;
+            cycle_counter_q <= cycle_counter_n;
+        end
+    end
+
+
+endmodule
diff --git a/test/type_param/corev_apu/fpga/src/axi2apb/src/axi2apb.sv b/test/type_param/corev_apu/fpga/src/axi2apb/src/axi2apb.sv
new file mode 100644
index 00000000..ceaa312d
--- /dev/null
+++ b/test/type_param/corev_apu/fpga/src/axi2apb/src/axi2apb.sv
@@ -0,0 +1,449 @@
+// Copyright 2014-2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Igor Loi
+// Davide Rossi
+// Florian Zaruba
+
+`define OKAY 2'b00
+`define EXOKAY 2'b01
+`define SLVERR 2'b10
+`define DECERR 2'b11
+// axi2apb: AXI4 slave to APB master bridge. Each AXI channel passes through an axi_*_buffer instance; the CS/NS FSM issues one APB access per address request (AWLEN/ARLEN are not examined).
+module axi2apb
+#(
+    parameter AXI4_ADDRESS_WIDTH = 32,
+    parameter AXI4_RDATA_WIDTH = 32,
+    parameter AXI4_WDATA_WIDTH = 32,
+    parameter AXI4_ID_WIDTH = 16,
+    parameter AXI4_USER_WIDTH = 10,
+    parameter AXI_NUMBYTES = AXI4_WDATA_WIDTH/8,
+
+    parameter BUFF_DEPTH_SLAVE = 4,
+    parameter APB_ADDR_WIDTH = 32
+)
+(
+    input logic ACLK,
+    input logic ARESETn,
+    input logic test_en_i,
+
+    input logic [AXI4_ID_WIDTH-1:0] AWID_i,
+    input logic [AXI4_ADDRESS_WIDTH-1:0] AWADDR_i,
+    input logic [ 7:0] AWLEN_i,
+    input logic [ 2:0] AWSIZE_i,
+    input logic [ 1:0] AWBURST_i,
+    input logic AWLOCK_i,
+    input logic [ 3:0] AWCACHE_i,
+    input logic [ 2:0] AWPROT_i,
+    input logic [ 3:0] AWREGION_i,
+    input logic [ AXI4_USER_WIDTH-1:0] AWUSER_i,
+    input logic [ 3:0] AWQOS_i,
+    input logic AWVALID_i,
+    output logic AWREADY_o,
+
+    input logic [AXI4_WDATA_WIDTH-1:0] WDATA_i,
+    input logic [AXI_NUMBYTES-1:0] WSTRB_i,
+    input logic WLAST_i,
+    input logic [AXI4_USER_WIDTH-1:0] WUSER_i,
+    input logic WVALID_i,
+    output logic WREADY_o,
+
+    output logic [AXI4_ID_WIDTH-1:0] BID_o,
+    output logic [ 1:0] BRESP_o,
+    output logic BVALID_o,
+    output logic [AXI4_USER_WIDTH-1:0] BUSER_o,
+    input logic BREADY_i,
+
+    input logic [AXI4_ID_WIDTH-1:0] ARID_i,
+    input logic [AXI4_ADDRESS_WIDTH-1:0] ARADDR_i,
+    input logic [ 7:0] ARLEN_i,
+    input logic [ 2:0] ARSIZE_i,
+    input logic [ 1:0] ARBURST_i,
+    input logic ARLOCK_i,
+    input logic [ 3:0] ARCACHE_i,
+    input logic [ 2:0] ARPROT_i,
+    input logic [ 3:0] ARREGION_i,
+    input logic [ AXI4_USER_WIDTH-1:0] ARUSER_i,
+    input logic [ 3:0] ARQOS_i,
+    input logic ARVALID_i,
+    output logic ARREADY_o,
+
+    output logic [AXI4_ID_WIDTH-1:0] RID_o,
+    output logic [AXI4_RDATA_WIDTH-1:0] RDATA_o,
+    output logic [ 1:0] RRESP_o,
+    output logic RLAST_o,
+    output logic [AXI4_USER_WIDTH-1:0] RUSER_o,
+    output logic RVALID_o,
+    input logic RREADY_i,
+
+    output logic PENABLE,
+    output logic PWRITE,
+    output logic [APB_ADDR_WIDTH-1:0] PADDR,
+    output logic PSEL,
+    output logic [AXI4_WDATA_WIDTH-1:0] PWDATA,
+    input logic [AXI4_RDATA_WIDTH-1:0] PRDATA,
+    input logic PREADY,
+    input logic PSLVERR // not referenced in this module; BRESP and RRESP are always driven to `OKAY
+);
+
+    // --------------------
+    // AXI write address bus
+    // --------------------
+    logic [AXI4_ID_WIDTH-1:0] AWID;
+    logic [AXI4_ADDRESS_WIDTH-1:0] AWADDR;
+    logic [ 7:0] AWLEN;
+    logic [ 2:0] AWSIZE;
+    logic [ 1:0] AWBURST;
+    logic AWLOCK;
+    logic [ 3:0] AWCACHE;
+    logic [ 2:0] AWPROT;
+    logic [ 3:0] AWREGION;
+    logic [ AXI4_USER_WIDTH-1:0] AWUSER;
+    logic [ 3:0] AWQOS;
+    logic AWVALID;
+    logic AWREADY;
+    // --------------------
+    // AXI write data bus
+    // --------------------
+    logic [AXI4_WDATA_WIDTH-1:0] WDATA; // from FIFO
+    logic [AXI_NUMBYTES-1:0] WSTRB; // from FIFO
+    logic WLAST; // from FIFO
+    logic [AXI4_USER_WIDTH-1:0] WUSER; // from FIFO
+    logic WVALID; // from FIFO
+    logic WREADY; // TO FIFO
+    // --------------------
+    // AXI write response bus
+    // --------------------
+    logic [AXI4_ID_WIDTH-1:0] BID;
+    logic [ 1:0] BRESP;
+    logic BVALID;
+    logic [AXI4_USER_WIDTH-1:0] BUSER;
+    logic BREADY;
+    // --------------------
+    // AXI read address bus
+    // --------------------
+    logic [AXI4_ID_WIDTH-1:0] ARID;
+    logic [AXI4_ADDRESS_WIDTH-1:0] ARADDR;
+    logic [ 7:0] ARLEN;
+    logic [ 2:0] ARSIZE;
+    logic [ 1:0] ARBURST;
+    logic ARLOCK;
+    logic [ 3:0] ARCACHE;
+    logic [ 2:0] ARPROT;
+    logic [ 3:0] ARREGION;
+    logic [ AXI4_USER_WIDTH-1:0] ARUSER;
+    logic [ 3:0] ARQOS;
+    logic ARVALID;
+    logic ARREADY;
+    // --------------------
+    // AXI read data bus
+    // --------------------
+    logic [AXI4_ID_WIDTH-1:0] RID;
+    logic [AXI4_RDATA_WIDTH-1:0] RDATA;
+    logic [ 1:0] RRESP;
+    logic RLAST;
+    logic [AXI4_USER_WIDTH-1:0] RUSER;
+    logic RVALID;
+    logic RREADY;
+
+    enum logic [2:0] { IDLE,
+                       DONE_SINGLE_RD,
+                       WAIT_W_PREADY,
+                       WAIT_R_PREADY,
+                       SEND_B_RESP
+                     } CS, NS;
+
+    logic [AXI4_ADDRESS_WIDTH-1:0] address;
+    logic sample_RDATA;
+
+    logic [AXI4_RDATA_WIDTH-1:0] RDATA_Q;
+
+    logic read_req;
+    logic write_req;
+
+    assign PENABLE = write_req | read_req;
+    assign PWRITE = write_req;
+    assign PADDR = address[APB_ADDR_WIDTH-1:0];
+    assign PWDATA = WDATA;
+    assign PSEL = 1'b1; // the APB select is tied high; only PENABLE marks an access
+
+    // AXI WRITE ADDRESS CHANNEL BUFFER
+    axi_aw_buffer #(
+        .ID_WIDTH ( AXI4_ID_WIDTH ),
+        .ADDR_WIDTH ( AXI4_ADDRESS_WIDTH ),
+        .USER_WIDTH ( AXI4_USER_WIDTH ),
+        .BUFFER_DEPTH ( BUFF_DEPTH_SLAVE )
+    ) slave_aw_buffer_i (
+        .clk_i ( ACLK ),
+        .rst_ni ( ARESETn ),
+        .test_en_i ( test_en_i ),
+
+        .slave_valid_i ( AWVALID_i ),
+        .slave_addr_i ( AWADDR_i ),
+        .slave_prot_i ( AWPROT_i ),
+        .slave_region_i ( AWREGION_i ),
+        .slave_len_i ( AWLEN_i ),
+        .slave_size_i ( AWSIZE_i ),
+        .slave_burst_i ( AWBURST_i ),
+        .slave_lock_i ( AWLOCK_i ),
+        .slave_cache_i ( AWCACHE_i ),
+        .slave_qos_i ( AWQOS_i ),
+        .slave_id_i ( AWID_i ),
+        .slave_user_i ( AWUSER_i ),
+        .slave_ready_o ( AWREADY_o ),
+
+        .master_valid_o ( AWVALID ),
+        .master_addr_o ( AWADDR ),
+        .master_prot_o ( AWPROT ),
+        .master_region_o ( AWREGION ),
+        .master_len_o ( AWLEN ),
+        .master_size_o ( AWSIZE ),
+        .master_burst_o ( AWBURST ),
+        .master_lock_o ( AWLOCK ),
+        .master_cache_o ( AWCACHE ),
+        .master_qos_o ( AWQOS ),
+        .master_id_o ( AWID ),
+        .master_user_o ( AWUSER ),
+        .master_ready_i ( AWREADY )
+    );
+
+    // AXI READ ADDRESS CHANNEL BUFFER
+    axi_ar_buffer #(
+        .ID_WIDTH ( AXI4_ID_WIDTH ),
+        .ADDR_WIDTH ( AXI4_ADDRESS_WIDTH ),
+        .USER_WIDTH ( AXI4_USER_WIDTH ),
+        .BUFFER_DEPTH ( BUFF_DEPTH_SLAVE )
+    ) slave_ar_buffer_i (
+        .clk_i ( ACLK ),
+        .rst_ni ( ARESETn ),
+        .test_en_i ( test_en_i ),
+
+        .slave_valid_i ( ARVALID_i ),
+        .slave_addr_i ( ARADDR_i ),
+        .slave_prot_i ( ARPROT_i ),
+        .slave_region_i ( ARREGION_i ),
+        .slave_len_i ( ARLEN_i ),
+        .slave_size_i ( ARSIZE_i ),
+        .slave_burst_i ( ARBURST_i ),
+        .slave_lock_i ( ARLOCK_i ),
+        .slave_cache_i ( ARCACHE_i ),
+        .slave_qos_i ( ARQOS_i ),
+        .slave_id_i ( ARID_i ),
+        .slave_user_i ( ARUSER_i ),
+        .slave_ready_o ( ARREADY_o ),
+
+        .master_valid_o ( ARVALID ),
+        .master_addr_o ( ARADDR ),
+        .master_prot_o ( ARPROT ),
+        .master_region_o ( ARREGION ),
+        .master_len_o ( ARLEN ),
+        .master_size_o ( ARSIZE ),
+        .master_burst_o ( ARBURST ),
+        .master_lock_o ( ARLOCK ),
+        .master_cache_o ( ARCACHE ),
+        .master_qos_o ( ARQOS ),
+        .master_id_o ( ARID ),
+        .master_user_o ( ARUSER ),
+        .master_ready_i ( ARREADY )
+    );
+
+
+    axi_w_buffer #(
+        .DATA_WIDTH(AXI4_WDATA_WIDTH),
+        .USER_WIDTH(AXI4_USER_WIDTH),
+        .BUFFER_DEPTH(BUFF_DEPTH_SLAVE)
+    ) slave_w_buffer_i (
+        .clk_i ( ACLK ),
+        .rst_ni ( ARESETn ),
+        .test_en_i ( test_en_i ),
+
+        .slave_valid_i ( WVALID_i ),
+        .slave_data_i ( WDATA_i ),
+        .slave_strb_i ( WSTRB_i ),
+        .slave_user_i ( WUSER_i ),
+        .slave_last_i ( WLAST_i ),
+        .slave_ready_o ( WREADY_o ),
+
+        .master_valid_o ( WVALID ),
+        .master_data_o ( WDATA ),
+        .master_strb_o ( WSTRB ),
+        .master_user_o ( WUSER ),
+        .master_last_o ( WLAST ),
+        .master_ready_i ( WREADY )
+    );
+
+    axi_r_buffer #(
+        .ID_WIDTH ( AXI4_ID_WIDTH ),
+        .DATA_WIDTH ( AXI4_RDATA_WIDTH ),
+        .USER_WIDTH ( AXI4_USER_WIDTH ),
+        .BUFFER_DEPTH ( BUFF_DEPTH_SLAVE )
+    ) slave_r_buffer_i (
+        .clk_i ( ACLK ),
+        .rst_ni ( ARESETn ),
+        .test_en_i ( test_en_i ),
+
+        .slave_valid_i ( RVALID ),
+        .slave_data_i ( RDATA ),
+        .slave_resp_i ( RRESP ),
+        .slave_user_i ( RUSER ),
+        .slave_id_i ( RID ),
+        .slave_last_i ( RLAST ),
+        .slave_ready_o ( RREADY ),
+
+        .master_valid_o ( RVALID_o ),
+        .master_data_o ( RDATA_o ),
+        .master_resp_o ( RRESP_o ),
+        .master_user_o ( RUSER_o ),
+        .master_id_o ( RID_o ),
+        .master_last_o ( RLAST_o ),
+        .master_ready_i ( RREADY_i )
+    );
+
+    axi_b_buffer #(
+        .ID_WIDTH(AXI4_ID_WIDTH),
+        .USER_WIDTH(AXI4_USER_WIDTH),
+        .BUFFER_DEPTH(BUFF_DEPTH_SLAVE)
+    ) slave_b_buffer (
+        .clk_i ( ACLK ),
+        .rst_ni ( ARESETn ),
+        .test_en_i ( test_en_i ),
+
+        .slave_valid_i ( BVALID ),
+        .slave_resp_i ( BRESP ),
+        .slave_id_i ( BID ),
+        .slave_user_i ( BUSER ),
+        .slave_ready_o ( BREADY ),
+
+        .master_valid_o ( BVALID_o ),
+        .master_resp_o ( BRESP_o ),
+        .master_id_o ( BID_o ),
+        .master_user_o ( BUSER_o ),
+        .master_ready_i ( BREADY_i )
+    );
+
+    always_comb begin
+        NS = CS; // default: hold the current state, so NS is assigned on every path and no latch is inferred
+        read_req = 1'b0;
+        write_req = 1'b0;
+        address = '0;
+
+        sample_RDATA = 1'b0;
+
+        ARREADY = 1'b0;
+        AWREADY = 1'b0;
+        WREADY = 1'b0;
+
+        BVALID = 1'b0;
+        BRESP = `OKAY;
+        BID = AWID;
+        BUSER = AWUSER;
+
+        RVALID = 1'b0;
+        RLAST = 1'b0;
+        RID = ARID;
+        RUSER = ARUSER;
+        RRESP = `OKAY;
+        RDATA = RDATA_Q;
+
+        case(CS)
+
+            WAIT_R_PREADY: begin
+                read_req = 1'b1;
+                address = ARADDR[APB_ADDR_WIDTH - 1 : 0];
+                sample_RDATA = PREADY;
+
+                if (PREADY == 1'b1) begin // APB is READY --> RDATA is AVAILABLE
+                    NS = DONE_SINGLE_RD;
+                end
+            end
+
+            WAIT_W_PREADY: begin
+                write_req = 1'b1;
+                address = AWADDR[APB_ADDR_WIDTH - 1:0];
+                // There is a Pending WRITE!!
+                if (PREADY == 1'b1) begin // APB is READY --> WDATA is latched
+                    NS = SEND_B_RESP;
+                end
+            end
+
+            IDLE: begin
+                if (ARVALID == 1'b1) begin // a pending read is served before a pending write
+                    read_req = 1'b1;
+                    address = ARADDR[APB_ADDR_WIDTH - 1:0];
+                    sample_RDATA = PREADY;
+
+                    if(PREADY == 1'b1) begin // APB is READY --> RDATA is AVAILABLE
+                        NS = DONE_SINGLE_RD;
+                    end else begin // APB not ready
+                        NS = WAIT_R_PREADY;
+                    end
+                end else begin
+                    if (AWVALID) begin
+                        address = AWADDR[APB_ADDR_WIDTH - 1:0];
+                        if (WVALID) begin
+                            write_req = 1'b1;
+
+                            // There is a Pending WRITE!!
+                            if (PREADY == 1'b1) begin// APB is READY --> WDATA is latched
+                                NS = SEND_B_RESP;
+                            end else begin // APB not READY
+                                NS = WAIT_W_PREADY;
+                            end
+                        end else begin // GOT ADDRESS WRITE, not DATA
+                            write_req = 1'b0;
+                            address = '0;
+                            NS = IDLE;
+                        end
+                    end
+                end
+            end
+
+            SEND_B_RESP: begin
+
+                BVALID = 1'b1;
+                address = '0;
+
+                if (BREADY) begin // response accepted: release the AW and W entries that were used for this write
+                    NS = IDLE;
+                    AWREADY = 1'b1;
+                    WREADY = 1'b1;
+                end
+            end
+
+            DONE_SINGLE_RD: begin
+
+                RVALID = 1'b1;
+                RLAST = 1;
+                address = '0;
+
+                if (RREADY) begin // ready to send back the rdata
+                    NS = IDLE;
+                    ARREADY = 1'b1;
+                end
+            end
+
+            default: NS = IDLE;
+
+        endcase
+    end
+
+    always_ff @(posedge ACLK, negedge ARESETn) begin
+        if (ARESETn == 1'b0) begin
+            CS <= IDLE;
+            RDATA_Q <= '0;
+        end else begin
+            CS <= NS;
+
+            if (sample_RDATA) // capture PRDATA in the cycle the APB slave signals PREADY during a read
+                RDATA_Q <= PRDATA;
+        end
+    end
+
+endmodule
diff --git a/test/type_param/corev_apu/fpga/src/axi2apb/src/axi2apb_64_32.sv b/test/type_param/corev_apu/fpga/src/axi2apb/src/axi2apb_64_32.sv
new file mode 100644
index 00000000..c98b1798
--- /dev/null
+++ b/test/type_param/corev_apu/fpga/src/axi2apb/src/axi2apb_64_32.sv
@@ -0,0 +1,745 @@
+// Copyright 2014-2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the “License”); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// +// Igor Loi +// Davide Rossi +// Florian Zaruba + +`define OKAY 2'b00 +`define EXOKAY 2'b01 +`define SLVERR 2'b10 +`define DECERR 2'b11 + +module axi2apb_64_32 #( + parameter int unsigned AXI4_ADDRESS_WIDTH = 32, + parameter int unsigned AXI4_RDATA_WIDTH = 64, + parameter int unsigned AXI4_WDATA_WIDTH = 64, + parameter int unsigned AXI4_ID_WIDTH = 16, + parameter int unsigned AXI4_USER_WIDTH = 10, + parameter int unsigned AXI_NUMBYTES = AXI4_WDATA_WIDTH/8, + + parameter int unsigned BUFF_DEPTH_SLAVE = 4, + parameter int unsigned APB_NUM_SLAVES = 8, + parameter int unsigned APB_ADDR_WIDTH = 12 +) +( + input logic ACLK, + input logic ARESETn, + input logic test_en_i, + // --------------------------------------------------------- + // AXI TARG Port Declarations ------------------------------ + // --------------------------------------------------------- + //AXI write address bus -------------- // USED// ----------- + input logic [AXI4_ID_WIDTH-1:0] AWID_i , + input logic [AXI4_ADDRESS_WIDTH-1:0] AWADDR_i , + input logic [ 7:0] AWLEN_i , + input logic [ 2:0] AWSIZE_i , + input logic [ 1:0] AWBURST_i , + input logic AWLOCK_i , + input logic [ 3:0] AWCACHE_i , + input logic [ 2:0] AWPROT_i , + input logic [ 3:0] AWREGION_i , + input logic [ AXI4_USER_WIDTH-1:0] AWUSER_i , + input logic [ 3:0] AWQOS_i , + input logic AWVALID_i , + output logic AWREADY_o , + // --------------------------------------------------------- + + //AXI write data bus -------------- // USED// -------------- + input logic [AXI_NUMBYTES-1:0][7:0] WDATA_i , + input logic [AXI_NUMBYTES-1:0] WSTRB_i , + input logic WLAST_i , + input logic [AXI4_USER_WIDTH-1:0] WUSER_i , + input logic WVALID_i , + output logic WREADY_o , + // --------------------------------------------------------- + + //AXI write response bus -------------- // USED// ---------- + output logic [AXI4_ID_WIDTH-1:0] BID_o , + output logic [ 1:0] BRESP_o , + output logic BVALID_o , + output logic [AXI4_USER_WIDTH-1:0] BUSER_o , + input 
logic BREADY_i , + // --------------------------------------------------------- + + //AXI read address bus ------------------------------------- + input logic [AXI4_ID_WIDTH-1:0] ARID_i , + input logic [AXI4_ADDRESS_WIDTH-1:0] ARADDR_i , + input logic [ 7:0] ARLEN_i , + input logic [ 2:0] ARSIZE_i , + input logic [ 1:0] ARBURST_i , + input logic ARLOCK_i , + input logic [ 3:0] ARCACHE_i , + input logic [ 2:0] ARPROT_i , + input logic [ 3:0] ARREGION_i , + input logic [ AXI4_USER_WIDTH-1:0] ARUSER_i , + input logic [ 3:0] ARQOS_i , + input logic ARVALID_i , + output logic ARREADY_o , + // --------------------------------------------------------- + + //AXI read data bus ---------------------------------------- + output logic [AXI4_ID_WIDTH-1:0] RID_o , + output logic [AXI4_RDATA_WIDTH-1:0] RDATA_o , + output logic [ 1:0] RRESP_o , + output logic RLAST_o , + output logic [AXI4_USER_WIDTH-1:0] RUSER_o , + output logic RVALID_o , + input logic RREADY_i , + // --------------------------------------------------------- + + output logic PENABLE , + output logic PWRITE , + output logic [APB_ADDR_WIDTH-1:0] PADDR , + output logic PSEL , + output logic [31:0] PWDATA , + input logic [31:0] PRDATA , + input logic PREADY , + input logic PSLVERR +); + + // -------------------- + // AXI write address bus + // -------------------- + logic [AXI4_ID_WIDTH-1:0] AWID; + logic [AXI4_ADDRESS_WIDTH-1:0] AWADDR; + logic [ 7:0] AWLEN; + logic [ 2:0] AWSIZE; + logic [ 1:0] AWBURST; + logic AWLOCK; + logic [ 3:0] AWCACHE; + logic [ 2:0] AWPROT; + logic [ 3:0] AWREGION; + logic [ AXI4_USER_WIDTH-1:0] AWUSER; + logic [ 3:0] AWQOS; + logic AWVALID; + logic AWREADY; + // -------------------- + // AXI write data bus + // -------------------- + logic [1:0][31:0] WDATA; // from FIFO + logic [AXI_NUMBYTES-1:0] WSTRB; // from FIFO + logic WLAST; // from FIFO + logic [AXI4_USER_WIDTH-1:0] WUSER; // from FIFO + logic WVALID; // from FIFO + logic WREADY; // TO FIFO + // -------------------- + // AXI write 
response bus + // -------------------- + logic [AXI4_ID_WIDTH-1:0] BID; + logic [ 1:0] BRESP; + logic BVALID; + logic [AXI4_USER_WIDTH-1:0] BUSER; + logic BREADY; + // -------------------- + // AXI read address bus + // -------------------- + logic [AXI4_ID_WIDTH-1:0] ARID; + logic [AXI4_ADDRESS_WIDTH-1:0] ARADDR; + logic [ 7:0] ARLEN; + logic [ 2:0] ARSIZE; + logic [ 1:0] ARBURST; + logic ARLOCK; + logic [ 3:0] ARCACHE; + logic [ 2:0] ARPROT; + logic [ 3:0] ARREGION; + logic [ AXI4_USER_WIDTH-1:0] ARUSER; + logic [ 3:0] ARQOS; + logic ARVALID; + logic ARREADY; + // -------------------- + // AXI read data bus + // -------------------- + logic [AXI4_ID_WIDTH-1:0] RID; + logic [1:0][31:0] RDATA; + logic [ 1:0] RRESP; + logic RLAST; + logic [AXI4_USER_WIDTH-1:0] RUSER; + logic RVALID; + logic RREADY; + + enum logic [3:0] { IDLE, + SINGLE_RD, SINGLE_RD_64, + BURST_RD_1, BURST_RD, BURST_RD_64, + BURST_WR, BURST_WR_64, + SINGLE_WR,SINGLE_WR_64, + WAIT_R_PREADY, WAIT_W_PREADY + } CS, NS; + + logic W_word_sel; + + logic [APB_ADDR_WIDTH-1:0] address; + + logic read_req; + logic write_req; + + logic sample_AR; + logic [8:0] ARLEN_Q; + logic decr_ARLEN; + + logic sample_AW; + logic [8:0] AWLEN_Q; + logic decr_AWLEN; + + logic [AXI4_ADDRESS_WIDTH-1:0] ARADDR_Q; + logic incr_ARADDR; + + logic [AXI4_ADDRESS_WIDTH-1:0] AWADDR_Q; + logic incr_AWADDR; + + logic sample_RDATA_0; // sample the first 32 bit chunk to be aggregated in 64 bit rdata + logic sample_RDATA_1; // sample the second 32 bit chunk to be aggregated in 64 bit rdata + logic [31:0] RDATA_Q_0; + logic [31:0] RDATA_Q_1; + + assign PENABLE = write_req | read_req; + assign PWRITE = write_req; + assign PADDR = address[APB_ADDR_WIDTH-1:0]; + + assign PWDATA = WDATA[W_word_sel]; + assign PSEL = 1'b1; + + // AXI WRITE ADDRESS CHANNEL BUFFER + axi_aw_buffer #( + .ID_WIDTH ( AXI4_ID_WIDTH ), + .ADDR_WIDTH ( AXI4_ADDRESS_WIDTH ), + .USER_WIDTH ( AXI4_USER_WIDTH ), + .BUFFER_DEPTH ( BUFF_DEPTH_SLAVE ) + ) slave_aw_buffer_i ( + 
.clk_i ( ACLK ), + .rst_ni ( ARESETn ), + .test_en_i ( test_en_i ), + .slave_valid_i ( AWVALID_i ), + .slave_addr_i ( AWADDR_i ), + .slave_prot_i ( AWPROT_i ), + .slave_region_i ( AWREGION_i ), + .slave_len_i ( AWLEN_i ), + .slave_size_i ( AWSIZE_i ), + .slave_burst_i ( AWBURST_i ), + .slave_lock_i ( AWLOCK_i ), + .slave_cache_i ( AWCACHE_i ), + .slave_qos_i ( AWQOS_i ), + .slave_id_i ( AWID_i ), + .slave_user_i ( AWUSER_i ), + .slave_ready_o ( AWREADY_o ), + .master_valid_o ( AWVALID ), + .master_addr_o ( AWADDR ), + .master_prot_o ( AWPROT ), + .master_region_o ( AWREGION ), + .master_len_o ( AWLEN ), + .master_size_o ( AWSIZE ), + .master_burst_o ( AWBURST ), + .master_lock_o ( AWLOCK ), + .master_cache_o ( AWCACHE ), + .master_qos_o ( AWQOS ), + .master_id_o ( AWID ), + .master_user_o ( AWUSER ), + .master_ready_i ( AWREADY ) + ); + // AXI READ ADDRESS CHANNEL BUFFER + axi_ar_buffer #( + .ID_WIDTH ( AXI4_ID_WIDTH ), + .ADDR_WIDTH ( AXI4_ADDRESS_WIDTH ), + .USER_WIDTH ( AXI4_USER_WIDTH ), + .BUFFER_DEPTH ( BUFF_DEPTH_SLAVE ) + ) slave_ar_buffer_i ( + .clk_i ( ACLK ), + .rst_ni ( ARESETn ), + .test_en_i ( test_en_i ), + .slave_valid_i ( ARVALID_i ), + .slave_addr_i ( ARADDR_i ), + .slave_prot_i ( ARPROT_i ), + .slave_region_i ( ARREGION_i ), + .slave_len_i ( ARLEN_i ), + .slave_size_i ( ARSIZE_i ), + .slave_burst_i ( ARBURST_i ), + .slave_lock_i ( ARLOCK_i ), + .slave_cache_i ( ARCACHE_i ), + .slave_qos_i ( ARQOS_i ), + .slave_id_i ( ARID_i ), + .slave_user_i ( ARUSER_i ), + .slave_ready_o ( ARREADY_o ), + .master_valid_o ( ARVALID ), + .master_addr_o ( ARADDR ), + .master_prot_o ( ARPROT ), + .master_region_o ( ARREGION ), + .master_len_o ( ARLEN ), + .master_size_o ( ARSIZE ), + .master_burst_o ( ARBURST ), + .master_lock_o ( ARLOCK ), + .master_cache_o ( ARCACHE ), + .master_qos_o ( ARQOS ), + .master_id_o ( ARID ), + .master_user_o ( ARUSER ), + .master_ready_i ( ARREADY ) + ); + axi_w_buffer #( + .DATA_WIDTH ( AXI4_WDATA_WIDTH ), + .USER_WIDTH (
AXI4_USER_WIDTH ), + .BUFFER_DEPTH ( BUFF_DEPTH_SLAVE ) + ) slave_w_buffer_i ( + .clk_i ( ACLK ), + .rst_ni ( ARESETn ), + .test_en_i ( test_en_i ), + .slave_valid_i ( WVALID_i ), + .slave_data_i ( WDATA_i ), + .slave_strb_i ( WSTRB_i ), + .slave_user_i ( WUSER_i ), + .slave_last_i ( WLAST_i ), + .slave_ready_o ( WREADY_o ), + .master_valid_o ( WVALID ), + .master_data_o ( WDATA ), + .master_strb_o ( WSTRB ), + .master_user_o ( WUSER ), + .master_last_o ( WLAST ), + .master_ready_i ( WREADY ) + ); + axi_r_buffer #( + .ID_WIDTH ( AXI4_ID_WIDTH ), + .DATA_WIDTH ( AXI4_RDATA_WIDTH ), + .USER_WIDTH ( AXI4_USER_WIDTH ), + .BUFFER_DEPTH ( BUFF_DEPTH_SLAVE ) + ) slave_r_buffer_i ( + .clk_i ( ACLK ), + .rst_ni ( ARESETn ), + .test_en_i ( test_en_i ), + .slave_valid_i ( RVALID ), + .slave_data_i ( RDATA ), + .slave_resp_i ( RRESP ), + .slave_user_i ( RUSER ), + .slave_id_i ( RID ), + .slave_last_i ( RLAST ), + .slave_ready_o ( RREADY ), + .master_valid_o ( RVALID_o ), + .master_data_o ( RDATA_o ), + .master_resp_o ( RRESP_o ), + .master_user_o ( RUSER_o ), + .master_id_o ( RID_o ), + .master_last_o ( RLAST_o ), + .master_ready_i ( RREADY_i ) + ); + + axi_b_buffer #( + .ID_WIDTH ( AXI4_ID_WIDTH ), + .USER_WIDTH ( AXI4_USER_WIDTH ), + .BUFFER_DEPTH ( BUFF_DEPTH_SLAVE ) + ) slave_b_buffer_i ( + .clk_i ( ACLK ), + .rst_ni ( ARESETn ), + .test_en_i ( test_en_i ), + + .slave_valid_i ( BVALID ), + .slave_resp_i ( BRESP ), + .slave_id_i ( BID ), + .slave_user_i ( BUSER ), + .slave_ready_o ( BREADY ), + + .master_valid_o ( BVALID_o ), + .master_resp_o ( BRESP_o ), + .master_id_o ( BID_o ), + .master_user_o ( BUSER_o ), + .master_ready_i ( BREADY_i ) + ); + + always_comb begin + read_req = 1'b0; + write_req = 1'b0; + W_word_sel = 1'b0; // Write Word Selector + + sample_AW = 1'b0; + decr_AWLEN = 1'b0; + sample_AR = 1'b0; + decr_ARLEN = 1'b0; + + incr_AWADDR = 1'b0; + incr_ARADDR = 1'b0; + + sample_RDATA_0 = 1'b0; + sample_RDATA_1 = 1'b0; + + ARREADY = 1'b0; + AWREADY = 1'b0; + WREADY 
= 1'b0; + RDATA = '0; + + BVALID = 1'b0; + BRESP = `OKAY; + BID = AWID; + BUSER = AWUSER; + + RVALID = 1'b0; + RLAST = 1'b0; + RID = ARID; + RUSER = ARUSER; + RRESP = `OKAY; + + case(CS) + + WAIT_R_PREADY: begin + sample_AR = 1'b0; + read_req = 1'b1; + address = ARADDR; + + if (PREADY == 1'b1) begin// APB is READY --> RDATA is AVAILABLE + if (ARLEN == 0) begin + case (ARSIZE) + 3'h3: begin + NS = SINGLE_RD_64; + if (ARADDR[2:0] == 3'h4) + sample_RDATA_1 = 1'b1; + else sample_RDATA_0 = 1'b1; + end + + default: begin + NS = SINGLE_RD; + if (ARADDR[2:0] == 3'h4) + sample_RDATA_1 = 1'b1; + else + sample_RDATA_0 = 1'b1; + end + endcase + end else begin // ARLEN > 0 --> BURST + NS = BURST_RD_64; + sample_RDATA_0 = 1'b1; + decr_ARLEN = 1'b1; + incr_ARADDR = 1'b1; + end + end else begin // APB not ready + NS = WAIT_R_PREADY; + end + end + + WAIT_W_PREADY: begin + address = AWADDR; + write_req = 1'b1; + + if (AWADDR[2:0] == 3'h4) + W_word_sel = 1'b1; + else + W_word_sel = 1'b0; + + // There is a Pending WRITE!! 
+ if (PREADY == 1'b1) begin // APB is READY --> WDATA is LAtched + if (AWLEN == 0) begin // single write + case (AWSIZE) + 3'h3: NS = SINGLE_WR_64; + default: NS = SINGLE_WR; + endcase + end else begin // BURST WRITE + sample_AW = 1'b1; + NS = BURST_WR_64; + end + end else begin // APB not READY + NS = WAIT_W_PREADY; + end + end + + IDLE: begin + if (ARVALID == 1'b1) begin + sample_AR = 1'b1; + read_req = 1'b1; + address = ARADDR; + + if (PREADY == 1'b1) begin // APB is READY --> RDATA is AVAILABLE + if (ARLEN == 0) begin + case (ARSIZE) + 3'h3: begin + NS = SINGLE_RD_64; + if (ARADDR[2:0] == 4) + sample_RDATA_1 = 1'b1; + else + sample_RDATA_0 = 1'b1; + end + default: begin + NS = SINGLE_RD; + if (ARADDR[2:0] == 4) + sample_RDATA_1 = 1'b1; + else + sample_RDATA_0 = 1'b1; + end + endcase end else begin //ARLEN > 0 --> BURST + NS = BURST_RD_64; + sample_RDATA_0 = 1'b1; + end + end else begin // APB not ready + NS = WAIT_R_PREADY; + end + end else begin + + if (AWVALID) begin //: _VALID_AW_REQ_ + if (WVALID) begin // : _VALID_W_REQ_ + write_req = 1'b1; + address = AWADDR; + + if (AWADDR[2:0] == 3'h4) + W_word_sel = 1'b1; + else + W_word_sel = 1'b0; + + // There is a Pending WRITE!! 
+ if (PREADY == 1'b1) begin// APB is READY --> WDATA is latched _APB_SLAVE_READY_ + if(AWLEN == 0) begin //: _SINGLE_WRITE_ + case(AWSIZE) + 3'h3: NS = SINGLE_WR_64; + default: NS = SINGLE_WR; + endcase + end else begin // BURST WRITE + sample_AW = 1'b1; + if ((AWADDR[2:0] == 3'h4) && (WSTRB[7:4] == 0)) + incr_AWADDR = 1'b0; + else + incr_AWADDR = 1'b1; + NS = BURST_WR_64; + end + end else begin// APB not READY + NS = WAIT_W_PREADY; + end + end else begin // GOT ADDRESS WRITE, not DATA + write_req = 1'b0; + address = '0; + NS = IDLE; + end + end else begin// No requests + NS = IDLE; + address = '0; + end + end + end + + SINGLE_WR_64: begin + address = AWADDR + 4; + W_word_sel = 1'b1; // write the Second data chunk + write_req = WVALID; + if (WVALID) begin + if (PREADY == 1'b1) + NS = SINGLE_WR; + else + NS = SINGLE_WR_64; + end else begin + NS = SINGLE_WR_64; + end + end + + SINGLE_WR: begin + BVALID = 1'b1; + address = '0; + if (BREADY) begin + NS = IDLE; + AWREADY = 1'b1; + WREADY = 1'b1; + end else begin + NS = SINGLE_WR; + end + end + + BURST_WR_64: begin + W_word_sel = 1'b1; // write the Second data chunk first + write_req = WVALID & (|WSTRB[7:4]); + address = AWADDR_Q; // second Chunk, Fixed Burst + + if (WVALID) begin + if (&WSTRB[7:4]) begin + if(PREADY == 1'b1) begin + NS = BURST_WR; + WREADY = 1'b1; // pop another data word from the WDATA fifo + decr_AWLEN = 1'b1; // decrement the remaining BURST beat + incr_AWADDR = 1'b1; // increment address + end else begin + NS = BURST_WR_64; + end + end else begin + NS = BURST_WR; + WREADY = 1'b1; // pop another data word from the WDATA fifo + decr_AWLEN = 1'b1; // decrement the remaining BURST beat + incr_AWADDR = 1'b1; // increment address + end + end else begin + NS = BURST_WR_64; + end + end + + BURST_WR: begin + address = AWADDR_Q; // second Chunk, Fixed Burst + if (AWLEN_Q == 0) begin // last : _BURST_COMPLETED_ + BVALID = 1'b1; + if (BREADY) begin + NS = IDLE; + AWREADY = 1'b1; + end else + NS = BURST_WR; + end else
begin //: _BURST_NOT_COMPLETED_ + W_word_sel = 1'b0; // write the first data chunk (WDATA[0]) + write_req = WVALID & (&WSTRB[3:0]); + if (WVALID) begin + if (PREADY == 1'b1) begin + NS = BURST_WR_64; + incr_AWADDR = 1'b1; + decr_AWLEN = 1'b1; //decrement the remaining BURST beat + end else + NS = BURST_WR; + end else begin + NS = BURST_WR_64; + end + end + end + + BURST_RD_64: begin + read_req = 1'b1; + address = ARADDR_Q; + + if (ARLEN_Q == 0) begin // burst completed + NS = IDLE; + ARREADY = 1'b1; + end else begin + if (PREADY == 1'b1) begin // APB is READY --> RDATA is AVAILABLE + decr_ARLEN = 1'b1; + sample_RDATA_1 = 1'b1; + NS = BURST_RD; + + if (ARADDR_Q[2:0] == 3'h4) + incr_ARADDR = 1'b1; + else + incr_ARADDR = 1'b0; + end + else begin + NS = BURST_RD_64; + end + end + end + + BURST_RD: begin + RVALID = 1'b1; + RDATA[0] = RDATA_Q_0; + RDATA[1] = RDATA_Q_1; + RLAST = (ARLEN_Q == 0) ? 1'b1 : 1'b0; + address = ARADDR_Q; + + if (RREADY) begin // ready to send back the rdata + if (ARLEN_Q == 0) begin // burst completed + NS = IDLE; + ARREADY = 1'b1; + end else begin //: _READ_BURST_NOT_COMPLETED_ + read_req = 1'b1; + if (PREADY == 1'b1) begin // APB is READY --> RDATA is AVAILABLE + sample_RDATA_0 = 1'b1; + NS = BURST_RD_64; + incr_ARADDR = 1'b1; + decr_ARLEN = 1'b1; + end else begin + NS = BURST_RD_1; + end + end + end else begin // NOT ready to send back the rdata + NS = BURST_RD; + end + end + + BURST_RD_1: begin + read_req = 1'b1; + address = ARADDR_Q; + + if (PREADY == 1'b1) begin // APB is READY --> RDATA is AVAILABLE + sample_RDATA_0 = 1'b1; + NS = BURST_RD_64; + incr_ARADDR = 1'b1; + decr_ARLEN = 1'b1; + end else begin + NS = BURST_RD_1; + end + end + + SINGLE_RD: begin + RVALID = 1'b1; + RDATA[0] = RDATA_Q_0; + RDATA[1] = RDATA_Q_1; + RLAST = 1; + address = '0; + + if (RREADY) begin // ready to send back the rdata + NS = IDLE; + ARREADY = 1'b1; + end else begin // NOT ready to send back the rdata + NS = SINGLE_RD; + end + end + + SINGLE_RD_64: begin +
read_req = 1'b1; + address = ARADDR + 4; + if (PREADY == 1'b1) begin // APB is READY --> RDATA is AVAILABLE + NS = SINGLE_RD; + if(ARADDR[2:0] == 3'h4) + sample_RDATA_0 = 1'b1; + else + sample_RDATA_1 = 1'b1; + end else begin + NS = SINGLE_RD_64; + end + end + + default: begin + NS = IDLE; + address = '0; + end + endcase + end + + // ----------- + // Registers + // ----------- + always_ff @(posedge ACLK, negedge ARESETn) begin + if (ARESETn == 1'b0) begin + CS <= IDLE; + //Read Channel + ARLEN_Q <= '0; + AWADDR_Q <= '0; + //Write Channel + AWLEN_Q <= '0; + RDATA_Q_0 <= '0; + RDATA_Q_1 <= '0; + ARADDR_Q <= '0; + end else begin + CS <= NS; + + if (sample_AR) begin + ARLEN_Q <= {ARLEN,1'b0} + 2; + end else if (decr_ARLEN) begin + ARLEN_Q <= ARLEN_Q - 1; + end + + if (sample_RDATA_0) + RDATA_Q_0 <= PRDATA; + + if (sample_RDATA_1) + RDATA_Q_1 <= PRDATA; + + case ({sample_AW, decr_AWLEN}) + 2'b00: AWLEN_Q <= AWLEN_Q; + 2'b01: AWLEN_Q <= AWLEN_Q - 1; + 2'b10: AWLEN_Q <= {AWLEN, 1'b0} + 1; + 2'b11: AWLEN_Q <= {AWLEN, 1'b0}; + endcase + + case ({sample_AW, incr_AWADDR}) + 2'b00: AWADDR_Q <= AWADDR_Q; + 2'b01: AWADDR_Q <= AWADDR_Q + 4; + 2'b10: AWADDR_Q <= {AWADDR[AXI4_ADDRESS_WIDTH-1:3], 3'b000}; + 2'b11: AWADDR_Q <= {AWADDR[AXI4_ADDRESS_WIDTH-1:3], 3'b000} + 4; + endcase + + case({sample_AR, incr_ARADDR}) + 2'b00: ARADDR_Q <= ARADDR_Q; + 2'b01: ARADDR_Q <= ARADDR_Q + 4; + 2'b10: ARADDR_Q <= {ARADDR[AXI4_ADDRESS_WIDTH-1:3], 3'b000}; + 2'b11: ARADDR_Q <= {ARADDR[AXI4_ADDRESS_WIDTH-1:3], 3'b000} + 4; + endcase + end + end +endmodule diff --git a/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_ar_buffer.sv b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_ar_buffer.sv new file mode 100644 index 00000000..e1336938 --- /dev/null +++ b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_ar_buffer.sv @@ -0,0 +1,74 @@ +// Copyright 2018 ETH Zurich and University of Bologna. 
+// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Davide Rossi + +module axi_ar_buffer #( + parameter int ID_WIDTH = -1, + parameter int ADDR_WIDTH = -1, + parameter int USER_WIDTH = -1, + parameter int BUFFER_DEPTH = -1 +)( + + input logic clk_i, + input logic rst_ni, + input logic test_en_i, + + input logic slave_valid_i, + input logic [ADDR_WIDTH-1:0] slave_addr_i, + input logic [2:0] slave_prot_i, + input logic [3:0] slave_region_i, + input logic [7:0] slave_len_i, + input logic [2:0] slave_size_i, + input logic [1:0] slave_burst_i, + input logic slave_lock_i, + input logic [3:0] slave_cache_i, + input logic [3:0] slave_qos_i, + input logic [ID_WIDTH-1:0] slave_id_i, + input logic [USER_WIDTH-1:0] slave_user_i, + output logic slave_ready_o, + + output logic master_valid_o, + output logic [ADDR_WIDTH-1:0] master_addr_o, + output logic [2:0] master_prot_o, + output logic [3:0] master_region_o, + output logic [7:0] master_len_o, + output logic [2:0] master_size_o, + output logic [1:0] master_burst_o, + output logic master_lock_o, + output logic [3:0] master_cache_o, + output logic [3:0] master_qos_o, + output logic [ID_WIDTH-1:0] master_id_o, + output logic [USER_WIDTH-1:0] master_user_o, + input logic master_ready_i +); + + logic [29+ADDR_WIDTH+USER_WIDTH+ID_WIDTH-1:0] s_data_in; + logic [29+ADDR_WIDTH+USER_WIDTH+ID_WIDTH-1:0] s_data_out; + + assign s_data_in = {slave_cache_i, slave_prot_i, 
slave_lock_i, slave_burst_i, slave_size_i, slave_len_i, slave_qos_i, slave_region_i, slave_addr_i, slave_user_i, slave_id_i} ; + assign {master_cache_o, master_prot_o, master_lock_o, master_burst_o, master_size_o, master_len_o, master_qos_o, master_region_o, master_addr_o, master_user_o, master_id_o} = s_data_out; + + + + axi_single_slice #(.BUFFER_DEPTH(BUFFER_DEPTH), .DATA_WIDTH(29+ADDR_WIDTH+USER_WIDTH+ID_WIDTH)) i_axi_single_slice ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .testmode_i ( test_en_i ), + .valid_i ( slave_valid_i ), + .ready_o ( slave_ready_o ), + .data_i ( s_data_in ), + .ready_i ( master_ready_i ), + .valid_o ( master_valid_o ), + .data_o ( s_data_out ) + ); + + +endmodule diff --git a/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_aw_buffer.sv b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_aw_buffer.sv new file mode 100644 index 00000000..15b93450 --- /dev/null +++ b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_aw_buffer.sv @@ -0,0 +1,74 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +// Davide Rossi + +module axi_aw_buffer #( + parameter int ID_WIDTH = -1, + parameter int ADDR_WIDTH = -1, + parameter int USER_WIDTH = -1, + parameter int BUFFER_DEPTH = -1 +)( + + input logic clk_i, + input logic rst_ni, + input logic test_en_i, + + input logic slave_valid_i, + input logic [ADDR_WIDTH-1:0] slave_addr_i, + input logic [2:0] slave_prot_i, + input logic [3:0] slave_region_i, + input logic [7:0] slave_len_i, + input logic [2:0] slave_size_i, + input logic [1:0] slave_burst_i, + input logic slave_lock_i, + input logic [3:0] slave_cache_i, + input logic [3:0] slave_qos_i, + input logic [ID_WIDTH-1:0] slave_id_i, + input logic [USER_WIDTH-1:0] slave_user_i, + output logic slave_ready_o, + + output logic master_valid_o, + output logic [ADDR_WIDTH-1:0] master_addr_o, + output logic [2:0] master_prot_o, + output logic [3:0] master_region_o, + output logic [7:0] master_len_o, + output logic [2:0] master_size_o, + output logic [1:0] master_burst_o, + output logic master_lock_o, + output logic [3:0] master_cache_o, + output logic [3:0] master_qos_o, + output logic [ID_WIDTH-1:0] master_id_o, + output logic [USER_WIDTH-1:0] master_user_o, + input logic master_ready_i +); + + logic [29+ADDR_WIDTH+USER_WIDTH+ID_WIDTH-1:0] s_data_in; + logic [29+ADDR_WIDTH+USER_WIDTH+ID_WIDTH-1:0] s_data_out; + + + + assign s_data_in = {slave_cache_i, slave_prot_i, slave_lock_i, slave_burst_i, slave_size_i, slave_len_i, slave_qos_i, slave_region_i, slave_addr_i, slave_user_i, slave_id_i}; + assign {master_cache_o, master_prot_o, master_lock_o, master_burst_o, master_size_o, master_len_o, master_qos_o, master_region_o, master_addr_o, master_user_o, master_id_o} = s_data_out; + + + axi_single_slice #(.BUFFER_DEPTH(BUFFER_DEPTH), .DATA_WIDTH(29+ADDR_WIDTH+USER_WIDTH+ID_WIDTH)) i_axi_single_slice ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .testmode_i ( test_en_i ), + .valid_i ( slave_valid_i ), + .ready_o ( slave_ready_o ), + .data_i ( s_data_in ), + .ready_i ( master_ready_i 
), + .valid_o ( master_valid_o ), + .data_o ( s_data_out ) + ); + +endmodule diff --git a/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_b_buffer.sv b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_b_buffer.sv new file mode 100644 index 00000000..d2576bbf --- /dev/null +++ b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_b_buffer.sv @@ -0,0 +1,54 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +// Davide Rossi + +module axi_b_buffer #( + parameter int ID_WIDTH = -1, + parameter int USER_WIDTH = -1, + parameter int BUFFER_DEPTH = -1 +)( + input logic clk_i, + input logic rst_ni, + input logic test_en_i, + + input logic slave_valid_i, + input logic [1:0] slave_resp_i, + input logic [ID_WIDTH-1:0] slave_id_i, + input logic [USER_WIDTH-1:0] slave_user_i, + output logic slave_ready_o, + + output logic master_valid_o, + output logic [1:0] master_resp_o, + output logic [ID_WIDTH-1:0] master_id_o, + output logic [USER_WIDTH-1:0] master_user_o, + input logic master_ready_i +); + + logic [2+USER_WIDTH+ID_WIDTH-1:0] s_data_in; + logic [2+USER_WIDTH+ID_WIDTH-1:0] s_data_out; + + assign s_data_in = {slave_id_i, slave_user_i, slave_resp_i}; + assign {master_id_o, master_user_o, master_resp_o} = s_data_out; + + + axi_single_slice #(.BUFFER_DEPTH(BUFFER_DEPTH), .DATA_WIDTH(2+USER_WIDTH+ID_WIDTH)) i_axi_single_slice ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .testmode_i ( test_en_i ), + .valid_i ( slave_valid_i ), + .ready_o ( slave_ready_o ), + .data_i ( s_data_in ), + .ready_i ( master_ready_i ), + .valid_o ( master_valid_o ), + .data_o ( s_data_out ) + ); + +endmodule diff --git a/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_r_buffer.sv b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_r_buffer.sv new file mode 100644 index 00000000..3c92b259 --- /dev/null +++ b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_r_buffer.sv @@ -0,0 +1,60 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Davide Rossi + +module axi_r_buffer #( + parameter ID_WIDTH = 4, + parameter DATA_WIDTH = 64, + parameter USER_WIDTH = 6, + parameter BUFFER_DEPTH = 8, + parameter STRB_WIDTH = DATA_WIDTH/8 // DO NOT OVERRIDE +)( + input logic clk_i, + input logic rst_ni, + input logic test_en_i, + + input logic slave_valid_i, + input logic [DATA_WIDTH-1:0] slave_data_i, + input logic [1:0] slave_resp_i, + input logic [USER_WIDTH-1:0] slave_user_i, + input logic [ID_WIDTH-1:0] slave_id_i, + input logic slave_last_i, + output logic slave_ready_o, + + output logic master_valid_o, + output logic [DATA_WIDTH-1:0] master_data_o, + output logic [1:0] master_resp_o, + output logic [USER_WIDTH-1:0] master_user_o, + output logic [ID_WIDTH-1:0] master_id_o, + output logic master_last_o, + input logic master_ready_i +); + + logic [2+DATA_WIDTH+USER_WIDTH+ID_WIDTH:0] s_data_in; + logic [2+DATA_WIDTH+USER_WIDTH+ID_WIDTH:0] s_data_out; + + + assign s_data_in = {slave_id_i, slave_user_i, slave_data_i, slave_resp_i, slave_last_i}; + assign {master_id_o, master_user_o, master_data_o, master_resp_o, master_last_o} = s_data_out; + + axi_single_slice #(.BUFFER_DEPTH(BUFFER_DEPTH), .DATA_WIDTH(3+DATA_WIDTH+USER_WIDTH+ID_WIDTH)) i_axi_single_slice ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .testmode_i ( test_en_i ), + .valid_i ( slave_valid_i ), + .ready_o ( slave_ready_o ), + .data_i ( s_data_in ), + .ready_i ( master_ready_i ), + .valid_o ( master_valid_o ), + .data_o ( s_data_out ) + ); + +endmodule diff --git a/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_single_slice.sv b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_single_slice.sv 
new file mode 100644 index 00000000..fe7fbbc5 --- /dev/null +++ b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_single_slice.sv @@ -0,0 +1,51 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +/// Wrapper for a generic fifo +module axi_single_slice #( + parameter int BUFFER_DEPTH = -1, + parameter int DATA_WIDTH = -1 +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic testmode_i, + input logic valid_i, + output logic ready_o, + input logic [DATA_WIDTH-1:0] data_i, + + input logic ready_i, + output logic valid_o, + output logic [DATA_WIDTH-1:0] data_o +); + + logic full, empty; + + assign ready_o = ~full; + assign valid_o = ~empty; + + fifo #( + .FALL_THROUGH ( 1'b0 ), + .DATA_WIDTH ( DATA_WIDTH ), + .DEPTH ( BUFFER_DEPTH ) + ) i_fifo ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_i ( 1'b0 ), + .threshold_o (), // NC + .testmode_i ( testmode_i ), + .full_o ( full ), + .empty_o ( empty ), + .data_i ( data_i ), + .push_i ( valid_i & ready_o ), + .data_o ( data_o ), + .pop_i ( ready_i & valid_o ) + ); + +endmodule diff --git a/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_slice.sv b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_slice.sv new file mode 100644 index 00000000..91072d62 --- /dev/null +++ b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_slice.sv @@ -0,0 
+1,311 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Davide Rossi + +module axi_slice +#( + parameter AXI_ADDR_WIDTH = 32, + parameter AXI_DATA_WIDTH = 64, + parameter AXI_USER_WIDTH = 6, + parameter AXI_ID_WIDTH = 3, + parameter SLICE_DEPTH = 2, + parameter AXI_STRB_WIDTH = AXI_DATA_WIDTH/8 +) +( + input logic clk_i, + input logic rst_ni, + input logic test_en_i, + + // AXI4 SLAVE + //*************************************** + // WRITE ADDRESS CHANNEL + input logic axi_slave_aw_valid_i, + input logic [AXI_ADDR_WIDTH-1:0] axi_slave_aw_addr_i, + input logic [2:0] axi_slave_aw_prot_i, + input logic [3:0] axi_slave_aw_region_i, + input logic [7:0] axi_slave_aw_len_i, + input logic [2:0] axi_slave_aw_size_i, + input logic [1:0] axi_slave_aw_burst_i, + input logic axi_slave_aw_lock_i, + input logic [3:0] axi_slave_aw_cache_i, + input logic [3:0] axi_slave_aw_qos_i, + input logic [AXI_ID_WIDTH-1:0] axi_slave_aw_id_i, + input logic [AXI_USER_WIDTH-1:0] axi_slave_aw_user_i, + output logic axi_slave_aw_ready_o, + + // READ ADDRESS CHANNEL + input logic axi_slave_ar_valid_i, + input logic [AXI_ADDR_WIDTH-1:0] axi_slave_ar_addr_i, + input logic [2:0] axi_slave_ar_prot_i, + input logic [3:0] axi_slave_ar_region_i, + input logic [7:0] axi_slave_ar_len_i, + input logic [2:0] axi_slave_ar_size_i, + input logic [1:0] axi_slave_ar_burst_i, + input logic 
axi_slave_ar_lock_i, + input logic [3:0] axi_slave_ar_cache_i, + input logic [3:0] axi_slave_ar_qos_i, + input logic [AXI_ID_WIDTH-1:0] axi_slave_ar_id_i, + input logic [AXI_USER_WIDTH-1:0] axi_slave_ar_user_i, + output logic axi_slave_ar_ready_o, + + // WRITE DATA CHANNEL + input logic axi_slave_w_valid_i, + input logic [AXI_DATA_WIDTH-1:0] axi_slave_w_data_i, + input logic [AXI_STRB_WIDTH-1:0] axi_slave_w_strb_i, + input logic [AXI_USER_WIDTH-1:0] axi_slave_w_user_i, + input logic axi_slave_w_last_i, + output logic axi_slave_w_ready_o, + + // READ DATA CHANNEL + output logic axi_slave_r_valid_o, + output logic [AXI_DATA_WIDTH-1:0] axi_slave_r_data_o, + output logic [1:0] axi_slave_r_resp_o, + output logic axi_slave_r_last_o, + output logic [AXI_ID_WIDTH-1:0] axi_slave_r_id_o, + output logic [AXI_USER_WIDTH-1:0] axi_slave_r_user_o, + input logic axi_slave_r_ready_i, + + // WRITE RESPONSE CHANNEL + output logic axi_slave_b_valid_o, + output logic [1:0] axi_slave_b_resp_o, + output logic [AXI_ID_WIDTH-1:0] axi_slave_b_id_o, + output logic [AXI_USER_WIDTH-1:0] axi_slave_b_user_o, + input logic axi_slave_b_ready_i, + + // AXI4 MASTER + //*************************************** + // WRITE ADDRESS CHANNEL + output logic axi_master_aw_valid_o, + output logic [AXI_ADDR_WIDTH-1:0] axi_master_aw_addr_o, + output logic [2:0] axi_master_aw_prot_o, + output logic [3:0] axi_master_aw_region_o, + output logic [7:0] axi_master_aw_len_o, + output logic [2:0] axi_master_aw_size_o, + output logic [1:0] axi_master_aw_burst_o, + output logic axi_master_aw_lock_o, + output logic [3:0] axi_master_aw_cache_o, + output logic [3:0] axi_master_aw_qos_o, + output logic [AXI_ID_WIDTH-1:0] axi_master_aw_id_o, + output logic [AXI_USER_WIDTH-1:0] axi_master_aw_user_o, + input logic axi_master_aw_ready_i, + + // READ ADDRESS CHANNEL + output logic axi_master_ar_valid_o, + output logic [AXI_ADDR_WIDTH-1:0] axi_master_ar_addr_o, + output logic [2:0] axi_master_ar_prot_o, + output logic [3:0] 
axi_master_ar_region_o, + output logic [7:0] axi_master_ar_len_o, + output logic [2:0] axi_master_ar_size_o, + output logic [1:0] axi_master_ar_burst_o, + output logic axi_master_ar_lock_o, + output logic [3:0] axi_master_ar_cache_o, + output logic [3:0] axi_master_ar_qos_o, + output logic [AXI_ID_WIDTH-1:0] axi_master_ar_id_o, + output logic [AXI_USER_WIDTH-1:0] axi_master_ar_user_o, + input logic axi_master_ar_ready_i, + + // WRITE DATA CHANNEL + output logic axi_master_w_valid_o, + output logic [AXI_DATA_WIDTH-1:0] axi_master_w_data_o, + output logic [AXI_STRB_WIDTH-1:0] axi_master_w_strb_o, + output logic [AXI_USER_WIDTH-1:0] axi_master_w_user_o, + output logic axi_master_w_last_o, + input logic axi_master_w_ready_i, + + // READ DATA CHANNEL + input logic axi_master_r_valid_i, + input logic [AXI_DATA_WIDTH-1:0] axi_master_r_data_i, + input logic [1:0] axi_master_r_resp_i, + input logic axi_master_r_last_i, + input logic [AXI_ID_WIDTH-1:0] axi_master_r_id_i, + input logic [AXI_USER_WIDTH-1:0] axi_master_r_user_i, + output logic axi_master_r_ready_o, + + // WRITE RESPONSE CHANNEL + input logic axi_master_b_valid_i, + input logic [1:0] axi_master_b_resp_i, + input logic [AXI_ID_WIDTH-1:0] axi_master_b_id_i, + input logic [AXI_USER_WIDTH-1:0] axi_master_b_user_i, + output logic axi_master_b_ready_o +); + + // AXI WRITE ADDRESS CHANNEL BUFFER + axi_aw_buffer + #( + .ID_WIDTH (AXI_ID_WIDTH), + .ADDR_WIDTH (AXI_ADDR_WIDTH), + .USER_WIDTH (AXI_USER_WIDTH), + .BUFFER_DEPTH (SLICE_DEPTH) + ) + aw_buffer_i + ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .test_en_i ( test_en_i ), + + .slave_valid_i ( axi_slave_aw_valid_i ), + .slave_addr_i ( axi_slave_aw_addr_i ), + .slave_prot_i ( axi_slave_aw_prot_i ), + .slave_region_i ( axi_slave_aw_region_i ), + .slave_len_i ( axi_slave_aw_len_i ), + .slave_size_i ( axi_slave_aw_size_i ), + .slave_burst_i ( axi_slave_aw_burst_i ), + .slave_lock_i ( axi_slave_aw_lock_i ), + .slave_cache_i ( axi_slave_aw_cache_i ), + .slave_qos_i ( 
axi_slave_aw_qos_i ), + .slave_id_i ( axi_slave_aw_id_i ), + .slave_user_i ( axi_slave_aw_user_i ), + .slave_ready_o ( axi_slave_aw_ready_o ), + + .master_valid_o ( axi_master_aw_valid_o ), + .master_addr_o ( axi_master_aw_addr_o ), + .master_prot_o ( axi_master_aw_prot_o ), + .master_region_o ( axi_master_aw_region_o ), + .master_len_o ( axi_master_aw_len_o ), + .master_size_o ( axi_master_aw_size_o ), + .master_burst_o ( axi_master_aw_burst_o ), + .master_lock_o ( axi_master_aw_lock_o ), + .master_cache_o ( axi_master_aw_cache_o ), + .master_qos_o ( axi_master_aw_qos_o ), + .master_id_o ( axi_master_aw_id_o ), + .master_user_o ( axi_master_aw_user_o ), + .master_ready_i ( axi_master_aw_ready_i ) + ); + + // AXI READ ADDRESS CHANNEL BUFFER + axi_ar_buffer + #( + .ID_WIDTH (AXI_ID_WIDTH), + .ADDR_WIDTH (AXI_ADDR_WIDTH), + .USER_WIDTH (AXI_USER_WIDTH), + .BUFFER_DEPTH (SLICE_DEPTH) + ) + ar_buffer_i + ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .test_en_i ( test_en_i ), + + .slave_valid_i ( axi_slave_ar_valid_i ), + .slave_addr_i ( axi_slave_ar_addr_i ), + .slave_prot_i ( axi_slave_ar_prot_i ), + .slave_region_i ( axi_slave_ar_region_i ), + .slave_len_i ( axi_slave_ar_len_i ), + .slave_size_i ( axi_slave_ar_size_i ), + .slave_burst_i ( axi_slave_ar_burst_i ), + .slave_lock_i ( axi_slave_ar_lock_i ), + .slave_cache_i ( axi_slave_ar_cache_i ), + .slave_qos_i ( axi_slave_ar_qos_i ), + .slave_id_i ( axi_slave_ar_id_i ), + .slave_user_i ( axi_slave_ar_user_i ), + .slave_ready_o ( axi_slave_ar_ready_o ), + + .master_valid_o ( axi_master_ar_valid_o ), + .master_addr_o ( axi_master_ar_addr_o ), + .master_prot_o ( axi_master_ar_prot_o ), + .master_region_o ( axi_master_ar_region_o ), + .master_len_o ( axi_master_ar_len_o ), + .master_size_o ( axi_master_ar_size_o ), + .master_burst_o ( axi_master_ar_burst_o ), + .master_lock_o ( axi_master_ar_lock_o ), + .master_cache_o ( axi_master_ar_cache_o ), + .master_qos_o ( axi_master_ar_qos_o ), + .master_id_o ( axi_master_ar_id_o 
), + .master_user_o ( axi_master_ar_user_o ), + .master_ready_i ( axi_master_ar_ready_i ) + ); + + // WRITE DATA CHANNEL BUFFER + axi_w_buffer + #( + .DATA_WIDTH (AXI_DATA_WIDTH), + .USER_WIDTH (AXI_USER_WIDTH), + .BUFFER_DEPTH (SLICE_DEPTH) + ) + w_buffer_i + ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .test_en_i ( test_en_i ), + + .slave_valid_i ( axi_slave_w_valid_i ), + .slave_data_i ( axi_slave_w_data_i ), + .slave_strb_i ( axi_slave_w_strb_i ), + .slave_user_i ( axi_slave_w_user_i ), + .slave_last_i ( axi_slave_w_last_i ), + .slave_ready_o ( axi_slave_w_ready_o ), + + .master_valid_o ( axi_master_w_valid_o ), + .master_data_o ( axi_master_w_data_o ), + .master_strb_o ( axi_master_w_strb_o ), + .master_user_o ( axi_master_w_user_o ), + .master_last_o ( axi_master_w_last_o ), + .master_ready_i ( axi_master_w_ready_i ) + ); + + // READ DATA CHANNEL BUFFER + axi_r_buffer + #( + .ID_WIDTH (AXI_ID_WIDTH), + .DATA_WIDTH (AXI_DATA_WIDTH), + .USER_WIDTH (AXI_USER_WIDTH), + .BUFFER_DEPTH (SLICE_DEPTH) + ) + r_buffer_i + ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .test_en_i ( test_en_i ), + + .slave_valid_i ( axi_master_r_valid_i ), + .slave_data_i ( axi_master_r_data_i ), + .slave_resp_i ( axi_master_r_resp_i ), + .slave_user_i ( axi_master_r_user_i ), + .slave_id_i ( axi_master_r_id_i ), + .slave_last_i ( axi_master_r_last_i ), + .slave_ready_o ( axi_master_r_ready_o ), + + .master_valid_o ( axi_slave_r_valid_o ), + .master_data_o ( axi_slave_r_data_o ), + .master_resp_o ( axi_slave_r_resp_o ), + .master_user_o ( axi_slave_r_user_o ), + .master_id_o ( axi_slave_r_id_o ), + .master_last_o ( axi_slave_r_last_o ), + .master_ready_i ( axi_slave_r_ready_i ) + ); + + // WRITE RESPONSE CHANNEL BUFFER + axi_b_buffer + #( + .ID_WIDTH (AXI_ID_WIDTH), + .USER_WIDTH (AXI_USER_WIDTH), + .BUFFER_DEPTH (SLICE_DEPTH) + ) + b_buffer_i + ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .test_en_i ( test_en_i ), + + .slave_valid_i ( axi_master_b_valid_i ), + .slave_resp_i ( 
axi_master_b_resp_i ), + .slave_id_i ( axi_master_b_id_i ), + .slave_user_i ( axi_master_b_user_i ), + .slave_ready_o ( axi_master_b_ready_o ), + + .master_valid_o ( axi_slave_b_valid_o ), + .master_resp_o ( axi_slave_b_resp_o ), + .master_id_o ( axi_slave_b_id_o ), + .master_user_o ( axi_slave_b_user_o ), + .master_ready_i ( axi_slave_b_ready_i ) + ); + +endmodule diff --git a/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_slice_wrap.sv b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_slice_wrap.sv new file mode 100644 index 00000000..2e643a4c --- /dev/null +++ b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_slice_wrap.sv @@ -0,0 +1,115 @@ +module axi_slice_wrap #( + parameter AXI_ADDR_WIDTH = 32, + parameter AXI_DATA_WIDTH = 64, + parameter AXI_USER_WIDTH = 6, + parameter AXI_ID_WIDTH = 3, + parameter SLICE_DEPTH = 2, + parameter AXI_STRB_WIDTH = AXI_DATA_WIDTH/8 // one strobe bit per data byte +)( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic test_en_i, + AXI_BUS.Slave axi_slave, + AXI_BUS.Master axi_master +); + // Wrapper: maps the members of both AXI_BUS modports onto the discrete ports of axi_slice. + axi_slice #( + .AXI_ADDR_WIDTH ( AXI_ADDR_WIDTH ), + .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ), + .AXI_ID_WIDTH ( AXI_ID_WIDTH ), + .SLICE_DEPTH ( SLICE_DEPTH ), + .AXI_STRB_WIDTH ( AXI_STRB_WIDTH ) + ) i_axi_slice ( + .axi_slave_aw_valid_i ( axi_slave.aw_valid ), + .axi_slave_aw_addr_i ( axi_slave.aw_addr ), + .axi_slave_aw_prot_i ( axi_slave.aw_prot ), + .axi_slave_aw_region_i ( axi_slave.aw_region ), + .axi_slave_aw_len_i ( axi_slave.aw_len ), + .axi_slave_aw_size_i ( axi_slave.aw_size ), + .axi_slave_aw_burst_i ( axi_slave.aw_burst ), + .axi_slave_aw_lock_i ( axi_slave.aw_lock ), + .axi_slave_aw_cache_i ( axi_slave.aw_cache ), + .axi_slave_aw_qos_i ( axi_slave.aw_qos ), + .axi_slave_aw_id_i ( axi_slave.aw_id ), + .axi_slave_aw_user_i ( axi_slave.aw_user ), + .axi_slave_aw_ready_o ( axi_slave.aw_ready ), + .axi_slave_ar_valid_i ( axi_slave.ar_valid ), + .axi_slave_ar_addr_i (
axi_slave.ar_addr ), + .axi_slave_ar_prot_i ( axi_slave.ar_prot ), + .axi_slave_ar_region_i ( axi_slave.ar_region ), + .axi_slave_ar_len_i ( axi_slave.ar_len ), + .axi_slave_ar_size_i ( axi_slave.ar_size ), + .axi_slave_ar_burst_i ( axi_slave.ar_burst ), + .axi_slave_ar_lock_i ( axi_slave.ar_lock ), + .axi_slave_ar_cache_i ( axi_slave.ar_cache ), + .axi_slave_ar_qos_i ( axi_slave.ar_qos ), + .axi_slave_ar_id_i ( axi_slave.ar_id ), + .axi_slave_ar_user_i ( axi_slave.ar_user ), + .axi_slave_ar_ready_o ( axi_slave.ar_ready ), + .axi_slave_w_valid_i ( axi_slave.w_valid ), + .axi_slave_w_data_i ( axi_slave.w_data ), + .axi_slave_w_strb_i ( axi_slave.w_strb ), + .axi_slave_w_user_i ( axi_slave.w_user ), + .axi_slave_w_last_i ( axi_slave.w_last ), + .axi_slave_w_ready_o ( axi_slave.w_ready ), + .axi_slave_r_valid_o ( axi_slave.r_valid ), + .axi_slave_r_data_o ( axi_slave.r_data ), + .axi_slave_r_resp_o ( axi_slave.r_resp ), + .axi_slave_r_last_o ( axi_slave.r_last ), + .axi_slave_r_id_o ( axi_slave.r_id ), + .axi_slave_r_user_o ( axi_slave.r_user ), + .axi_slave_r_ready_i ( axi_slave.r_ready ), + .axi_slave_b_valid_o ( axi_slave.b_valid ), + .axi_slave_b_resp_o ( axi_slave.b_resp ), + .axi_slave_b_id_o ( axi_slave.b_id ), + .axi_slave_b_user_o ( axi_slave.b_user ), + .axi_slave_b_ready_i ( axi_slave.b_ready ), + .axi_master_aw_valid_o ( axi_master.aw_valid ), + .axi_master_aw_addr_o ( axi_master.aw_addr ), + .axi_master_aw_prot_o ( axi_master.aw_prot ), + .axi_master_aw_region_o ( axi_master.aw_region ), + .axi_master_aw_len_o ( axi_master.aw_len ), + .axi_master_aw_size_o ( axi_master.aw_size ), + .axi_master_aw_burst_o ( axi_master.aw_burst ), + .axi_master_aw_lock_o ( axi_master.aw_lock ), + .axi_master_aw_cache_o ( axi_master.aw_cache ), + .axi_master_aw_qos_o ( axi_master.aw_qos ), + .axi_master_aw_id_o ( axi_master.aw_id ), + .axi_master_aw_user_o ( axi_master.aw_user ), + .axi_master_aw_ready_i ( axi_master.aw_ready ), + .axi_master_ar_valid_o ( axi_master.ar_valid
), + .axi_master_ar_addr_o ( axi_master.ar_addr ), + .axi_master_ar_prot_o ( axi_master.ar_prot ), + .axi_master_ar_region_o ( axi_master.ar_region ), + .axi_master_ar_len_o ( axi_master.ar_len ), + .axi_master_ar_size_o ( axi_master.ar_size ), + .axi_master_ar_burst_o ( axi_master.ar_burst ), + .axi_master_ar_lock_o ( axi_master.ar_lock ), + .axi_master_ar_cache_o ( axi_master.ar_cache ), + .axi_master_ar_qos_o ( axi_master.ar_qos ), + .axi_master_ar_id_o ( axi_master.ar_id ), + .axi_master_ar_user_o ( axi_master.ar_user ), + .axi_master_ar_ready_i ( axi_master.ar_ready ), + .axi_master_w_valid_o ( axi_master.w_valid ), + .axi_master_w_data_o ( axi_master.w_data ), + .axi_master_w_strb_o ( axi_master.w_strb ), + .axi_master_w_user_o ( axi_master.w_user ), + .axi_master_w_last_o ( axi_master.w_last ), + .axi_master_w_ready_i ( axi_master.w_ready ), + .axi_master_r_valid_i ( axi_master.r_valid ), + .axi_master_r_data_i ( axi_master.r_data ), + .axi_master_r_resp_i ( axi_master.r_resp ), + .axi_master_r_last_i ( axi_master.r_last ), + .axi_master_r_id_i ( axi_master.r_id ), + .axi_master_r_user_i ( axi_master.r_user ), + .axi_master_r_ready_o ( axi_master.r_ready ), + .axi_master_b_valid_i ( axi_master.b_valid ), + .axi_master_b_resp_i ( axi_master.b_resp ), + .axi_master_b_id_i ( axi_master.b_id ), + .axi_master_b_user_i ( axi_master.b_user ), + .axi_master_b_ready_o ( axi_master.b_ready ), + .* // implicitly connects remaining ports to same-named signals; clk_i, rst_ni and test_en_i are not listed above + ); + +endmodule diff --git a/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_w_buffer.sv b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_w_buffer.sv new file mode 100644 index 00000000..0e89a474 --- /dev/null +++ b/test/type_param/corev_apu/fpga/src/axi_slice/src/axi_w_buffer.sv @@ -0,0 +1,55 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License.
You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Davide Rossi +// Passes the write-data channel fields (user, strb, data, last) through one axi_single_slice as a single packed word. +module axi_w_buffer #( + parameter int DATA_WIDTH = -1, + parameter int USER_WIDTH = -1, + parameter int BUFFER_DEPTH = -1, + parameter int STRB_WIDTH = DATA_WIDTH/8 // one strobe bit per data byte; DO NOT OVERRIDE +)( + input logic clk_i, + input logic rst_ni, + input logic test_en_i, + + input logic slave_valid_i, + input logic [DATA_WIDTH-1:0] slave_data_i, + input logic [STRB_WIDTH-1:0] slave_strb_i, + input logic [USER_WIDTH-1:0] slave_user_i, + input logic slave_last_i, + output logic slave_ready_o, + + output logic master_valid_o, + output logic [DATA_WIDTH-1:0] master_data_o, + output logic [STRB_WIDTH-1:0] master_strb_o, + output logic [USER_WIDTH-1:0] master_user_o, + output logic master_last_o, + input logic master_ready_i +); + + logic [DATA_WIDTH+STRB_WIDTH+USER_WIDTH:0] s_data_in; // {user, strb, data, last}: 1+DATA+STRB+USER bits + logic [DATA_WIDTH+STRB_WIDTH+USER_WIDTH:0] s_data_out; // same field order, unpacked onto the master_* outputs + + assign s_data_in = { slave_user_i, slave_strb_i, slave_data_i, slave_last_i }; + assign { master_user_o, master_strb_o, master_data_o, master_last_o } = s_data_out; + + axi_single_slice #(.BUFFER_DEPTH(BUFFER_DEPTH), .DATA_WIDTH(1+DATA_WIDTH+STRB_WIDTH+USER_WIDTH)) i_axi_single_slice ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .testmode_i ( test_en_i ), + .valid_i ( slave_valid_i ), + .ready_o ( slave_ready_o ), + .data_i ( s_data_in ), + .ready_i ( master_ready_i ), + .valid_o ( master_valid_o ), + .data_o ( s_data_out ) + ); +endmodule diff --git a/test/type_param/corev_apu/register_interface/include/register_interface/assign.svh
b/test/type_param/corev_apu/register_interface/include/register_interface/assign.svh new file mode 100644 index 00000000..30c44acf --- /dev/null +++ b/test/type_param/corev_apu/register_interface/include/register_interface/assign.svh @@ -0,0 +1,46 @@ +// Copyright (c) 2020 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Florian Zaruba +/// Macros to assign register bus request/response fields (whole-struct or field-by-field).
+ +`ifndef REGISTER_INTERFACE_ASSIGN_SVH_ +`define REGISTER_INTERFACE_ASSIGN_SVH_ + +`define REG_BUS_ASSIGN_TO_REQ(lhs, rhs) \ + assign lhs = '{ \ + addr: rhs.addr, \ + write: rhs.write, \ + wdata: rhs.wdata, \ + wstrb: rhs.wstrb, \ + valid: rhs.valid \ + }; + +`define REG_BUS_ASSIGN_FROM_REQ(lhs, rhs) \ + assign lhs.addr = rhs.addr; \ + assign lhs.write = rhs.write; \ + assign lhs.wdata = rhs.wdata; \ + assign lhs.wstrb = rhs.wstrb; \ + assign lhs.valid = rhs.valid; \ + +`define REG_BUS_ASSIGN_TO_RSP(lhs, rhs) \ + assign lhs = '{ \ + rdata: rhs.rdata, \ + error: rhs.error, \ + ready: rhs.ready \ + }; + +`define REG_BUS_ASSIGN_FROM_RSP(lhs, rhs) \ + assign lhs.rdata = rhs.rdata; \ + assign lhs.error = rhs.error; \ + assign lhs.ready = rhs.ready; + +`endif \ No newline at end of file diff --git a/test/type_param/corev_apu/register_interface/include/register_interface/typedef.svh b/test/type_param/corev_apu/register_interface/include/register_interface/typedef.svh new file mode 100644 index 00000000..350d79e1 --- /dev/null +++ b/test/type_param/corev_apu/register_interface/include/register_interface/typedef.svh @@ -0,0 +1,38 @@ +// Copyright (c) 2020 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Florian Zaruba +/// Macros to define register bus request/response structs.
+ +`ifndef REGISTER_INTERFACE_TYPEDEF_SVH_ +`define REGISTER_INTERFACE_TYPEDEF_SVH_ +/// Packed request struct req_t with fields addr, write, wdata, wstrb, valid. +`define REG_BUS_TYPEDEF_REQ(req_t, addr_t, data_t, strb_t) \ + typedef struct packed { \ + addr_t addr; \ + logic write; \ + data_t wdata; \ + strb_t wstrb; \ + logic valid; \ + } req_t; +/// Packed response struct rsp_t with fields rdata, error, ready. +`define REG_BUS_TYPEDEF_RSP(rsp_t, data_t) \ + typedef struct packed { \ + data_t rdata; \ + logic error; \ + logic ready; \ + } rsp_t; +/// Defines both <name>_req_t and <name>_rsp_t by expanding the two macros above. +`define REG_BUS_TYPEDEF_ALL(name, addr_t, data_t, strb_t) \ + `REG_BUS_TYPEDEF_REQ(name``_req_t, addr_t, data_t, strb_t) \ + `REG_BUS_TYPEDEF_RSP(name``_rsp_t, data_t) + +`endif diff --git a/test/type_param/corev_apu/register_interface/src/apb_to_reg.sv b/test/type_param/corev_apu/register_interface/src/apb_to_reg.sv new file mode 100644 index 00000000..f2f14dc8 --- /dev/null +++ b/test/type_param/corev_apu/register_interface/src/apb_to_reg.sv @@ -0,0 +1,39 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License.
+// +// Florian Zaruba +// Purely combinational mapping of the APB-style ports onto the REG_BUS.out modport; clk_i and rst_ni are not used in the body. +module apb_to_reg ( + input logic clk_i, + input logic rst_ni, + + input logic penable_i, + input logic pwrite_i, + input logic [31:0] paddr_i, + input logic psel_i, + input logic [31:0] pwdata_i, + output logic [31:0] prdata_o, + output logic pready_o, + output logic pslverr_o, + + REG_BUS.out reg_o +); + + always_comb begin + reg_o.addr = paddr_i; + reg_o.write = pwrite_i; + reg_o.wdata = pwdata_i; + reg_o.wstrb = '1; // all strobe bits set; this port list has no strobe input + reg_o.valid = psel_i & penable_i; // request is valid only while both select and enable are high + pready_o = reg_o.ready; + pslverr_o = reg_o.error; + prdata_o = reg_o.rdata; + end +endmodule diff --git a/test/type_param/corev_apu/register_interface/src/reg_intf.sv b/test/type_param/corev_apu/register_interface/src/reg_intf.sv new file mode 100644 index 00000000..5923ae3a --- /dev/null +++ b/test/type_param/corev_apu/register_interface/src/reg_intf.sv @@ -0,0 +1,43 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Fabian Schuiki + +/// A simple register interface. +/// +/// This is pretty much as simple as it gets. Transactions consist of only one +/// phase. The master sets the address, write, write data, and write strobe +/// signals and pulls valid high. Once pulled high, valid must remain high and +/// none of the signals may change. The transaction completes when both valid +/// and ready are high. Valid must not depend on ready.
The slave presents the +/// read data and error signals. These signals must be constant while valid and +/// ready are both high. +interface REG_BUS #( + /// The width of the address. + parameter int ADDR_WIDTH = -1, + /// The width of the data. + parameter int DATA_WIDTH = -1 +)( + input logic clk_i +); + + logic [ADDR_WIDTH-1:0] addr; + logic write; // 0=read, 1=write + logic [DATA_WIDTH-1:0] rdata; // read data, driven through modport `in` + logic [DATA_WIDTH-1:0] wdata; // write data, driven through modport `out` + logic [DATA_WIDTH/8-1:0] wstrb; // byte-wise strobe + logic error; // 0=ok, 1=error + logic valid; // driven through modport `out` + logic ready; // driven through modport `in` + + modport in (input addr, write, wdata, wstrb, valid, output rdata, error, ready); // slave side: receives the request, drives the response + modport out (output addr, write, wdata, wstrb, valid, input rdata, error, ready); // master side: drives the request, receives the response + +endinterface diff --git a/test/type_param/corev_apu/riscv-dbg/debug_rom/debug_rom.sv b/test/type_param/corev_apu/riscv-dbg/debug_rom/debug_rom.sv new file mode 100644 index 00000000..0299db6b --- /dev/null +++ b/test/type_param/corev_apu/riscv-dbg/debug_rom/debug_rom.sv @@ -0,0 +1,66 @@ +/* Copyright 2018 ETH Zurich and University of Bologna. + * Copyright and related rights are licensed under the Solderpad Hardware + * License, Version 0.51 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law + * or agreed to in writing, software, hardware and materials distributed under + * this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License.
+ * + * File: debug_rom.sv + * + * Description: Auto-generated bootrom + */ + +// Auto-generated code +module debug_rom ( + input logic clk_i, + input logic req_i, + input logic [63:0] addr_i, + output logic [63:0] rdata_o +); + + localparam int unsigned RomSize = 19; // number of 64-bit words; matches the 19 literals below + + logic [RomSize-1:0][63:0] mem; // packed array: the first literal below is mem[RomSize-1], the last is mem[0] + assign mem = { + 64'h00000000_7b200073, + 64'h7b202473_7b302573, + 64'h10852423_f1402473, + 64'ha85ff06f_7b202473, + 64'h7b302573_10052223, + 64'h00100073_7b202473, + 64'h7b302573_10052623, + 64'h00c51513_00c55513, + 64'h00000517_fd5ff06f, + 64'hfa041ce3_00247413, + 64'h40044403_00a40433, + 64'hf1402473_02041c63, + 64'h00147413_40044403, + 64'h00a40433_10852023, + 64'hf1402473_00c51513, + 64'h00c55513_00000517, + 64'h7b351073_7b241073, + 64'h0ff0000f_04c0006f, + 64'h07c0006f_00c0006f + }; + + logic [$clog2(RomSize)-1:0] addr_q; + + always_ff @(posedge clk_i) begin + if (req_i) begin + addr_q <= addr_i[$clog2(RomSize)-1+3:3]; // register the word index; address bits [2:0] are dropped + end + end + + // this prevents spurious Xes from propagating into + // the speculative fetch stage of the core + always_comb begin : p_outmux + rdata_o = '0; + if (addr_q < $clog2(RomSize)'(RomSize)) begin // word indices >= RomSize keep the zero default + rdata_o = mem[addr_q]; + end + end + +endmodule diff --git a/test/type_param/corev_apu/riscv-dbg/src/dm_csrs.sv b/test/type_param/corev_apu/riscv-dbg/src/dm_csrs.sv new file mode 100644 index 00000000..45e8878d --- /dev/null +++ b/test/type_param/corev_apu/riscv-dbg/src/dm_csrs.sv @@ -0,0 +1,634 @@ +/* Copyright 2018 ETH Zurich and University of Bologna. + * Copyright and related rights are licensed under the Solderpad Hardware + * License, Version 0.51 (the “License”); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law + * or agreed to in writing, software, hardware and materials distributed under + * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * + * File: dm_csrs.sv + * Author: Florian Zaruba + * Date: 30.6.2018 + * + * Description: Debug CSRs. Communication over Debug Transport Module (DTM) + */ + +module dm_csrs #( + parameter int unsigned NrHarts = 1, + parameter int unsigned BusWidth = 32, + parameter logic [NrHarts-1:0] SelectableHarts = {NrHarts{1'b1}} +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic testmode_i, + input logic dmi_rst_ni, // Debug Module Intf reset active-low + input logic dmi_req_valid_i, + output logic dmi_req_ready_o, + input dm::dmi_req_t dmi_req_i, + // every request needs a response one cycle later + output logic dmi_resp_valid_o, + input logic dmi_resp_ready_i, + output dm::dmi_resp_t dmi_resp_o, + // global ctrl + output logic ndmreset_o, // non-debug module reset active-high + output logic dmactive_o, // 1 -> debug-module is active, + // 0 -> synchronous re-set + // hart status + input dm::hartinfo_t [NrHarts-1:0] hartinfo_i, // static hartinfo + input logic [NrHarts-1:0] halted_i, // hart is halted + input logic [NrHarts-1:0] unavailable_i, // e.g.: powered down + input logic [NrHarts-1:0] resumeack_i, // hart acknowledged resume request + // hart control + output logic [19:0] hartsel_o, // hartselect to ctrl module + output logic [NrHarts-1:0] haltreq_o, // request to halt a hart + output logic [NrHarts-1:0] resumereq_o, // request hart to resume + output logic clear_resumeack_o, + + output logic cmd_valid_o, // debugger writing to cmd field + output dm::command_t cmd_o, // abstract command + input logic cmderror_valid_i, // an error occurred + input dm::cmderr_e cmderror_i, 
// this error occurred + input logic cmdbusy_i, // cmd is currently busy executing + + output logic [dm::ProgBufSize-1:0][31:0] progbuf_o, // to system bus + output logic [dm::DataCount-1:0][31:0] data_o, + + input logic [dm::DataCount-1:0][31:0] data_i, + input logic data_valid_i, + // system bus access module (SBA) + output logic [BusWidth-1:0] sbaddress_o, + input logic [BusWidth-1:0] sbaddress_i, + output logic sbaddress_write_valid_o, + // control signals in + output logic sbreadonaddr_o, + output logic sbautoincrement_o, + output logic [2:0] sbaccess_o, + // data out + output logic sbreadondata_o, + output logic [BusWidth-1:0] sbdata_o, + output logic sbdata_read_valid_o, + output logic sbdata_write_valid_o, + // read data in + input logic [BusWidth-1:0] sbdata_i, + input logic sbdata_valid_i, + // control signals + input logic sbbusy_i, + input logic sberror_valid_i, // bus error occurred + input logic [2:0] sberror_i // bus error occurred +); + // the amount of bits we need to represent all harts + localparam int unsigned HartSelLen = (NrHarts == 1) ? 
1 : $clog2(NrHarts); + localparam int unsigned NrHartsAligned = 2**HartSelLen; + + dm::dtm_op_e dtm_op; + assign dtm_op = dm::dtm_op_e'(dmi_req_i.op); + + logic resp_queue_full; + logic resp_queue_empty; + logic resp_queue_push; + logic resp_queue_pop; + logic [31:0] resp_queue_data; + + localparam dm::dm_csr_e DataEnd = dm::dm_csr_e'(dm::Data0 + {4'h0, dm::DataCount} - 8'h1); + localparam dm::dm_csr_e ProgBufEnd = dm::dm_csr_e'(dm::ProgBuf0 + {4'h0, dm::ProgBufSize} - 8'h1); + + logic [31:0] haltsum0, haltsum1, haltsum2, haltsum3; + logic [((NrHarts-1)/2**5 + 1) * 32 - 1 : 0] halted; + logic [(NrHarts-1)/2**5:0][31:0] halted_reshaped0; + logic [(NrHarts-1)/2**10:0][31:0] halted_reshaped1; + logic [(NrHarts-1)/2**15:0][31:0] halted_reshaped2; + logic [((NrHarts-1)/2**10+1)*32-1:0] halted_flat1; + logic [((NrHarts-1)/2**15+1)*32-1:0] halted_flat2; + logic [31:0] halted_flat3; + + // haltsum0 + logic [14:0] hartsel_idx0; + always_comb begin : p_haltsum0 + halted = '0; + haltsum0 = '0; + hartsel_idx0 = hartsel_o[19:5]; + halted[NrHarts-1:0] = halted_i; + halted_reshaped0 = halted; + if (hartsel_idx0 < 15'((NrHarts-1)/2**5+1)) begin + haltsum0 = halted_reshaped0[hartsel_idx0]; + end + end + + // haltsum1 + logic [9:0] hartsel_idx1; + always_comb begin : p_reduction1 + halted_flat1 = '0; + haltsum1 = '0; + hartsel_idx1 = hartsel_o[19:10]; + + for (int unsigned k = 0; k < (NrHarts-1)/2**5+1; k++) begin + halted_flat1[k] = |halted_reshaped0[k]; + end + halted_reshaped1 = halted_flat1; + + if (hartsel_idx1 < 10'(((NrHarts-1)/2**10+1))) begin + haltsum1 = halted_reshaped1[hartsel_idx1]; + end + end + + // haltsum2 + logic [4:0] hartsel_idx2; + always_comb begin : p_reduction2 + halted_flat2 = '0; + haltsum2 = '0; + hartsel_idx2 = hartsel_o[19:15]; + + for (int unsigned k = 0; k < (NrHarts-1)/2**10+1; k++) begin + halted_flat2[k] = |halted_reshaped1[k]; + end + halted_reshaped2 = halted_flat2; + + if (hartsel_idx2 < 5'(((NrHarts-1)/2**15+1))) begin + haltsum2 = 
halted_reshaped2[hartsel_idx2]; + end + end + + // haltsum3 + always_comb begin : p_reduction3 + halted_flat3 = '0; + for (int unsigned k = 0; k < NrHarts/2**15+1; k++) begin + halted_flat3[k] = |halted_reshaped2[k]; + end + haltsum3 = halted_flat3; + end + + + dm::dmstatus_t dmstatus; + dm::dmcontrol_t dmcontrol_d, dmcontrol_q; + dm::abstractcs_t abstractcs; + dm::cmderr_e cmderr_d, cmderr_q; + dm::command_t command_d, command_q; + logic cmd_valid_d, cmd_valid_q; + dm::abstractauto_t abstractauto_d, abstractauto_q; + dm::sbcs_t sbcs_d, sbcs_q; + logic [63:0] sbaddr_d, sbaddr_q; + logic [63:0] sbdata_d, sbdata_q; + + logic [NrHarts-1:0] havereset_d, havereset_q; + // program buffer + logic [dm::ProgBufSize-1:0][31:0] progbuf_d, progbuf_q; + logic [dm::DataCount-1:0][31:0] data_d, data_q; + + logic [HartSelLen-1:0] selected_hart; + + // a successful response returns zero + assign dmi_resp_o.resp = dm::DTM_SUCCESS; + assign dmi_resp_valid_o = ~resp_queue_empty; + assign dmi_req_ready_o = ~resp_queue_full; + assign resp_queue_push = dmi_req_valid_i & dmi_req_ready_o; + // SBA + assign sbautoincrement_o = sbcs_q.sbautoincrement; + assign sbreadonaddr_o = sbcs_q.sbreadonaddr; + assign sbreadondata_o = sbcs_q.sbreadondata; + assign sbaccess_o = sbcs_q.sbaccess; + assign sbdata_o = sbdata_q[BusWidth-1:0]; + assign sbaddress_o = sbaddr_q[BusWidth-1:0]; + + assign hartsel_o = {dmcontrol_q.hartselhi, dmcontrol_q.hartsello}; + + // needed to avoid lint warnings + logic [NrHartsAligned-1:0] havereset_d_aligned, havereset_q_aligned, + resumeack_aligned, unavailable_aligned, + halted_aligned; + assign resumeack_aligned = NrHartsAligned'(resumeack_i); + assign unavailable_aligned = NrHartsAligned'(unavailable_i); + assign halted_aligned = NrHartsAligned'(halted_i); + + assign havereset_d = NrHarts'(havereset_d_aligned); + assign havereset_q_aligned = NrHartsAligned'(havereset_q); + + dm::hartinfo_t [NrHartsAligned-1:0] hartinfo_aligned; + always_comb begin : p_hartinfo_align + 
hartinfo_aligned = '0; + hartinfo_aligned[NrHarts-1:0] = hartinfo_i; + end + + // helper variables + dm::dm_csr_e dm_csr_addr; + dm::sbcs_t sbcs; + dm::abstractcs_t a_abstractcs; + logic [3:0] autoexecdata_idx; // 0 == Data0 ... 11 == Data11 + + // Get the data index, i.e. 0 for dm::Data0 up to 11 for dm::Data11 + assign dm_csr_addr = dm::dm_csr_e'({1'b0, dmi_req_i.addr}); + // Xilinx Vivado 2020.1 does not allow subtraction of two enums; do the subtraction with logic + // types instead. + assign autoexecdata_idx = 4'({dm_csr_addr} - {dm::Data0}); + + always_comb begin : csr_read_write + // -------------------- + // Static Values (R/O) + // -------------------- + // dmstatus + dmstatus = '0; + dmstatus.version = dm::DbgVersion013; + // no authentication implemented + dmstatus.authenticated = 1'b1; + // we do not support halt-on-reset sequence + dmstatus.hasresethaltreq = 1'b0; + // TODO(zarubaf) things need to change here if we implement the array mask + dmstatus.allhavereset = havereset_q_aligned[selected_hart]; + dmstatus.anyhavereset = havereset_q_aligned[selected_hart]; + + dmstatus.allresumeack = resumeack_aligned[selected_hart]; + dmstatus.anyresumeack = resumeack_aligned[selected_hart]; + + dmstatus.allunavail = unavailable_aligned[selected_hart]; + dmstatus.anyunavail = unavailable_aligned[selected_hart]; + + // as soon as we are out of the legal Hart region tell the debugger + // that there are only non-existent harts + dmstatus.allnonexistent = logic'(32'(hartsel_o) > (NrHarts - 1)); + dmstatus.anynonexistent = logic'(32'(hartsel_o) > (NrHarts - 1)); + + // We are not allowed to be in multiple states at once. This is a to + // make the running/halted and unavailable states exclusive. 
+ dmstatus.allhalted = halted_aligned[selected_hart] & ~unavailable_aligned[selected_hart]; + dmstatus.anyhalted = halted_aligned[selected_hart] & ~unavailable_aligned[selected_hart]; + + dmstatus.allrunning = ~halted_aligned[selected_hart] & ~unavailable_aligned[selected_hart]; + dmstatus.anyrunning = ~halted_aligned[selected_hart] & ~unavailable_aligned[selected_hart]; + + // abstractcs + abstractcs = '0; + abstractcs.datacount = dm::DataCount; + abstractcs.progbufsize = dm::ProgBufSize; + abstractcs.busy = cmdbusy_i; + abstractcs.cmderr = cmderr_q; + + // abstractautoexec + abstractauto_d = abstractauto_q; + abstractauto_d.zero0 = '0; + + // default assignments + havereset_d_aligned = NrHartsAligned'(havereset_q); + dmcontrol_d = dmcontrol_q; + cmderr_d = cmderr_q; + command_d = command_q; + progbuf_d = progbuf_q; + data_d = data_q; + sbcs_d = sbcs_q; + sbaddr_d = 64'(sbaddress_i); + sbdata_d = sbdata_q; + + resp_queue_data = 32'h0; + cmd_valid_d = 1'b0; + sbaddress_write_valid_o = 1'b0; + sbdata_read_valid_o = 1'b0; + sbdata_write_valid_o = 1'b0; + clear_resumeack_o = 1'b0; + + // helper variables + sbcs = '0; + a_abstractcs = '0; + + // reads + if (dmi_req_ready_o && dmi_req_valid_i && dtm_op == dm::DTM_READ) begin + unique case (dm_csr_addr) inside + [(dm::Data0):DataEnd]: begin + resp_queue_data = data_q[$clog2(dm::DataCount)'(autoexecdata_idx)]; + if (!cmdbusy_i) begin + // check whether we need to re-execute the command (just give a cmd_valid) + cmd_valid_d = abstractauto_q.autoexecdata[autoexecdata_idx]; + // An abstract command was executing while one of the data registers was read + end else if (cmderr_q == dm::CmdErrNone) begin + cmderr_d = dm::CmdErrBusy; + end + end + dm::DMControl: resp_queue_data = dmcontrol_q; + dm::DMStatus: resp_queue_data = dmstatus; + dm::Hartinfo: resp_queue_data = hartinfo_aligned[selected_hart]; + dm::AbstractCS: resp_queue_data = abstractcs; + dm::AbstractAuto: resp_queue_data = abstractauto_q; + // command is read-only + 
dm::Command: resp_queue_data = '0; + [(dm::ProgBuf0):ProgBufEnd]: begin + resp_queue_data = progbuf_q[dmi_req_i.addr[$clog2(dm::ProgBufSize)-1:0]]; + if (!cmdbusy_i) begin + // check whether we need to re-execute the command (just give a cmd_valid) + // range of autoexecprogbuf is 31:16 + cmd_valid_d = abstractauto_q.autoexecprogbuf[{1'b1, dmi_req_i.addr[3:0]}]; + + // An abstract command was executing while one of the progbuf registers was read + end else if (cmderr_q == dm::CmdErrNone) begin + cmderr_d = dm::CmdErrBusy; + end + end + dm::HaltSum0: resp_queue_data = haltsum0; + dm::HaltSum1: resp_queue_data = haltsum1; + dm::HaltSum2: resp_queue_data = haltsum2; + dm::HaltSum3: resp_queue_data = haltsum3; + dm::SBCS: begin + resp_queue_data = sbcs_q; + end + dm::SBAddress0: begin + resp_queue_data = sbaddr_q[31:0]; + end + dm::SBAddress1: begin + resp_queue_data = sbaddr_q[63:32]; + end + dm::SBData0: begin + // access while the SBA was busy + if (sbbusy_i || sbcs_q.sbbusyerror) begin + sbcs_d.sbbusyerror = 1'b1; + end else begin + sbdata_read_valid_o = (sbcs_q.sberror == '0); + resp_queue_data = sbdata_q[31:0]; + end + end + dm::SBData1: begin + // access while the SBA was busy + if (sbbusy_i || sbcs_q.sbbusyerror) begin + sbcs_d.sbbusyerror = 1'b1; + end else begin + resp_queue_data = sbdata_q[63:32]; + end + end + default:; + endcase + end + + // write + if (dmi_req_ready_o && dmi_req_valid_i && dtm_op == dm::DTM_WRITE) begin + unique case (dm_csr_addr) inside + [(dm::Data0):DataEnd]: begin + if (dm::DataCount > 0) begin + // attempts to write them while busy is set does not change their value + if (!cmdbusy_i) begin + data_d[dmi_req_i.addr[$clog2(dm::DataCount)-1:0]] = dmi_req_i.data; + // check whether we need to re-execute the command (just give a cmd_valid) + cmd_valid_d = abstractauto_q.autoexecdata[autoexecdata_idx]; + //An abstract command was executing while one of the data registers was written + end else if (cmderr_q == dm::CmdErrNone) begin + 
cmderr_d = dm::CmdErrBusy; + end + end + end + dm::DMControl: begin + dmcontrol_d = dmi_req_i.data; + // clear the havereset of the selected hart + if (dmcontrol_d.ackhavereset) begin + havereset_d_aligned[selected_hart] = 1'b0; + end + end + dm::DMStatus:; // writes to this R/O register are ignored + dm::Hartinfo:; // hartinfo is R/O + // only command error is write-able + dm::AbstractCS: begin // W1C + // Gets set if an abstract command fails. The bits in this + // field remain set until they are cleared by writing 1 to + // them. No abstract command is started until the value is + // reset to 0. + a_abstractcs = dm::abstractcs_t'(dmi_req_i.data); + // writes during abstract command execution are not allowed + if (!cmdbusy_i) begin + cmderr_d = dm::cmderr_e'(~a_abstractcs.cmderr & cmderr_q); + end else if (cmderr_q == dm::CmdErrNone) begin + cmderr_d = dm::CmdErrBusy; + end + end + dm::Command: begin + // writes are ignored if a command is already busy + if (!cmdbusy_i) begin + cmd_valid_d = 1'b1; + command_d = dm::command_t'(dmi_req_i.data); + // if there was an attempt to write during a busy execution + // and the cmderror field is zero set the busy error + end else if (cmderr_q == dm::CmdErrNone) begin + cmderr_d = dm::CmdErrBusy; + end + end + dm::AbstractAuto: begin + // this field can only be written legally when there is no command executing + if (!cmdbusy_i) begin + abstractauto_d = 32'h0; + abstractauto_d.autoexecdata = 12'(dmi_req_i.data[dm::DataCount-1:0]); + abstractauto_d.autoexecprogbuf = 16'(dmi_req_i.data[dm::ProgBufSize-1+16:16]); + end else if (cmderr_q == dm::CmdErrNone) begin + cmderr_d = dm::CmdErrBusy; + end + end + [(dm::ProgBuf0):ProgBufEnd]: begin + // attempts to write them while busy is set does not change their value + if (!cmdbusy_i) begin + progbuf_d[dmi_req_i.addr[$clog2(dm::ProgBufSize)-1:0]] = dmi_req_i.data; + // check whether we need to re-execute the command (just give a cmd_valid) + // this should probably throw an error if executed 
during another command + // was busy + // range of autoexecprogbuf is 31:16 + cmd_valid_d = abstractauto_q.autoexecprogbuf[{1'b1, dmi_req_i.addr[3:0]}]; + //An abstract command was executing while one of the progbuf registers was written + end else if (cmderr_q == dm::CmdErrNone) begin + cmderr_d = dm::CmdErrBusy; + end + end + dm::SBCS: begin + // access while the SBA was busy + if (sbbusy_i) begin + sbcs_d.sbbusyerror = 1'b1; + end else begin + sbcs = dm::sbcs_t'(dmi_req_i.data); + sbcs_d = sbcs; + // R/W1C + sbcs_d.sbbusyerror = sbcs_q.sbbusyerror & (~sbcs.sbbusyerror); + sbcs_d.sberror = sbcs_q.sberror & (~sbcs.sberror); + end + end + dm::SBAddress0: begin + // access while the SBA was busy + if (sbbusy_i || sbcs_q.sbbusyerror) begin + sbcs_d.sbbusyerror = 1'b1; + end else begin + sbaddr_d[31:0] = dmi_req_i.data; + sbaddress_write_valid_o = (sbcs_q.sberror == '0); + end + end + dm::SBAddress1: begin + // access while the SBA was busy + if (sbbusy_i || sbcs_q.sbbusyerror) begin + sbcs_d.sbbusyerror = 1'b1; + end else begin + sbaddr_d[63:32] = dmi_req_i.data; + end + end + dm::SBData0: begin + // access while the SBA was busy + if (sbbusy_i || sbcs_q.sbbusyerror) begin + sbcs_d.sbbusyerror = 1'b1; + end else begin + sbdata_d[31:0] = dmi_req_i.data; + sbdata_write_valid_o = (sbcs_q.sberror == '0); + end + end + dm::SBData1: begin + // access while the SBA was busy + if (sbbusy_i || sbcs_q.sbbusyerror) begin + sbcs_d.sbbusyerror = 1'b1; + end else begin + sbdata_d[63:32] = dmi_req_i.data; + end + end + default:; + endcase + end + // hart threw a command error and has precedence over bus writes + if (cmderror_valid_i) begin + cmderr_d = cmderror_i; + end + + // update data registers + if (data_valid_i) begin + data_d = data_i; + end + + // set the havereset flag when we did a ndmreset + if (ndmreset_o) begin + havereset_d_aligned[NrHarts-1:0] = '1; + end + // ------------- + // System Bus + // ------------- + // set bus error + if (sberror_valid_i) begin + 
sbcs_d.sberror = sberror_i; + end + // update read data + if (sbdata_valid_i) begin + sbdata_d = 64'(sbdata_i); + end + + // dmcontrol + // TODO(zarubaf) we currently do not implement the hartarray mask + dmcontrol_d.hasel = 1'b0; + // we do not support resetting an individual hart + dmcontrol_d.hartreset = 1'b0; + dmcontrol_d.setresethaltreq = 1'b0; + dmcontrol_d.clrresethaltreq = 1'b0; + dmcontrol_d.zero1 = '0; + dmcontrol_d.zero0 = '0; + // Non-writeable, clear only + dmcontrol_d.ackhavereset = 1'b0; + if (!dmcontrol_q.resumereq && dmcontrol_d.resumereq) begin + clear_resumeack_o = 1'b1; + end + if (dmcontrol_q.resumereq && resumeack_i) begin + dmcontrol_d.resumereq = 1'b0; + end + // static values for sbcs + sbcs_d.sbversion = 3'd1; + sbcs_d.sbbusy = sbbusy_i; + sbcs_d.sbasize = $bits(sbcs_d.sbasize)'(BusWidth); + sbcs_d.sbaccess128 = 1'b0; + sbcs_d.sbaccess64 = logic'(BusWidth == 32'd64); + sbcs_d.sbaccess32 = logic'(BusWidth == 32'd32); + sbcs_d.sbaccess16 = 1'b0; + sbcs_d.sbaccess8 = 1'b0; + sbcs_d.sbaccess = (BusWidth == 32'd64) ? 
3'd3 : 3'd2; + end + + // output multiplexer + always_comb begin : p_outmux + selected_hart = hartsel_o[HartSelLen-1:0]; + // default assignment + haltreq_o = '0; + resumereq_o = '0; + if (selected_hart <= HartSelLen'(NrHarts-1)) begin + haltreq_o[selected_hart] = dmcontrol_q.haltreq; + resumereq_o[selected_hart] = dmcontrol_q.resumereq; + end + end + + assign dmactive_o = dmcontrol_q.dmactive; + assign cmd_o = command_q; + assign cmd_valid_o = cmd_valid_q; + assign progbuf_o = progbuf_q; + assign data_o = data_q; + + assign resp_queue_pop = dmi_resp_ready_i & ~resp_queue_empty; + + assign ndmreset_o = dmcontrol_q.ndmreset; + + // response FIFO + fifo_v2 #( + .dtype ( logic [31:0] ), + .DEPTH ( 2 ) + ) i_fifo ( + .clk_i ( clk_i ), + .rst_ni ( dmi_rst_ni ), // reset only when system is re-set + .flush_i ( 1'b0 ), // we do not need to flush this queue + .testmode_i ( testmode_i ), + .full_o ( resp_queue_full ), + .empty_o ( resp_queue_empty ), + .alm_full_o ( ), + .alm_empty_o ( ), + .data_i ( resp_queue_data ), + .push_i ( resp_queue_push ), + .data_o ( dmi_resp_o.data ), + .pop_i ( resp_queue_pop ) + ); + + always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs + // PoR + if (!rst_ni) begin + dmcontrol_q <= '0; + // this is the only write-able bit during reset + cmderr_q <= dm::CmdErrNone; + command_q <= '0; + cmd_valid_q <= '0; + abstractauto_q <= '0; + progbuf_q <= '0; + data_q <= '0; + sbcs_q <= '0; + sbaddr_q <= '0; + sbdata_q <= '0; + havereset_q <= '1; + end else begin + havereset_q <= SelectableHarts & havereset_d; + // synchronous re-set of debug module, active-low, except for dmactive + if (!dmcontrol_q.dmactive) begin + dmcontrol_q.haltreq <= '0; + dmcontrol_q.resumereq <= '0; + dmcontrol_q.hartreset <= '0; + dmcontrol_q.ackhavereset <= '0; + dmcontrol_q.zero1 <= '0; + dmcontrol_q.hasel <= '0; + dmcontrol_q.hartsello <= '0; + dmcontrol_q.hartselhi <= '0; + dmcontrol_q.zero0 <= '0; + dmcontrol_q.setresethaltreq <= '0; + dmcontrol_q.clrresethaltreq <= 
'0; + dmcontrol_q.ndmreset <= '0; + // this is the only write-able bit during reset + dmcontrol_q.dmactive <= dmcontrol_d.dmactive; + cmderr_q <= dm::CmdErrNone; + command_q <= '0; + cmd_valid_q <= '0; + abstractauto_q <= '0; + progbuf_q <= '0; + data_q <= '0; + sbcs_q <= '0; + sbaddr_q <= '0; + sbdata_q <= '0; + end else begin + dmcontrol_q <= dmcontrol_d; + cmderr_q <= cmderr_d; + command_q <= command_d; + cmd_valid_q <= cmd_valid_d; + abstractauto_q <= abstractauto_d; + progbuf_q <= progbuf_d; + data_q <= data_d; + sbcs_q <= sbcs_d; + sbaddr_q <= sbaddr_d; + sbdata_q <= sbdata_d; + end + end + end + +endmodule : dm_csrs diff --git a/test/type_param/corev_apu/riscv-dbg/src/dm_mem.sv b/test/type_param/corev_apu/riscv-dbg/src/dm_mem.sv new file mode 100755 index 00000000..178259f6 --- /dev/null +++ b/test/type_param/corev_apu/riscv-dbg/src/dm_mem.sv @@ -0,0 +1,523 @@ +/* Copyright 2018 ETH Zurich and University of Bologna. +* Copyright and related rights are licensed under the Solderpad Hardware +* License, Version 0.51 (the “License”); you may not use this file except in +* compliance with the License. You may obtain a copy of the License at +* http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +* or agreed to in writing, software, hardware and materials distributed under +* this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR +* CONDITIONS OF ANY KIND, either express or implied. See the License for the +* specific language governing permissions and limitations under the License. 
+* +* File: dm_mem.sv +* Author: Florian Zaruba +* Date: 11.7.2018 +* +* Description: Memory module for execution-based debug clients +* +*/ + +module dm_mem #( + parameter int unsigned NrHarts = 1, + parameter int unsigned BusWidth = 32, + parameter logic [NrHarts-1:0] SelectableHarts = {NrHarts{1'b1}}, + parameter int unsigned DmBaseAddress = '0 +) ( + input logic clk_i, // Clock + input logic rst_ni, // debug module reset + + output logic [NrHarts-1:0] debug_req_o, + input logic [19:0] hartsel_i, + // from Ctrl and Status register + input logic [NrHarts-1:0] haltreq_i, + input logic [NrHarts-1:0] resumereq_i, + input logic clear_resumeack_i, + + // state bits + output logic [NrHarts-1:0] halted_o, // hart acknowledge halt + output logic [NrHarts-1:0] resuming_o, // hart is resuming + + input logic [dm::ProgBufSize-1:0][31:0] progbuf_i, // program buffer to expose + + input logic [dm::DataCount-1:0][31:0] data_i, // data in + output logic [dm::DataCount-1:0][31:0] data_o, // data out + output logic data_valid_o, // data out is valid + // abstract command interface + input logic cmd_valid_i, + input dm::command_t cmd_i, + output logic cmderror_valid_o, + output dm::cmderr_e cmderror_o, + output logic cmdbusy_o, + // data interface + + // SRAM interface + input logic req_i, + input logic we_i, + input logic [BusWidth-1:0] addr_i, + input logic [BusWidth-1:0] wdata_i, + input logic [BusWidth/8-1:0] be_i, + output logic [BusWidth-1:0] rdata_o +); + localparam int unsigned DbgAddressBits = 12; + localparam int unsigned HartSelLen = (NrHarts == 1) ? 1 : $clog2(NrHarts); + localparam int unsigned NrHartsAligned = 2**HartSelLen; + localparam int unsigned MaxAar = (BusWidth == 64) ? 4 : 3; + localparam bit HasSndScratch = (DmBaseAddress != 0); + // Depending on whether we are at the zero page or not we either use `x0` or `x10/a0` + localparam logic [4:0] LoadBaseAddr = (DmBaseAddress == 0) ? 
5'd0 : 5'd10; + + localparam logic [DbgAddressBits-1:0] DataBaseAddr = (dm::DataAddr); + localparam logic [DbgAddressBits-1:0] DataEndAddr = (dm::DataAddr + 4*dm::DataCount - 1); + localparam logic [DbgAddressBits-1:0] ProgBufBaseAddr = (dm::DataAddr - 4*dm::ProgBufSize); + localparam logic [DbgAddressBits-1:0] ProgBufEndAddr = (dm::DataAddr - 1); + localparam logic [DbgAddressBits-1:0] AbstractCmdBaseAddr = (ProgBufBaseAddr - 4*10); + localparam logic [DbgAddressBits-1:0] AbstractCmdEndAddr = (ProgBufBaseAddr - 1); + + localparam logic [DbgAddressBits-1:0] WhereToAddr = 'h300; + localparam logic [DbgAddressBits-1:0] FlagsBaseAddr = 'h400; + localparam logic [DbgAddressBits-1:0] FlagsEndAddr = 'h7FF; + + localparam logic [DbgAddressBits-1:0] HaltedAddr = 'h100; + localparam logic [DbgAddressBits-1:0] GoingAddr = 'h104; + localparam logic [DbgAddressBits-1:0] ResumingAddr = 'h108; + localparam logic [DbgAddressBits-1:0] ExceptionAddr = 'h10C; + + logic [dm::ProgBufSize/2-1:0][63:0] progbuf; + logic [7:0][63:0] abstract_cmd; + logic [NrHarts-1:0] halted_d, halted_q; + logic [NrHarts-1:0] resuming_d, resuming_q; + logic resume, go, going; + + logic exception; + logic unsupported_command; + + logic [63:0] rom_rdata; + logic [63:0] rdata_d, rdata_q; + logic word_enable32_q; + + // this is needed to avoid lint warnings related to array indexing + // resize hartsel to valid range + logic [HartSelLen-1:0] hartsel, wdata_hartsel; + + assign hartsel = hartsel_i[HartSelLen-1:0]; + assign wdata_hartsel = wdata_i[HartSelLen-1:0]; + + logic [NrHartsAligned-1:0] resumereq_aligned, haltreq_aligned, + halted_d_aligned, halted_q_aligned, + halted_aligned, resumereq_wdata_aligned, + resuming_d_aligned, resuming_q_aligned; + + assign resumereq_aligned = NrHartsAligned'(resumereq_i); + assign haltreq_aligned = NrHartsAligned'(haltreq_i); + assign resumereq_wdata_aligned = NrHartsAligned'(resumereq_i); + + assign halted_q_aligned = NrHartsAligned'(halted_q); + assign halted_d = 
NrHarts'(halted_d_aligned); + assign resuming_q_aligned = NrHartsAligned'(resuming_q); + assign resuming_d = NrHarts'(resuming_d_aligned); + + // distinguish whether we need to forward data from the ROM or the FSM + // latch the address for this + logic fwd_rom_d, fwd_rom_q; + dm::ac_ar_cmd_t ac_ar; + + // Abstract Command Access Register + assign ac_ar = dm::ac_ar_cmd_t'(cmd_i.control); + assign debug_req_o = haltreq_i; + assign halted_o = halted_q; + assign resuming_o = resuming_q; + + // reshape progbuf + assign progbuf = progbuf_i; + + typedef enum logic [1:0] { Idle, Go, Resume, CmdExecuting } state_e; + state_e state_d, state_q; + + // hart ctrl queue + always_comb begin : p_hart_ctrl_queue + cmderror_valid_o = 1'b0; + cmderror_o = dm::CmdErrNone; + state_d = state_q; + go = 1'b0; + resume = 1'b0; + cmdbusy_o = 1'b1; + + unique case (state_q) + Idle: begin + cmdbusy_o = 1'b0; + if (cmd_valid_i && halted_q_aligned[hartsel] && !unsupported_command) begin + // give the go signal + state_d = Go; + end else if (cmd_valid_i) begin + // hart must be halted for all requests + cmderror_valid_o = 1'b1; + cmderror_o = dm::CmdErrorHaltResume; + end + // CSRs want to resume, the request is ignored when the hart is + // requested to halt or it didn't clear the resuming_q bit before + if (resumereq_aligned[hartsel] && !resuming_q_aligned[hartsel] && + !haltreq_aligned[hartsel] && halted_q_aligned[hartsel]) begin + state_d = Resume; + end + end + + Go: begin + // we are already busy here since we scheduled the execution of a program + cmdbusy_o = 1'b1; + go = 1'b1; + // the thread is now executing the command, track its state + if (going) begin + state_d = CmdExecuting; + end + end + + Resume: begin + cmdbusy_o = 1'b1; + resume = 1'b1; + if (resuming_q_aligned[hartsel]) begin + state_d = Idle; + end + end + + CmdExecuting: begin + cmdbusy_o = 1'b1; + go = 1'b0; + // wait until the hart has halted again + if (halted_aligned[hartsel]) begin + state_d = Idle; + end + end + + 
default: ; + endcase + + // only signal once that cmd is unsupported so that we can clear cmderr + // in subsequent writes to abstractcs + if (unsupported_command && cmd_valid_i) begin + cmderror_valid_o = 1'b1; + cmderror_o = dm::CmdErrNotSupported; + end + + if (exception) begin + cmderror_valid_o = 1'b1; + cmderror_o = dm::CmdErrorException; + end + end + + // word mux for 32bit and 64bit buses + logic [63:0] word_mux; + assign word_mux = (fwd_rom_q) ? rom_rdata : rdata_q; + + if (BusWidth == 64) begin : gen_word_mux64 + assign rdata_o = word_mux; + end else begin : gen_word_mux32 + assign rdata_o = (word_enable32_q) ? word_mux[32 +: 32] : word_mux[0 +: 32]; + end + + // read/write logic + logic [63:0] data_bits; + logic [7:0][7:0] rdata; + always_comb begin : p_rw_logic + + halted_d_aligned = NrHartsAligned'(halted_q); + resuming_d_aligned = NrHartsAligned'(resuming_q); + rdata_d = rdata_q; + // convert the data in bits representation + data_bits = data_i; + rdata = '0; + + // write data in csr register + data_valid_o = 1'b0; + exception = 1'b0; + halted_aligned = '0; + going = 1'b0; + + // The resume ack signal is lowered when the resume request is deasserted + if (clear_resumeack_i) begin + resuming_d_aligned[hartsel] = 1'b0; + end + // we've got a new request + if (req_i) begin + // this is a write + if (we_i) begin + unique case (addr_i[DbgAddressBits-1:0]) inside + HaltedAddr: begin + halted_aligned[wdata_hartsel] = 1'b1; + halted_d_aligned[wdata_hartsel] = 1'b1; + end + GoingAddr: begin + going = 1'b1; + end + ResumingAddr: begin + // clear the halted flag as the hart resumed execution + halted_d_aligned[wdata_hartsel] = 1'b0; + // set the resuming flag which needs to be cleared by the debugger + resuming_d_aligned[wdata_hartsel] = 1'b1; + end + // an exception occurred during execution + ExceptionAddr: exception = 1'b1; + // core can write data registers + [DataBaseAddr:DataEndAddr]: begin + data_valid_o = 1'b1; + for (int i = 0; i < $bits(be_i); i++) 
begin + if (be_i[i]) begin + data_bits[i*8+:8] = wdata_i[i*8+:8]; + end + end + end + default ; + endcase + + // this is a read + end else begin + unique case (addr_i[DbgAddressBits-1:0]) inside + // variable ROM content + WhereToAddr: begin + // variable jump to abstract cmd, program_buffer or resume + if (resumereq_wdata_aligned[wdata_hartsel]) begin + rdata_d = {32'b0, dm::jal('0, 21'(dm::ResumeAddress[11:0])-21'(WhereToAddr))}; + end + + // there is a command active so jump there + if (cmdbusy_o) begin + // transfer not set is shortcut to the program buffer if postexec is set + // keep this statement narrow to not catch invalid commands + if (cmd_i.cmdtype == dm::AccessRegister && + !ac_ar.transfer && ac_ar.postexec) begin + rdata_d = {32'b0, dm::jal('0, 21'(ProgBufBaseAddr)-21'(WhereToAddr))}; + // this is a legit abstract cmd -> execute it + end else begin + rdata_d = {32'b0, dm::jal('0, 21'(AbstractCmdBaseAddr)-21'(WhereToAddr))}; + end + end + end + + [DataBaseAddr:DataEndAddr]: begin + rdata_d = { + data_i[$clog2(dm::ProgBufSize)'(addr_i[DbgAddressBits-1:3] - + DataBaseAddr[DbgAddressBits-1:3] + 1'b1)], + data_i[$clog2(dm::ProgBufSize)'(addr_i[DbgAddressBits-1:3] - + DataBaseAddr[DbgAddressBits-1:3])] + }; + end + + [ProgBufBaseAddr:ProgBufEndAddr]: begin + rdata_d = progbuf[$clog2(dm::ProgBufSize)'(addr_i[DbgAddressBits-1:3] - + ProgBufBaseAddr[DbgAddressBits-1:3])]; + end + + // two slots for abstract command + [AbstractCmdBaseAddr:AbstractCmdEndAddr]: begin + // return the correct address index + rdata_d = abstract_cmd[3'(addr_i[DbgAddressBits-1:3] - + AbstractCmdBaseAddr[DbgAddressBits-1:3])]; + end + // harts are polling for flags here + [FlagsBaseAddr:FlagsEndAddr]: begin + // release the corresponding hart + if (({addr_i[DbgAddressBits-1:3], 3'b0} - FlagsBaseAddr[DbgAddressBits-1:0]) == + (DbgAddressBits'(hartsel) & {{(DbgAddressBits-3){1'b1}}, 3'b0})) begin + rdata[DbgAddressBits'(hartsel) & DbgAddressBits'(3'b111)] = {6'b0, resume, go}; + end + 
rdata_d = rdata; + end + default: ; + endcase + end + end + + data_o = data_bits; + end + + always_comb begin : p_abstract_cmd_rom + // this abstract command is currently unsupported + unsupported_command = 1'b0; + // default memory + // if ac_ar.transfer is not set then we can take a shortcut to the program buffer + abstract_cmd[0][31:0] = dm::illegal(); + // load debug module base address into a0, this is shared among all commands + abstract_cmd[0][63:32] = HasSndScratch ? dm::auipc(5'd10, '0) : dm::nop(); + // clr lowest 12b -> DM base offset + abstract_cmd[1][31:0] = HasSndScratch ? dm::srli(5'd10, 5'd10, 6'd12) : dm::nop(); + abstract_cmd[1][63:32] = HasSndScratch ? dm::slli(5'd10, 5'd10, 6'd12) : dm::nop(); + abstract_cmd[2][31:0] = dm::nop(); + abstract_cmd[2][63:32] = dm::nop(); + abstract_cmd[3][31:0] = dm::nop(); + abstract_cmd[3][63:32] = dm::nop(); + abstract_cmd[4][31:0] = HasSndScratch ? dm::csrr(dm::CSR_DSCRATCH1, 5'd10) : dm::nop(); + abstract_cmd[4][63:32] = dm::ebreak(); + abstract_cmd[7:5] = '0; + + // this depends on the command being executed + unique case (cmd_i.cmdtype) + // -------------------- + // Access Register + // -------------------- + dm::AccessRegister: begin + if (32'(ac_ar.aarsize) < MaxAar && ac_ar.transfer && ac_ar.write) begin + // store a0 in dscratch1 + abstract_cmd[0][31:0] = HasSndScratch ? 
dm::csrw(dm::CSR_DSCRATCH1, 5'd10) : dm::nop(); + // this range is reserved + if (ac_ar.regno[15:14] != '0) begin + abstract_cmd[0][31:0] = dm::ebreak(); // we leave asap + unsupported_command = 1'b1; + // A0 access needs to be handled separately, as we use A0 to load + // the DM address offset need to access DSCRATCH1 in this case + end else if (HasSndScratch && ac_ar.regno[12] && (!ac_ar.regno[5]) && + (ac_ar.regno[4:0] == 5'd10)) begin + // store s0 in dscratch + abstract_cmd[2][31:0] = dm::csrw(dm::CSR_DSCRATCH0, 5'd8); + // load from data register + abstract_cmd[2][63:32] = dm::load(ac_ar.aarsize, 5'd8, LoadBaseAddr, dm::DataAddr); + // and store it in the corresponding CSR + abstract_cmd[3][31:0] = dm::csrw(dm::CSR_DSCRATCH1, 5'd8); + // restore s0 again from dscratch + abstract_cmd[3][63:32] = dm::csrr(dm::CSR_DSCRATCH0, 5'd8); + // GPR/FPR access + end else if (ac_ar.regno[12]) begin + // determine whether we want to access the floating point register or not + if (ac_ar.regno[5]) begin + abstract_cmd[2][31:0] = + dm::float_load(ac_ar.aarsize, ac_ar.regno[4:0], LoadBaseAddr, dm::DataAddr); + end else begin + abstract_cmd[2][31:0] = + dm::load(ac_ar.aarsize, ac_ar.regno[4:0], LoadBaseAddr, dm::DataAddr); + end + // CSR access + end else begin + // data register to CSR + // store s0 in dscratch + abstract_cmd[2][31:0] = dm::csrw(dm::CSR_DSCRATCH0, 5'd8); + // load from data register + abstract_cmd[2][63:32] = dm::load(ac_ar.aarsize, 5'd8, LoadBaseAddr, dm::DataAddr); + // and store it in the corresponding CSR + abstract_cmd[3][31:0] = dm::csrw(dm::csr_reg_t'(ac_ar.regno[11:0]), 5'd8); + // restore s0 again from dscratch + abstract_cmd[3][63:32] = dm::csrr(dm::CSR_DSCRATCH0, 5'd8); + end + end else if (32'(ac_ar.aarsize) < MaxAar && ac_ar.transfer && !ac_ar.write) begin + // store a0 in dscratch1 + abstract_cmd[0][31:0] = HasSndScratch ? 
+ dm::csrw(dm::CSR_DSCRATCH1, LoadBaseAddr) : + dm::nop(); + // this range is reserved + if (ac_ar.regno[15:14] != '0) begin + abstract_cmd[0][31:0] = dm::ebreak(); // we leave asap + unsupported_command = 1'b1; + // A0 access needs to be handled separately, as we use A0 to load + // the DM address offset need to access DSCRATCH1 in this case + end else if (HasSndScratch && ac_ar.regno[12] && (!ac_ar.regno[5]) && + (ac_ar.regno[4:0] == 5'd10)) begin + // store s0 in dscratch + abstract_cmd[2][31:0] = dm::csrw(dm::CSR_DSCRATCH0, 5'd8); + // read value from CSR into s0 + abstract_cmd[2][63:32] = dm::csrr(dm::CSR_DSCRATCH1, 5'd8); + // and store s0 into data section + abstract_cmd[3][31:0] = dm::store(ac_ar.aarsize, 5'd8, LoadBaseAddr, dm::DataAddr); + // restore s0 again from dscratch + abstract_cmd[3][63:32] = dm::csrr(dm::CSR_DSCRATCH0, 5'd8); + // GPR/FPR access + end else if (ac_ar.regno[12]) begin + // determine whether we want to access the floating point register or not + if (ac_ar.regno[5]) begin + abstract_cmd[2][31:0] = + dm::float_store(ac_ar.aarsize, ac_ar.regno[4:0], LoadBaseAddr, dm::DataAddr); + end else begin + abstract_cmd[2][31:0] = + dm::store(ac_ar.aarsize, ac_ar.regno[4:0], LoadBaseAddr, dm::DataAddr); + end + // CSR access + end else begin + // CSR register to data + // store s0 in dscratch + abstract_cmd[2][31:0] = dm::csrw(dm::CSR_DSCRATCH0, 5'd8); + // read value from CSR into s0 + abstract_cmd[2][63:32] = dm::csrr(dm::csr_reg_t'(ac_ar.regno[11:0]), 5'd8); + // and store s0 into data section + abstract_cmd[3][31:0] = dm::store(ac_ar.aarsize, 5'd8, LoadBaseAddr, dm::DataAddr); + // restore s0 again from dscratch + abstract_cmd[3][63:32] = dm::csrr(dm::CSR_DSCRATCH0, 5'd8); + end + end else if (32'(ac_ar.aarsize) >= MaxAar || ac_ar.aarpostincrement == 1'b1) begin + // this should happen when e.g. ac_ar.aarsize >= MaxAar + // OpenOCD will try to do an access with aarsize=64 bits + // first before falling back to 32 bits. 
+ abstract_cmd[0][31:0] = dm::ebreak(); // we leave asap + unsupported_command = 1'b1; + end + + // Check whether we need to execute the program buffer. When we + // get an unsupported command we really should abort instead of + // still trying to execute the program buffer, makes it easier + // for the debugger to recover + if (ac_ar.postexec && !unsupported_command) begin + // issue a nop, we will automatically run into the program buffer + abstract_cmd[4][63:32] = dm::nop(); + end + end + // not supported at the moment + // dm::QuickAccess:; + // dm::AccessMemory:; + default: begin + abstract_cmd[0][31:0] = dm::ebreak(); + unsupported_command = 1'b1; + end + endcase + end + + logic [63:0] rom_addr; + assign rom_addr = 64'(addr_i); + + // Depending on whether the debug module is located + // at the zero page we can instantiate a simplified version + // which only requires one scratch register per hart. + // For all other cases we need to set aside + // two registers per hart, hence we also need + // two scratch registers. + if (HasSndScratch) begin : gen_rom_snd_scratch + debug_rom i_debug_rom ( + .clk_i, + .req_i, + .addr_i ( rom_addr ), + .rdata_o ( rom_rdata ) + ); + end else begin : gen_rom_one_scratch + // It uses the zero register (`x0`) as the base + // for its loads. The zero register does not need to + // be saved. 
+ debug_rom_one_scratch i_debug_rom ( + .clk_i, + .req_i, + .addr_i ( rom_addr ), + .rdata_o ( rom_rdata ) + ); + end + + // ROM starts at the HaltAddress of the core e.g.: it immediately jumps to + // the ROM base address + assign fwd_rom_d = logic'(addr_i[DbgAddressBits-1:0] >= dm::HaltAddress[DbgAddressBits-1:0]); + + always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs + if (!rst_ni) begin + fwd_rom_q <= 1'b0; + rdata_q <= '0; + state_q <= Idle; + word_enable32_q <= 1'b0; + end else begin + fwd_rom_q <= fwd_rom_d; + rdata_q <= rdata_d; + state_q <= state_d; + word_enable32_q <= addr_i[2]; + end + end + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + halted_q <= 1'b0; + resuming_q <= 1'b0; + end else begin + halted_q <= SelectableHarts & halted_d; + resuming_q <= SelectableHarts & resuming_d; + end + end + +endmodule : dm_mem diff --git a/test/type_param/corev_apu/riscv-dbg/src/dm_pkg.sv b/test/type_param/corev_apu/riscv-dbg/src/dm_pkg.sv new file mode 100644 index 00000000..971f1281 --- /dev/null +++ b/test/type_param/corev_apu/riscv-dbg/src/dm_pkg.sv @@ -0,0 +1,436 @@ +/* Copyright 2018 ETH Zurich and University of Bologna. + * Copyright and related rights are licensed under the Solderpad Hardware + * License, Version 0.51 (the “License”); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law + * or agreed to in writing, software, hardware and materials distributed under + * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * + * File: dm_pkg.sv + * Author: Florian Zaruba + * Date: 30.6.2018 + * + * Description: Debug-module package, contains common system definitions. 
+ * + */ + +package dm; + localparam logic [3:0] DbgVersion013 = 4'h2; + // size of program buffer in chunks of 32-bit words + localparam logic [4:0] ProgBufSize = 5'h8; + + // number of data registers implemented + localparam logic [3:0] DataCount = 4'h2; + + // address to which a hart should jump when it was requested to halt + localparam logic [63:0] HaltAddress = 64'h800; + localparam logic [63:0] ResumeAddress = HaltAddress + 4; + localparam logic [63:0] ExceptionAddress = HaltAddress + 8; + + // address where data0-15 is shadowed or if shadowed in a CSR + // address of the first CSR used for shadowing the data + localparam logic [11:0] DataAddr = 12'h380; // we are aligned with Rocket here + + // debug registers + typedef enum logic [7:0] { + Data0 = 8'h04, + Data1 = 8'h05, + Data2 = 8'h06, + Data3 = 8'h07, + Data4 = 8'h08, + Data5 = 8'h09, + Data6 = 8'h0A, + Data7 = 8'h0B, + Data8 = 8'h0C, + Data9 = 8'h0D, + Data10 = 8'h0E, + Data11 = 8'h0F, + DMControl = 8'h10, + DMStatus = 8'h11, // r/o + Hartinfo = 8'h12, + HaltSum1 = 8'h13, + HAWindowSel = 8'h14, + HAWindow = 8'h15, + AbstractCS = 8'h16, + Command = 8'h17, + AbstractAuto = 8'h18, + DevTreeAddr0 = 8'h19, + DevTreeAddr1 = 8'h1A, + DevTreeAddr2 = 8'h1B, + DevTreeAddr3 = 8'h1C, + NextDM = 8'h1D, + ProgBuf0 = 8'h20, + ProgBuf1 = 8'h21, + ProgBuf2 = 8'h22, + ProgBuf3 = 8'h23, + ProgBuf4 = 8'h24, + ProgBuf5 = 8'h25, + ProgBuf6 = 8'h26, + ProgBuf7 = 8'h27, + ProgBuf8 = 8'h28, + ProgBuf9 = 8'h29, + ProgBuf10 = 8'h2A, + ProgBuf11 = 8'h2B, + ProgBuf12 = 8'h2C, + ProgBuf13 = 8'h2D, + ProgBuf14 = 8'h2E, + ProgBuf15 = 8'h2F, + AuthData = 8'h30, + HaltSum2 = 8'h34, + HaltSum3 = 8'h35, + SBAddress3 = 8'h37, + SBCS = 8'h38, + SBAddress0 = 8'h39, + SBAddress1 = 8'h3A, + SBAddress2 = 8'h3B, + SBData0 = 8'h3C, + SBData1 = 8'h3D, + SBData2 = 8'h3E, + SBData3 = 8'h3F, + HaltSum0 = 8'h40 + } dm_csr_e; + + // debug causes + localparam logic [2:0] CauseBreakpoint = 3'h1; + localparam logic [2:0] CauseTrigger = 3'h2; + 
localparam logic [2:0] CauseRequest = 3'h3; + localparam logic [2:0] CauseSingleStep = 3'h4; + + typedef struct packed { + logic [31:23] zero1; + logic impebreak; + logic [21:20] zero0; + logic allhavereset; + logic anyhavereset; + logic allresumeack; + logic anyresumeack; + logic allnonexistent; + logic anynonexistent; + logic allunavail; + logic anyunavail; + logic allrunning; + logic anyrunning; + logic allhalted; + logic anyhalted; + logic authenticated; + logic authbusy; + logic hasresethaltreq; + logic devtreevalid; + logic [3:0] version; + } dmstatus_t; + + typedef struct packed { + logic haltreq; + logic resumereq; + logic hartreset; + logic ackhavereset; + logic zero1; + logic hasel; + logic [25:16] hartsello; + logic [15:6] hartselhi; + logic [5:4] zero0; + logic setresethaltreq; + logic clrresethaltreq; + logic ndmreset; + logic dmactive; + } dmcontrol_t; + + typedef struct packed { + logic [31:24] zero1; + logic [23:20] nscratch; + logic [19:17] zero0; + logic dataaccess; + logic [15:12] datasize; + logic [11:0] dataaddr; + } hartinfo_t; + + typedef enum logic [2:0] { + CmdErrNone, CmdErrBusy, CmdErrNotSupported, + CmdErrorException, CmdErrorHaltResume, + CmdErrorBus, CmdErrorOther = 7 + } cmderr_e; + + typedef struct packed { + logic [31:29] zero3; + logic [28:24] progbufsize; + logic [23:13] zero2; + logic busy; + logic zero1; + cmderr_e cmderr; + logic [7:4] zero0; + logic [3:0] datacount; + } abstractcs_t; + + typedef enum logic [7:0] { + AccessRegister = 8'h0, + QuickAccess = 8'h1, + AccessMemory = 8'h2 + } cmd_e; + + typedef struct packed { + cmd_e cmdtype; + logic [23:0] control; + } command_t; + + typedef struct packed { + logic [31:16] autoexecprogbuf; + logic [15:12] zero0; + logic [11:0] autoexecdata; + } abstractauto_t; + + typedef struct packed { + logic zero1; + logic [22:20] aarsize; + logic aarpostincrement; + logic postexec; + logic transfer; + logic write; + logic [15:0] regno; + } ac_ar_cmd_t; + + // DTM + typedef enum logic [1:0] { + 
DTM_NOP = 2'h0, + DTM_READ = 2'h1, + DTM_WRITE = 2'h2 + } dtm_op_e; + + typedef struct packed { + logic [31:29] sbversion; + logic [28:23] zero0; + logic sbbusyerror; + logic sbbusy; + logic sbreadonaddr; + logic [19:17] sbaccess; + logic sbautoincrement; + logic sbreadondata; + logic [14:12] sberror; + logic [11:5] sbasize; + logic sbaccess128; + logic sbaccess64; + logic sbaccess32; + logic sbaccess16; + logic sbaccess8; + } sbcs_t; + + localparam logic [1:0] DTM_SUCCESS = 2'h0; + + typedef struct packed { + logic [6:0] addr; + dtm_op_e op; + logic [31:0] data; + } dmi_req_t; + + typedef struct packed { + logic [31:0] data; + logic [1:0] resp; + } dmi_resp_t; + + // privilege levels + typedef enum logic[1:0] { + PRIV_LVL_M = 2'b11, + PRIV_LVL_S = 2'b01, + PRIV_LVL_U = 2'b00 + } priv_lvl_t; + + // debugregs in core + typedef struct packed { + logic [31:28] xdebugver; + logic [27:16] zero2; + logic ebreakm; + logic zero1; + logic ebreaks; + logic ebreaku; + logic stepie; + logic stopcount; + logic stoptime; + logic [8:6] cause; + logic zero0; + logic mprven; + logic nmip; + logic step; + priv_lvl_t prv; + } dcsr_t; + + // CSRs + typedef enum logic [11:0] { + // Floating-Point CSRs + CSR_FFLAGS = 12'h001, + CSR_FRM = 12'h002, + CSR_FCSR = 12'h003, + CSR_FTRAN = 12'h800, + // Supervisor Mode CSRs + CSR_SSTATUS = 12'h100, + CSR_SIE = 12'h104, + CSR_STVEC = 12'h105, + CSR_SCOUNTEREN = 12'h106, + CSR_SSCRATCH = 12'h140, + CSR_SEPC = 12'h141, + CSR_SCAUSE = 12'h142, + CSR_STVAL = 12'h143, + CSR_SIP = 12'h144, + CSR_SATP = 12'h180, + // Machine Mode CSRs + CSR_MSTATUS = 12'h300, + CSR_MISA = 12'h301, + CSR_MEDELEG = 12'h302, + CSR_MIDELEG = 12'h303, + CSR_MIE = 12'h304, + CSR_MTVEC = 12'h305, + CSR_MCOUNTEREN = 12'h306, + CSR_MSCRATCH = 12'h340, + CSR_MEPC = 12'h341, + CSR_MCAUSE = 12'h342, + CSR_MTVAL = 12'h343, + CSR_MIP = 12'h344, + CSR_PMPCFG0 = 12'h3A0, + CSR_PMPADDR0 = 12'h3B0, + CSR_MVENDORID = 12'hF11, + CSR_MARCHID = 12'hF12, + CSR_MIMPID = 12'hF13, + CSR_MHARTID 
= 12'hF14, + CSR_MCYCLE = 12'hB00, + CSR_MINSTRET = 12'hB02, + CSR_DCACHE = 12'h701, + CSR_ICACHE = 12'h700, + + CSR_TSELECT = 12'h7A0, + CSR_TDATA1 = 12'h7A1, + CSR_TDATA2 = 12'h7A2, + CSR_TDATA3 = 12'h7A3, + CSR_TINFO = 12'h7A4, + + // Debug CSR + CSR_DCSR = 12'h7b0, + CSR_DPC = 12'h7b1, + CSR_DSCRATCH0 = 12'h7b2, // optional + CSR_DSCRATCH1 = 12'h7b3, // optional + + // Counters and Timers + CSR_CYCLE = 12'hC00, + CSR_TIME = 12'hC01, + CSR_INSTRET = 12'hC02 + } csr_reg_t; + + // SBA state + typedef enum logic [2:0] { + Idle, + Read, + Write, + WaitRead, + WaitWrite + } sba_state_e; + + // Instruction Generation Helpers + function automatic logic [31:0] jal (logic [4:0] rd, + logic [20:0] imm); + // OpCode Jal + return {imm[20], imm[10:1], imm[11], imm[19:12], rd, 7'h6f}; + endfunction + + function automatic logic [31:0] jalr (logic [4:0] rd, + logic [4:0] rs1, + logic [11:0] offset); + // OpCode Jal + return {offset[11:0], rs1, 3'b0, rd, 7'h67}; + endfunction + + function automatic logic [31:0] andi (logic [4:0] rd, + logic [4:0] rs1, + logic [11:0] imm); + // OpCode andi + return {imm[11:0], rs1, 3'h7, rd, 7'h13}; + endfunction + + function automatic logic [31:0] slli (logic [4:0] rd, + logic [4:0] rs1, + logic [5:0] shamt); + // OpCode slli + return {6'b0, shamt[5:0], rs1, 3'h1, rd, 7'h13}; + endfunction + + function automatic logic [31:0] srli (logic [4:0] rd, + logic [4:0] rs1, + logic [5:0] shamt); + // OpCode srli + return {6'b0, shamt[5:0], rs1, 3'h5, rd, 7'h13}; + endfunction + + function automatic logic [31:0] load (logic [2:0] size, + logic [4:0] dest, + logic [4:0] base, + logic [11:0] offset); + // OpCode Load + return {offset[11:0], base, size, dest, 7'h03}; + endfunction + + function automatic logic [31:0] auipc (logic [4:0] rd, + logic [20:0] imm); + // OpCode Auipc + return {imm[20], imm[10:1], imm[11], imm[19:12], rd, 7'h17}; + endfunction + + function automatic logic [31:0] store (logic [2:0] size, + logic [4:0] src, + logic [4:0] base, + logic 
[11:0] offset); + // OpCode Store + return {offset[11:5], src, base, size, offset[4:0], 7'h23}; + endfunction + + function automatic logic [31:0] float_load (logic [2:0] size, + logic [4:0] dest, + logic [4:0] base, + logic [11:0] offset); + // OpCode Load + return {offset[11:0], base, size, dest, 7'b00_001_11}; + endfunction + + function automatic logic [31:0] float_store (logic [2:0] size, + logic [4:0] src, + logic [4:0] base, + logic [11:0] offset); + // OpCode Store + return {offset[11:5], src, base, size, offset[4:0], 7'b01_001_11}; + endfunction + + function automatic logic [31:0] csrw (csr_reg_t csr, + logic [4:0] rs1); + // CSRRW, rd, OpCode System + return {csr, rs1, 3'h1, 5'h0, 7'h73}; + endfunction + + function automatic logic [31:0] csrr (csr_reg_t csr, + logic [4:0] dest); + // rs1, CSRRS, rd, OpCode System + return {csr, 5'h0, 3'h2, dest, 7'h73}; + endfunction + + function automatic logic [31:0] branch(logic [4:0] src2, + logic [4:0] src1, + logic [2:0] funct3, + logic [11:0] offset); + // OpCode Branch + return {offset[11], offset[9:4], src2, src1, funct3, + offset[3:0], offset[10], 7'b11_000_11}; + endfunction + + function automatic logic [31:0] ebreak (); + return 32'h00100073; + endfunction + + function automatic logic [31:0] wfi (); + return 32'h10500073; + endfunction + + function automatic logic [31:0] nop (); + return 32'h00000013; + endfunction + + function automatic logic [31:0] illegal (); + return 32'h00000000; + endfunction + +endpackage : dm diff --git a/test/type_param/corev_apu/riscv-dbg/src/dm_sba.sv b/test/type_param/corev_apu/riscv-dbg/src/dm_sba.sv new file mode 100644 index 00000000..98c586c6 --- /dev/null +++ b/test/type_param/corev_apu/riscv-dbg/src/dm_sba.sv @@ -0,0 +1,170 @@ +/* Copyright 2018 ETH Zurich and University of Bologna. +* Copyright and related rights are licensed under the Solderpad Hardware +* License, Version 0.51 (the “License”); you may not use this file except in +* compliance with the License. 
You may obtain a copy of the License at +* http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +* or agreed to in writing, software, hardware and materials distributed under +* this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR +* CONDITIONS OF ANY KIND, either express or implied. See the License for the +* specific language governing permissions and limitations under the License. +* +* File: dm_sba.sv +* Author: Florian Zaruba +* Date: 1.8.2018 +* +* Description: System Bus Access Module +* +*/ +module dm_sba #( + parameter int unsigned BusWidth = 32, + parameter bit ReadByteEnable = 1 +) ( + input logic clk_i, // Clock + input logic rst_ni, + input logic dmactive_i, // synchronous reset active low + + output logic master_req_o, + output logic [BusWidth-1:0] master_add_o, + output logic master_we_o, + output logic [BusWidth-1:0] master_wdata_o, + output logic [BusWidth/8-1:0] master_be_o, + input logic master_gnt_i, + input logic master_r_valid_i, + input logic [BusWidth-1:0] master_r_rdata_i, + + input logic [BusWidth-1:0] sbaddress_i, + input logic sbaddress_write_valid_i, + // control signals in + input logic sbreadonaddr_i, + output logic [BusWidth-1:0] sbaddress_o, + input logic sbautoincrement_i, + input logic [2:0] sbaccess_i, + // data in + input logic sbreadondata_i, + input logic [BusWidth-1:0] sbdata_i, + input logic sbdata_read_valid_i, + input logic sbdata_write_valid_i, + // read data out + output logic [BusWidth-1:0] sbdata_o, + output logic sbdata_valid_o, + // control signals + output logic sbbusy_o, + output logic sberror_valid_o, // bus error occurred + output logic [2:0] sberror_o // bus error occurred +); + + dm::sba_state_e state_d, state_q; + + logic [BusWidth-1:0] address; + logic req; + logic gnt; + logic we; + logic [BusWidth/8-1:0] be; + logic [BusWidth/8-1:0] be_mask; + logic [$clog2(BusWidth/8)-1:0] be_idx; + + assign sbbusy_o = logic'(state_q != dm::Idle); + + always_comb begin : p_be_mask + 
be_mask = '0; + + // generate byte enable mask + unique case (sbaccess_i) + 3'b000: begin + be_mask[be_idx] = '1; + end + 3'b001: begin + be_mask[int'({be_idx[$high(be_idx):1], 1'b0}) +: 2] = '1; + end + 3'b010: begin + if (BusWidth == 32'd64) be_mask[int'({be_idx[$high(be_idx)], 2'h0}) +: 4] = '1; + else be_mask = '1; + end + 3'b011: be_mask = '1; + default: ; + endcase + end + + always_comb begin : p_fsm + req = 1'b0; + address = sbaddress_i; + we = 1'b0; + be = '0; + be_idx = sbaddress_i[$clog2(BusWidth/8)-1:0]; + + sberror_o = '0; + sberror_valid_o = 1'b0; + sbaddress_o = sbaddress_i; + + state_d = state_q; + + unique case (state_q) + dm::Idle: begin + // debugger requested a read + if (sbaddress_write_valid_i && sbreadonaddr_i) state_d = dm::Read; + // debugger requested a write + if (sbdata_write_valid_i) state_d = dm::Write; + // perform another read + if (sbdata_read_valid_i && sbreadondata_i) state_d = dm::Read; + end + + dm::Read: begin + req = 1'b1; + if (ReadByteEnable) be = be_mask; + if (gnt) state_d = dm::WaitRead; + end + + dm::Write: begin + req = 1'b1; + we = 1'b1; + be = be_mask; + if (gnt) state_d = dm::WaitWrite; + end + + dm::WaitRead: begin + if (sbdata_valid_o) begin + state_d = dm::Idle; + // auto-increment address + if (sbautoincrement_i) sbaddress_o = sbaddress_i + (32'h1 << sbaccess_i); + end + end + + dm::WaitWrite: begin + if (sbdata_valid_o) begin + state_d = dm::Idle; + // auto-increment address + if (sbautoincrement_i) sbaddress_o = sbaddress_i + (32'h1 << sbaccess_i); + end + end + + default: state_d = dm::Idle; // catch parasitic state + endcase + + // handle error case + if (sbaccess_i > 3 && state_q != dm::Idle) begin + req = 1'b0; + state_d = dm::Idle; + sberror_valid_o = 1'b1; + sberror_o = 3'd3; + end + // further error handling should go here ... 
+ end + + always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs + if (!rst_ni) begin + state_q <= dm::Idle; + end else begin + state_q <= state_d; + end + end + + assign master_req_o = req; + assign master_add_o = address[BusWidth-1:0]; + assign master_we_o = we; + assign master_wdata_o = sbdata_i[BusWidth-1:0]; + assign master_be_o = be[BusWidth/8-1:0]; + assign gnt = master_gnt_i; + assign sbdata_valid_o = master_r_valid_i; + assign sbdata_o = master_r_rdata_i[BusWidth-1:0]; + +endmodule : dm_sba diff --git a/test/type_param/corev_apu/riscv-dbg/src/dm_top.sv b/test/type_param/corev_apu/riscv-dbg/src/dm_top.sv new file mode 100644 index 00000000..9887aef6 --- /dev/null +++ b/test/type_param/corev_apu/riscv-dbg/src/dm_top.sv @@ -0,0 +1,218 @@ +/* Copyright 2018 ETH Zurich and University of Bologna. +* Copyright and related rights are licensed under the Solderpad Hardware +* License, Version 0.51 (the “License”); you may not use this file except in +* compliance with the License. You may obtain a copy of the License at +* http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +* or agreed to in writing, software, hardware and materials distributed under +* this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR +* CONDITIONS OF ANY KIND, either express or implied. See the License for the +* specific language governing permissions and limitations under the License. +* +* File: dm_top.sv +* Author: Florian Zaruba +* Date: 30.6.2018 +* +* Description: Top-level of debug module (DM). This is an AXI-Slave. +* DTM protocol is equal to SiFives debug protocol to leverage +* SW infrastructure re-use. As of version 0.13 +*/ + +module dm_top #( + parameter int unsigned NrHarts = 1, + parameter int unsigned BusWidth = 32, + parameter int unsigned DmBaseAddress = 'h1000, // default to non-zero page + // Bitmask to select physically available harts for systems + // that don't use hart numbers in a contiguous fashion. 
+ parameter logic [NrHarts-1:0] SelectableHarts = {NrHarts{1'b1}}, + parameter bit ReadByteEnable = 1 // toggle new behavior to drive master_be_o during a read +) ( + input logic clk_i, // clock + input logic rst_ni, // asynchronous reset active low, connect PoR here, not the system reset + input logic testmode_i, + output logic ndmreset_o, // non-debug module reset + output logic dmactive_o, // debug module is active + output logic [NrHarts-1:0] debug_req_o, // async debug request + input logic [NrHarts-1:0] unavailable_i, // communicate whether the hart is unavailable (e.g.: power down) + input dm::hartinfo_t [NrHarts-1:0] hartinfo_i, + + input logic slave_req_i, + input logic slave_we_i, + input logic [BusWidth-1:0] slave_addr_i, + input logic [BusWidth/8-1:0] slave_be_i, + input logic [BusWidth-1:0] slave_wdata_i, + output logic [BusWidth-1:0] slave_rdata_o, + + output logic master_req_o, + output logic [BusWidth-1:0] master_add_o, + output logic master_we_o, + output logic [BusWidth-1:0] master_wdata_o, + output logic [BusWidth/8-1:0] master_be_o, + input logic master_gnt_i, + input logic master_r_valid_i, + input logic [BusWidth-1:0] master_r_rdata_i, + + // Connection to DTM - compatible to RocketChip Debug Module + input logic dmi_rst_ni, + input logic dmi_req_valid_i, + output logic dmi_req_ready_o, + input dm::dmi_req_t dmi_req_i, + + output logic dmi_resp_valid_o, + input logic dmi_resp_ready_i, + output dm::dmi_resp_t dmi_resp_o +); + + // Debug CSRs + logic [NrHarts-1:0] halted; + // logic [NrHarts-1:0] running; + logic [NrHarts-1:0] resumeack; + logic [NrHarts-1:0] haltreq; + logic [NrHarts-1:0] resumereq; + logic clear_resumeack; + logic cmd_valid; + dm::command_t cmd; + + logic cmderror_valid; + dm::cmderr_e cmderror; + logic cmdbusy; + logic [dm::ProgBufSize-1:0][31:0] progbuf; + logic [dm::DataCount-1:0][31:0] data_csrs_mem; + logic [dm::DataCount-1:0][31:0] data_mem_csrs; + logic data_valid; + logic [19:0] hartsel; + // System Bus Access Module + 
logic [BusWidth-1:0] sbaddress_csrs_sba; + logic [BusWidth-1:0] sbaddress_sba_csrs; + logic sbaddress_write_valid; + logic sbreadonaddr; + logic sbautoincrement; + logic [2:0] sbaccess; + logic sbreadondata; + logic [BusWidth-1:0] sbdata_write; + logic sbdata_read_valid; + logic sbdata_write_valid; + logic [BusWidth-1:0] sbdata_read; + logic sbdata_valid; + logic sbbusy; + logic sberror_valid; + logic [2:0] sberror; + + + dm_csrs #( + .NrHarts(NrHarts), + .BusWidth(BusWidth), + .SelectableHarts(SelectableHarts) + ) i_dm_csrs ( + .clk_i, + .rst_ni, + .testmode_i, + .dmi_rst_ni, + .dmi_req_valid_i, + .dmi_req_ready_o, + .dmi_req_i, + .dmi_resp_valid_o, + .dmi_resp_ready_i, + .dmi_resp_o, + .ndmreset_o, + .dmactive_o, + .hartsel_o ( hartsel ), + .hartinfo_i, + .halted_i ( halted ), + .unavailable_i, + .resumeack_i ( resumeack ), + .haltreq_o ( haltreq ), + .resumereq_o ( resumereq ), + .clear_resumeack_o ( clear_resumeack ), + .cmd_valid_o ( cmd_valid ), + .cmd_o ( cmd ), + .cmderror_valid_i ( cmderror_valid ), + .cmderror_i ( cmderror ), + .cmdbusy_i ( cmdbusy ), + .progbuf_o ( progbuf ), + .data_i ( data_mem_csrs ), + .data_valid_i ( data_valid ), + .data_o ( data_csrs_mem ), + .sbaddress_o ( sbaddress_csrs_sba ), + .sbaddress_i ( sbaddress_sba_csrs ), + .sbaddress_write_valid_o ( sbaddress_write_valid ), + .sbreadonaddr_o ( sbreadonaddr ), + .sbautoincrement_o ( sbautoincrement ), + .sbaccess_o ( sbaccess ), + .sbreadondata_o ( sbreadondata ), + .sbdata_o ( sbdata_write ), + .sbdata_read_valid_o ( sbdata_read_valid ), + .sbdata_write_valid_o ( sbdata_write_valid ), + .sbdata_i ( sbdata_read ), + .sbdata_valid_i ( sbdata_valid ), + .sbbusy_i ( sbbusy ), + .sberror_valid_i ( sberror_valid ), + .sberror_i ( sberror ) + ); + + dm_sba #( + .BusWidth(BusWidth), + .ReadByteEnable(ReadByteEnable) + ) i_dm_sba ( + .clk_i, + .rst_ni, + .dmactive_i ( dmactive_o ), + + .master_req_o, + .master_add_o, + .master_we_o, + .master_wdata_o, + .master_be_o, + .master_gnt_i, + 
.master_r_valid_i, + .master_r_rdata_i, + + .sbaddress_i ( sbaddress_csrs_sba ), + .sbaddress_o ( sbaddress_sba_csrs ), + .sbaddress_write_valid_i ( sbaddress_write_valid ), + .sbreadonaddr_i ( sbreadonaddr ), + .sbautoincrement_i ( sbautoincrement ), + .sbaccess_i ( sbaccess ), + .sbreadondata_i ( sbreadondata ), + .sbdata_i ( sbdata_write ), + .sbdata_read_valid_i ( sbdata_read_valid ), + .sbdata_write_valid_i ( sbdata_write_valid ), + .sbdata_o ( sbdata_read ), + .sbdata_valid_o ( sbdata_valid ), + .sbbusy_o ( sbbusy ), + .sberror_valid_o ( sberror_valid ), + .sberror_o ( sberror ) + ); + + dm_mem #( + .NrHarts(NrHarts), + .BusWidth(BusWidth), + .SelectableHarts(SelectableHarts), + .DmBaseAddress(DmBaseAddress) + ) i_dm_mem ( + .clk_i, + .rst_ni, + .debug_req_o, + .hartsel_i ( hartsel ), + .haltreq_i ( haltreq ), + .resumereq_i ( resumereq ), + .clear_resumeack_i ( clear_resumeack ), + .halted_o ( halted ), + .resuming_o ( resumeack ), + .cmd_valid_i ( cmd_valid ), + .cmd_i ( cmd ), + .cmderror_valid_o ( cmderror_valid ), + .cmderror_o ( cmderror ), + .cmdbusy_o ( cmdbusy ), + .progbuf_i ( progbuf ), + .data_i ( data_csrs_mem ), + .data_o ( data_mem_csrs ), + .data_valid_o ( data_valid ), + .req_i ( slave_req_i ), + .we_i ( slave_we_i ), + .addr_i ( slave_addr_i ), + .wdata_i ( slave_wdata_i ), + .be_i ( slave_be_i ), + .rdata_o ( slave_rdata_o ) + ); + +endmodule : dm_top diff --git a/test/type_param/corev_apu/riscv-dbg/src/dmi_cdc.sv b/test/type_param/corev_apu/riscv-dbg/src/dmi_cdc.sv new file mode 100644 index 00000000..4665c917 --- /dev/null +++ b/test/type_param/corev_apu/riscv-dbg/src/dmi_cdc.sv @@ -0,0 +1,73 @@ +/* Copyright 2018 ETH Zurich and University of Bologna. +* Copyright and related rights are licensed under the Solderpad Hardware +* License, Version 0.51 (the “License”); you may not use this file except in +* compliance with the License. You may obtain a copy of the License at +* http://solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law +* or agreed to in writing, software, hardware and materials distributed under +* this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR +* CONDITIONS OF ANY KIND, either express or implied. See the License for the +* specific language governing permissions and limitations under the License. +* +* File: axi_riscv_debug_module.sv +* Author: Andreas Traber +* Author: Florian Zaruba +* +* Description: Clock domain crossings for JTAG to DMI very heavily based +* on previous work by Andreas Traber for the PULP project. +* This is mainly a wrapper around the existing CDCs. +*/ +module dmi_cdc ( + // JTAG side (master side) + input logic tck_i, + input logic trst_ni, + + input dm::dmi_req_t jtag_dmi_req_i, + output logic jtag_dmi_ready_o, + input logic jtag_dmi_valid_i, + + output dm::dmi_resp_t jtag_dmi_resp_o, + output logic jtag_dmi_valid_o, + input logic jtag_dmi_ready_i, + + // core side (slave side) + input logic clk_i, + input logic rst_ni, + + output dm::dmi_req_t core_dmi_req_o, + output logic core_dmi_valid_o, + input logic core_dmi_ready_i, + + input dm::dmi_resp_t core_dmi_resp_i, + output logic core_dmi_ready_o, + input logic core_dmi_valid_i +); + + cdc_2phase #(.T(dm::dmi_req_t)) i_cdc_req ( + .src_rst_ni ( trst_ni ), + .src_clk_i ( tck_i ), + .src_data_i ( jtag_dmi_req_i ), + .src_valid_i ( jtag_dmi_valid_i ), + .src_ready_o ( jtag_dmi_ready_o ), + + .dst_rst_ni ( rst_ni ), + .dst_clk_i ( clk_i ), + .dst_data_o ( core_dmi_req_o ), + .dst_valid_o ( core_dmi_valid_o ), + .dst_ready_i ( core_dmi_ready_i ) + ); + + cdc_2phase #(.T(dm::dmi_resp_t)) i_cdc_resp ( + .src_rst_ni ( rst_ni ), + .src_clk_i ( clk_i ), + .src_data_i ( core_dmi_resp_i ), + .src_valid_i ( core_dmi_valid_i ), + .src_ready_o ( core_dmi_ready_o ), + + .dst_rst_ni ( trst_ni ), + .dst_clk_i ( tck_i ), + .dst_data_o ( jtag_dmi_resp_o ), + .dst_valid_o ( jtag_dmi_valid_o ), + .dst_ready_i ( jtag_dmi_ready_i ) + ); + +endmodule : dmi_cdc diff 
--git a/test/type_param/corev_apu/riscv-dbg/src/dmi_jtag.sv b/test/type_param/corev_apu/riscv-dbg/src/dmi_jtag.sv new file mode 100644 index 00000000..c4c7b525 --- /dev/null +++ b/test/type_param/corev_apu/riscv-dbg/src/dmi_jtag.sv @@ -0,0 +1,271 @@ +/* Copyright 2018 ETH Zurich and University of Bologna. +* Copyright and related rights are licensed under the Solderpad Hardware +* License, Version 0.51 (the “License”); you may not use this file except in +* compliance with the License. You may obtain a copy of the License at +* http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +* or agreed to in writing, software, hardware and materials distributed under +* this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR +* CONDITIONS OF ANY KIND, either express or implied. See the License for the +* specific language governing permissions and limitations under the License. +* +* File: axi_riscv_debug_module.sv +* Author: Florian Zaruba +* Date: 19.7.2018 +* +* Description: JTAG DMI (debug module interface) +* +*/ + +module dmi_jtag #( + parameter logic [31:0] IdcodeValue = 32'h00000001 +) ( + input logic clk_i, // DMI Clock + input logic rst_ni, // Asynchronous reset active low + input logic testmode_i, + + output logic dmi_rst_no, // hard reset + output dm::dmi_req_t dmi_req_o, + output logic dmi_req_valid_o, + input logic dmi_req_ready_i, + + input dm::dmi_resp_t dmi_resp_i, + output logic dmi_resp_ready_o, + input logic dmi_resp_valid_i, + + input logic tck_i, // JTAG test clock pad + input logic tms_i, // JTAG test mode select pad + input logic trst_ni, // JTAG test reset pad + input logic td_i, // JTAG test data input pad + output logic td_o, // JTAG test data output pad + output logic tdo_oe_o // Data out output enable +); + assign dmi_rst_no = rst_ni; + + logic test_logic_reset; + logic shift_dr; + logic update_dr; + logic capture_dr; + logic dmi_access; + logic dtmcs_select; + logic dmi_reset; + logic dmi_tdi; + logic dmi_tdo; 
+ + dm::dmi_req_t dmi_req; + logic dmi_req_ready; + logic dmi_req_valid; + + dm::dmi_resp_t dmi_resp; + logic dmi_resp_valid; + logic dmi_resp_ready; + + typedef struct packed { + logic [6:0] address; + logic [31:0] data; + logic [1:0] op; + } dmi_t; + + typedef enum logic [1:0] { + DMINoError = 2'h0, DMIReservedError = 2'h1, + DMIOPFailed = 2'h2, DMIBusy = 2'h3 + } dmi_error_e; + + typedef enum logic [2:0] { Idle, Read, WaitReadValid, Write, WaitWriteValid } state_e; + state_e state_d, state_q; + + logic [$bits(dmi_t)-1:0] dr_d, dr_q; + logic [6:0] address_d, address_q; + logic [31:0] data_d, data_q; + + dmi_t dmi; + assign dmi = dmi_t'(dr_q); + assign dmi_req.addr = address_q; + assign dmi_req.data = data_q; + assign dmi_req.op = (state_q == Write) ? dm::DTM_WRITE : dm::DTM_READ; + // we'will always be ready to accept the data we requested + assign dmi_resp_ready = 1'b1; + + logic error_dmi_busy; + dmi_error_e error_d, error_q; + + always_comb begin : p_fsm + error_dmi_busy = 1'b0; + // default assignments + state_d = state_q; + address_d = address_q; + data_d = data_q; + error_d = error_q; + + dmi_req_valid = 1'b0; + + unique case (state_q) + Idle: begin + // make sure that no error is sticky + if (dmi_access && update_dr && (error_q == DMINoError)) begin + // save address and value + address_d = dmi.address; + data_d = dmi.data; + if (dm::dtm_op_e'(dmi.op) == dm::DTM_READ) begin + state_d = Read; + end else if (dm::dtm_op_e'(dmi.op) == dm::DTM_WRITE) begin + state_d = Write; + end + // else this is a nop and we can stay here + end + end + + Read: begin + dmi_req_valid = 1'b1; + if (dmi_req_ready) begin + state_d = WaitReadValid; + end + end + + WaitReadValid: begin + // load data into register and shift out + if (dmi_resp_valid) begin + data_d = dmi_resp.data; + state_d = Idle; + end + end + + Write: begin + dmi_req_valid = 1'b1; + // request sent, wait for response before going back to idle + if (dmi_req_ready) begin + state_d = WaitWriteValid; + end + end + + 
WaitWriteValid: begin + // got a valid answer go back to idle + if (dmi_resp_valid) begin + state_d = Idle; + end + end + + default: begin + // just wait for idle here + if (dmi_resp_valid) begin + state_d = Idle; + end + end + endcase + + // update_dr means we got another request but we didn't finish + // the one in progress, this state is sticky + if (update_dr && state_q != Idle) begin + error_dmi_busy = 1'b1; + end + + // if capture_dr goes high while we are in the read state + // or in the corresponding wait state we are not giving back a valid word + // -> throw an error + if (capture_dr && state_q inside {Read, WaitReadValid}) begin + error_dmi_busy = 1'b1; + end + + if (error_dmi_busy) begin + error_d = DMIBusy; + end + // clear sticky error flag + if (update_dr && dmi_reset && dtmcs_select) begin + error_d = DMINoError; + end + end + + // shift register + assign dmi_tdo = dr_q[0]; + + always_comb begin : p_shift + dr_d = dr_q; + + if (capture_dr) begin + if (dmi_access) begin + if (error_q == DMINoError && !error_dmi_busy) begin + dr_d = {address_q, data_q, DMINoError}; + // DMI was busy, report an error + end else if (error_q == DMIBusy || error_dmi_busy) begin + dr_d = {address_q, data_q, DMIBusy}; + end + end + end + + if (shift_dr) begin + if (dmi_access) begin + dr_d = {dmi_tdi, dr_q[$bits(dr_q)-1:1]}; + end + end + + if (test_logic_reset) begin + dr_d = '0; + end + end + + always_ff @(posedge tck_i or negedge trst_ni) begin : p_regs + if (!trst_ni) begin + dr_q <= '0; + state_q <= Idle; + address_q <= '0; + data_q <= '0; + error_q <= DMINoError; + end else begin + dr_q <= dr_d; + state_q <= state_d; + address_q <= address_d; + data_q <= data_d; + error_q <= error_d; + end + end + + // --------- + // TAP + // --------- + dmi_jtag_tap #( + .IrLength (5), + .IdcodeValue(IdcodeValue) + ) i_dmi_jtag_tap ( + .tck_i, + .tms_i, + .trst_ni, + .td_i, + .td_o, + .tdo_oe_o, + .testmode_i, + .test_logic_reset_o ( test_logic_reset ), + .shift_dr_o ( shift_dr ), + 
.update_dr_o ( update_dr ), + .capture_dr_o ( capture_dr ), + .dmi_access_o ( dmi_access ), + .dtmcs_select_o ( dtmcs_select ), + .dmi_reset_o ( dmi_reset ), + .dmi_error_i ( error_q ), + .dmi_tdi_o ( dmi_tdi ), + .dmi_tdo_i ( dmi_tdo ) + ); + + // --------- + // CDC + // --------- + dmi_cdc i_dmi_cdc ( + // JTAG side (master side) + .tck_i, + .trst_ni, + .jtag_dmi_req_i ( dmi_req ), + .jtag_dmi_ready_o ( dmi_req_ready ), + .jtag_dmi_valid_i ( dmi_req_valid ), + .jtag_dmi_resp_o ( dmi_resp ), + .jtag_dmi_valid_o ( dmi_resp_valid ), + .jtag_dmi_ready_i ( dmi_resp_ready ), + // core side + .clk_i, + .rst_ni, + .core_dmi_req_o ( dmi_req_o ), + .core_dmi_valid_o ( dmi_req_valid_o ), + .core_dmi_ready_i ( dmi_req_ready_i ), + .core_dmi_resp_i ( dmi_resp_i ), + .core_dmi_ready_o ( dmi_resp_ready_o ), + .core_dmi_valid_i ( dmi_resp_valid_i ) + ); + +endmodule : dmi_jtag diff --git a/test/type_param/corev_apu/riscv-dbg/src/dmi_jtag_tap.sv b/test/type_param/corev_apu/riscv-dbg/src/dmi_jtag_tap.sv new file mode 100644 index 00000000..c2e8d6ef --- /dev/null +++ b/test/type_param/corev_apu/riscv-dbg/src/dmi_jtag_tap.sv @@ -0,0 +1,349 @@ +/* Copyright 2018 ETH Zurich and University of Bologna. + * Copyright and related rights are licensed under the Solderpad Hardware + * License, Version 0.51 (the “License”); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law + * or agreed to in writing, software, hardware and materials distributed under + * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
+ * + * File: dmi_jtag_tap.sv + * Author: Florian Zaruba + * Date: 19.7.2018 + * + * Description: JTAG TAP for DMI (according to debug spec 0.13) + * + */ + +module dmi_jtag_tap #( + parameter int unsigned IrLength = 5, + // JTAG IDCODE Value + parameter logic [31:0] IdcodeValue = 32'h00000001 + // xxxx version + // xxxxxxxxxxxxxxxx part number + // xxxxxxxxxxx manufacturer id + // 1 required by standard +) ( + input logic tck_i, // JTAG test clock pad + input logic tms_i, // JTAG test mode select pad + input logic trst_ni, // JTAG test reset pad + input logic td_i, // JTAG test data input pad + output logic td_o, // JTAG test data output pad + output logic tdo_oe_o, // Data out output enable + input logic testmode_i, + output logic test_logic_reset_o, + output logic shift_dr_o, + output logic update_dr_o, + output logic capture_dr_o, + + // we want to access DMI register + output logic dmi_access_o, + // JTAG is interested in writing the DTM CSR register + output logic dtmcs_select_o, + // clear error state + output logic dmi_reset_o, + input logic [1:0] dmi_error_i, + // test data to submodule + output logic dmi_tdi_o, + // test data in from submodule + input logic dmi_tdo_i +); + + // to submodule + assign dmi_tdi_o = td_i; + + typedef enum logic [3:0] { + TestLogicReset, RunTestIdle, SelectDrScan, + CaptureDr, ShiftDr, Exit1Dr, PauseDr, Exit2Dr, + UpdateDr, SelectIrScan, CaptureIr, ShiftIr, + Exit1Ir, PauseIr, Exit2Ir, UpdateIr + } tap_state_e; + + tap_state_e tap_state_q, tap_state_d; + + typedef enum logic [IrLength-1:0] { + BYPASS0 = 'h0, + IDCODE = 'h1, + DTMCSR = 'h10, + DMIACCESS = 'h11, + BYPASS1 = 'h1f + } ir_reg_e; + + typedef struct packed { + logic [31:18] zero1; + logic dmihardreset; + logic dmireset; + logic zero0; + logic [14:12] idle; + logic [11:10] dmistat; + logic [9:4] abits; + logic [3:0] version; + } dtmcs_t; + + // ---------------- + // IR logic + // ---------------- + + // shift register + logic [IrLength-1:0] jtag_ir_shift_d, 
jtag_ir_shift_q; + // IR register -> this gets captured from shift register upon update_ir + ir_reg_e jtag_ir_d, jtag_ir_q; + logic capture_ir, shift_ir, update_ir; // pause_ir + + always_comb begin : p_jtag + jtag_ir_shift_d = jtag_ir_shift_q; + jtag_ir_d = jtag_ir_q; + + // IR shift register + if (shift_ir) begin + jtag_ir_shift_d = {td_i, jtag_ir_shift_q[IrLength-1:1]}; + end + + // capture IR register + if (capture_ir) begin + jtag_ir_shift_d = IrLength'(4'b0101); + end + + // update IR register + if (update_ir) begin + jtag_ir_d = ir_reg_e'(jtag_ir_shift_q); + end + + // synchronous test-logic reset + if (test_logic_reset_o) begin + jtag_ir_shift_d = '0; + jtag_ir_d = IDCODE; + end + end + + always_ff @(posedge tck_i, negedge trst_ni) begin : p_jtag_ir_reg + if (!trst_ni) begin + jtag_ir_shift_q <= '0; + jtag_ir_q <= IDCODE; + end else begin + jtag_ir_shift_q <= jtag_ir_shift_d; + jtag_ir_q <= jtag_ir_d; + end + end + + // ---------------- + // TAP DR Regs + // ---------------- + // - Bypass + // - IDCODE + // - DTM CS + logic [31:0] idcode_d, idcode_q; + logic idcode_select; + logic bypass_select; + dtmcs_t dtmcs_d, dtmcs_q; + logic bypass_d, bypass_q; // this is a 1-bit register + + assign dmi_reset_o = dtmcs_q.dmireset; + + always_comb begin + idcode_d = idcode_q; + bypass_d = bypass_q; + dtmcs_d = dtmcs_q; + + if (capture_dr_o) begin + if (idcode_select) idcode_d = IdcodeValue; + if (bypass_select) bypass_d = 1'b0; + if (dtmcs_select_o) begin + dtmcs_d = '{ + zero1 : '0, + dmihardreset : 1'b0, + dmireset : 1'b0, + zero0 : '0, + idle : 3'd1, // 1: Enter Run-Test/Idle and leave it immediately + dmistat : dmi_error_i, // 0: No error, 2: Op failed, 3: too fast + abits : 6'd7, // The size of address in dmi + version : 4'd1 // Version described in spec version 0.13 (and later?) 
+ }; + end + end + + if (shift_dr_o) begin + if (idcode_select) idcode_d = {td_i, 31'(idcode_q >> 1)}; + if (bypass_select) bypass_d = td_i; + if (dtmcs_select_o) dtmcs_d = {td_i, 31'(dtmcs_q >> 1)}; + end + + if (test_logic_reset_o) begin + idcode_d = IdcodeValue; + bypass_d = 1'b0; + end + end + + // ---------------- + // Data reg select + // ---------------- + always_comb begin : p_data_reg_sel + dmi_access_o = 1'b0; + dtmcs_select_o = 1'b0; + idcode_select = 1'b0; + bypass_select = 1'b0; + unique case (jtag_ir_q) + BYPASS0: bypass_select = 1'b1; + IDCODE: idcode_select = 1'b1; + DTMCSR: dtmcs_select_o = 1'b1; + DMIACCESS: dmi_access_o = 1'b1; + BYPASS1: bypass_select = 1'b1; + default: bypass_select = 1'b1; + endcase + end + + // ---------------- + // Output select + // ---------------- + logic tdo_mux; + + always_comb begin : p_out_sel + // we are shifting out the IR register + if (shift_ir) begin + tdo_mux = jtag_ir_shift_q[0]; + // here we are shifting the DR register + end else begin + unique case (jtag_ir_q) + IDCODE: tdo_mux = idcode_q[0]; // Reading ID code + DTMCSR: tdo_mux = dtmcs_q.version[0]; + DMIACCESS: tdo_mux = dmi_tdo_i; // Read from DMI TDO + default: tdo_mux = bypass_q; // BYPASS instruction + endcase + end + end + + // ---------------- + // DFT + // ---------------- + logic tck_n, tck_ni; + + cluster_clock_inverter i_tck_inv ( + .clk_i ( tck_i ), + .clk_o ( tck_ni ) + ); + + pulp_clock_mux2 i_dft_tck_mux ( + .clk0_i ( tck_ni ), + .clk1_i ( tck_i ), // bypass the inverted clock for testing + .clk_sel_i ( testmode_i ), + .clk_o ( tck_n ) + ); + + // TDO changes state at negative edge of TCK + always_ff @(posedge tck_n, negedge trst_ni) begin : p_tdo_regs + if (!trst_ni) begin + td_o <= 1'b0; + tdo_oe_o <= 1'b0; + end else begin + td_o <= tdo_mux; + tdo_oe_o <= (shift_ir | shift_dr_o); + end + end + // ---------------- + // TAP FSM + // ---------------- + // Determination of next state; purely combinatorial + always_comb begin : p_tap_fsm + + 
test_logic_reset_o = 1'b0; + + capture_dr_o = 1'b0; + shift_dr_o = 1'b0; + update_dr_o = 1'b0; + + capture_ir = 1'b0; + shift_ir = 1'b0; + // pause_ir = 1'b0; unused + update_ir = 1'b0; + + unique case (tap_state_q) + TestLogicReset: begin + tap_state_d = (tms_i) ? TestLogicReset : RunTestIdle; + test_logic_reset_o = 1'b1; + end + RunTestIdle: begin + tap_state_d = (tms_i) ? SelectDrScan : RunTestIdle; + end + // DR Path + SelectDrScan: begin + tap_state_d = (tms_i) ? SelectIrScan : CaptureDr; + end + CaptureDr: begin + capture_dr_o = 1'b1; + tap_state_d = (tms_i) ? Exit1Dr : ShiftDr; + end + ShiftDr: begin + shift_dr_o = 1'b1; + tap_state_d = (tms_i) ? Exit1Dr : ShiftDr; + end + Exit1Dr: begin + tap_state_d = (tms_i) ? UpdateDr : PauseDr; + end + PauseDr: begin + tap_state_d = (tms_i) ? Exit2Dr : PauseDr; + end + Exit2Dr: begin + tap_state_d = (tms_i) ? UpdateDr : ShiftDr; + end + UpdateDr: begin + update_dr_o = 1'b1; + tap_state_d = (tms_i) ? SelectDrScan : RunTestIdle; + end + // IR Path + SelectIrScan: begin + tap_state_d = (tms_i) ? TestLogicReset : CaptureIr; + end + // In this controller state, the shift register bank in the + // Instruction Register parallel loads a pattern of fixed values on + // the rising edge of TCK. The last two significant bits must always + // be "01". + CaptureIr: begin + capture_ir = 1'b1; + tap_state_d = (tms_i) ? Exit1Ir : ShiftIr; + end + // In this controller state, the instruction register gets connected + // between TDI and TDO, and the captured pattern gets shifted on + // each rising edge of TCK. The instruction available on the TDI + // pin is also shifted in to the instruction register. + ShiftIr: begin + shift_ir = 1'b1; + tap_state_d = (tms_i) ? Exit1Ir : ShiftIr; + end + Exit1Ir: begin + tap_state_d = (tms_i) ? UpdateIr : PauseIr; + end + PauseIr: begin + // pause_ir = 1'b1; // unused + tap_state_d = (tms_i) ? Exit2Ir : PauseIr; + end + Exit2Ir: begin + tap_state_d = (tms_i) ? 
UpdateIr : ShiftIr; + end + // In this controller state, the instruction in the instruction + // shift register is latched to the latch bank of the Instruction + // Register on every falling edge of TCK. This instruction becomes + // the current instruction once it is latched. + UpdateIr: begin + update_ir = 1'b1; + tap_state_d = (tms_i) ? SelectDrScan : RunTestIdle; + end + default: ; // can't actually happen since case is full + endcase + end + + always_ff @(posedge tck_i or negedge trst_ni) begin : p_regs + if (!trst_ni) begin + tap_state_q <= RunTestIdle; + idcode_q <= IdcodeValue; + bypass_q <= 1'b0; + dtmcs_q <= '0; + end else begin + tap_state_q <= tap_state_d; + idcode_q <= idcode_d; + bypass_q <= bypass_d; + dtmcs_q <= dtmcs_d; + end + end + +endmodule : dmi_jtag_tap diff --git a/test/type_param/corev_apu/rv_plic/rtl/plic_regmap.sv b/test/type_param/corev_apu/rv_plic/rtl/plic_regmap.sv new file mode 100644 index 00000000..bbf6f948 --- /dev/null +++ b/test/type_param/corev_apu/rv_plic/rtl/plic_regmap.sv @@ -0,0 +1,358 @@ +// Do not edit - auto-generated +module plic_regs #( + parameter type reg_req_t = logic, + parameter type reg_rsp_t = logic +)( + input logic [30:0][2:0] prio_i, + output logic [30:0][2:0] prio_o, + output logic [30:0] prio_we_o, + output logic [30:0] prio_re_o, + input logic [0:0][30:0] ip_i, + output logic [0:0] ip_re_o, + input logic [1:0][30:0] ie_i, + output logic [1:0][30:0] ie_o, + output logic [1:0] ie_we_o, + output logic [1:0] ie_re_o, + input logic [1:0][2:0] threshold_i, + output logic [1:0][2:0] threshold_o, + output logic [1:0] threshold_we_o, + output logic [1:0] threshold_re_o, + input logic [1:0][4:0] cc_i, + output logic [1:0][4:0] cc_o, + output logic [1:0] cc_we_o, + output logic [1:0] cc_re_o, + // Bus Interface + input reg_req_t req_i, + output reg_rsp_t resp_o +); +always_comb begin + resp_o.ready = 1'b1; + resp_o.rdata = '0; + resp_o.error = '0; + prio_o = '0; + prio_we_o = '0; + prio_re_o = '0; + ip_re_o = '0; // default required: ip_re_o is only set on a read of 32'hc001000, so without it always_comb infers a latch + ie_o = '0; +
ie_we_o = '0; + ie_re_o = '0; + threshold_o = '0; + threshold_we_o = '0; + threshold_re_o = '0; + cc_o = '0; + cc_we_o = '0; + cc_re_o = '0; + if (req_i.valid) begin + if (req_i.write) begin + unique case(req_i.addr) + 32'hc000000: begin + prio_o[0][2:0] = req_i.wdata[2:0]; + prio_we_o[0] = 1'b1; + end + 32'hc000004: begin + prio_o[1][2:0] = req_i.wdata[2:0]; + prio_we_o[1] = 1'b1; + end + 32'hc000008: begin + prio_o[2][2:0] = req_i.wdata[2:0]; + prio_we_o[2] = 1'b1; + end + 32'hc00000c: begin + prio_o[3][2:0] = req_i.wdata[2:0]; + prio_we_o[3] = 1'b1; + end + 32'hc000010: begin + prio_o[4][2:0] = req_i.wdata[2:0]; + prio_we_o[4] = 1'b1; + end + 32'hc000014: begin + prio_o[5][2:0] = req_i.wdata[2:0]; + prio_we_o[5] = 1'b1; + end + 32'hc000018: begin + prio_o[6][2:0] = req_i.wdata[2:0]; + prio_we_o[6] = 1'b1; + end + 32'hc00001c: begin + prio_o[7][2:0] = req_i.wdata[2:0]; + prio_we_o[7] = 1'b1; + end + 32'hc000020: begin + prio_o[8][2:0] = req_i.wdata[2:0]; + prio_we_o[8] = 1'b1; + end + 32'hc000024: begin + prio_o[9][2:0] = req_i.wdata[2:0]; + prio_we_o[9] = 1'b1; + end + 32'hc000028: begin + prio_o[10][2:0] = req_i.wdata[2:0]; + prio_we_o[10] = 1'b1; + end + 32'hc00002c: begin + prio_o[11][2:0] = req_i.wdata[2:0]; + prio_we_o[11] = 1'b1; + end + 32'hc000030: begin + prio_o[12][2:0] = req_i.wdata[2:0]; + prio_we_o[12] = 1'b1; + end + 32'hc000034: begin + prio_o[13][2:0] = req_i.wdata[2:0]; + prio_we_o[13] = 1'b1; + end + 32'hc000038: begin + prio_o[14][2:0] = req_i.wdata[2:0]; + prio_we_o[14] = 1'b1; + end + 32'hc00003c: begin + prio_o[15][2:0] = req_i.wdata[2:0]; + prio_we_o[15] = 1'b1; + end + 32'hc000040: begin + prio_o[16][2:0] = req_i.wdata[2:0]; + prio_we_o[16] = 1'b1; + end + 32'hc000044: begin + prio_o[17][2:0] = req_i.wdata[2:0]; + prio_we_o[17] = 1'b1; + end + 32'hc000048: begin + prio_o[18][2:0] = req_i.wdata[2:0]; + prio_we_o[18] = 1'b1; + end + 32'hc00004c: begin + prio_o[19][2:0] = req_i.wdata[2:0]; + prio_we_o[19] = 1'b1; + end + 32'hc000050: begin + 
prio_o[20][2:0] = req_i.wdata[2:0]; + prio_we_o[20] = 1'b1; + end + 32'hc000054: begin + prio_o[21][2:0] = req_i.wdata[2:0]; + prio_we_o[21] = 1'b1; + end + 32'hc000058: begin + prio_o[22][2:0] = req_i.wdata[2:0]; + prio_we_o[22] = 1'b1; + end + 32'hc00005c: begin + prio_o[23][2:0] = req_i.wdata[2:0]; + prio_we_o[23] = 1'b1; + end + 32'hc000060: begin + prio_o[24][2:0] = req_i.wdata[2:0]; + prio_we_o[24] = 1'b1; + end + 32'hc000064: begin + prio_o[25][2:0] = req_i.wdata[2:0]; + prio_we_o[25] = 1'b1; + end + 32'hc000068: begin + prio_o[26][2:0] = req_i.wdata[2:0]; + prio_we_o[26] = 1'b1; + end + 32'hc00006c: begin + prio_o[27][2:0] = req_i.wdata[2:0]; + prio_we_o[27] = 1'b1; + end + 32'hc000070: begin + prio_o[28][2:0] = req_i.wdata[2:0]; + prio_we_o[28] = 1'b1; + end + 32'hc000074: begin + prio_o[29][2:0] = req_i.wdata[2:0]; + prio_we_o[29] = 1'b1; + end + 32'hc000078: begin + prio_o[30][2:0] = req_i.wdata[2:0]; + prio_we_o[30] = 1'b1; + end + 32'hc002000: begin + ie_o[0][30:0] = req_i.wdata[30:0]; + ie_we_o[0] = 1'b1; + end + 32'hc002080: begin + ie_o[1][30:0] = req_i.wdata[30:0]; + ie_we_o[1] = 1'b1; + end + 32'hc200000: begin + threshold_o[0][2:0] = req_i.wdata[2:0]; + threshold_we_o[0] = 1'b1; + end + 32'hc201000: begin + threshold_o[1][2:0] = req_i.wdata[2:0]; + threshold_we_o[1] = 1'b1; + end + 32'hc200004: begin + cc_o[0][4:0] = req_i.wdata[4:0]; + cc_we_o[0] = 1'b1; + end + 32'hc201004: begin + cc_o[1][4:0] = req_i.wdata[4:0]; + cc_we_o[1] = 1'b1; + end + default: resp_o.error = 1'b1; + endcase + end else begin + unique case(req_i.addr) + 32'hc000000: begin + resp_o.rdata[2:0] = prio_i[0][2:0]; + prio_re_o[0] = 1'b1; + end + 32'hc000004: begin + resp_o.rdata[2:0] = prio_i[1][2:0]; + prio_re_o[1] = 1'b1; + end + 32'hc000008: begin + resp_o.rdata[2:0] = prio_i[2][2:0]; + prio_re_o[2] = 1'b1; + end + 32'hc00000c: begin + resp_o.rdata[2:0] = prio_i[3][2:0]; + prio_re_o[3] = 1'b1; + end + 32'hc000010: begin + resp_o.rdata[2:0] = prio_i[4][2:0]; + prio_re_o[4] = 
1'b1; + end + 32'hc000014: begin + resp_o.rdata[2:0] = prio_i[5][2:0]; + prio_re_o[5] = 1'b1; + end + 32'hc000018: begin + resp_o.rdata[2:0] = prio_i[6][2:0]; + prio_re_o[6] = 1'b1; + end + 32'hc00001c: begin + resp_o.rdata[2:0] = prio_i[7][2:0]; + prio_re_o[7] = 1'b1; + end + 32'hc000020: begin + resp_o.rdata[2:0] = prio_i[8][2:0]; + prio_re_o[8] = 1'b1; + end + 32'hc000024: begin + resp_o.rdata[2:0] = prio_i[9][2:0]; + prio_re_o[9] = 1'b1; + end + 32'hc000028: begin + resp_o.rdata[2:0] = prio_i[10][2:0]; + prio_re_o[10] = 1'b1; + end + 32'hc00002c: begin + resp_o.rdata[2:0] = prio_i[11][2:0]; + prio_re_o[11] = 1'b1; + end + 32'hc000030: begin + resp_o.rdata[2:0] = prio_i[12][2:0]; + prio_re_o[12] = 1'b1; + end + 32'hc000034: begin + resp_o.rdata[2:0] = prio_i[13][2:0]; + prio_re_o[13] = 1'b1; + end + 32'hc000038: begin + resp_o.rdata[2:0] = prio_i[14][2:0]; + prio_re_o[14] = 1'b1; + end + 32'hc00003c: begin + resp_o.rdata[2:0] = prio_i[15][2:0]; + prio_re_o[15] = 1'b1; + end + 32'hc000040: begin + resp_o.rdata[2:0] = prio_i[16][2:0]; + prio_re_o[16] = 1'b1; + end + 32'hc000044: begin + resp_o.rdata[2:0] = prio_i[17][2:0]; + prio_re_o[17] = 1'b1; + end + 32'hc000048: begin + resp_o.rdata[2:0] = prio_i[18][2:0]; + prio_re_o[18] = 1'b1; + end + 32'hc00004c: begin + resp_o.rdata[2:0] = prio_i[19][2:0]; + prio_re_o[19] = 1'b1; + end + 32'hc000050: begin + resp_o.rdata[2:0] = prio_i[20][2:0]; + prio_re_o[20] = 1'b1; + end + 32'hc000054: begin + resp_o.rdata[2:0] = prio_i[21][2:0]; + prio_re_o[21] = 1'b1; + end + 32'hc000058: begin + resp_o.rdata[2:0] = prio_i[22][2:0]; + prio_re_o[22] = 1'b1; + end + 32'hc00005c: begin + resp_o.rdata[2:0] = prio_i[23][2:0]; + prio_re_o[23] = 1'b1; + end + 32'hc000060: begin + resp_o.rdata[2:0] = prio_i[24][2:0]; + prio_re_o[24] = 1'b1; + end + 32'hc000064: begin + resp_o.rdata[2:0] = prio_i[25][2:0]; + prio_re_o[25] = 1'b1; + end + 32'hc000068: begin + resp_o.rdata[2:0] = prio_i[26][2:0]; + prio_re_o[26] = 1'b1; + end + 32'hc00006c: 
begin + resp_o.rdata[2:0] = prio_i[27][2:0]; + prio_re_o[27] = 1'b1; + end + 32'hc000070: begin + resp_o.rdata[2:0] = prio_i[28][2:0]; + prio_re_o[28] = 1'b1; + end + 32'hc000074: begin + resp_o.rdata[2:0] = prio_i[29][2:0]; + prio_re_o[29] = 1'b1; + end + 32'hc000078: begin + resp_o.rdata[2:0] = prio_i[30][2:0]; + prio_re_o[30] = 1'b1; + end + 32'hc001000: begin + resp_o.rdata[30:0] = ip_i[0][30:0]; + ip_re_o[0] = 1'b1; + end + 32'hc002000: begin + resp_o.rdata[30:0] = ie_i[0][30:0]; + ie_re_o[0] = 1'b1; + end + 32'hc002080: begin + resp_o.rdata[30:0] = ie_i[1][30:0]; + ie_re_o[1] = 1'b1; + end + 32'hc200000: begin + resp_o.rdata[2:0] = threshold_i[0][2:0]; + threshold_re_o[0] = 1'b1; + end + 32'hc201000: begin + resp_o.rdata[2:0] = threshold_i[1][2:0]; + threshold_re_o[1] = 1'b1; + end + 32'hc200004: begin + resp_o.rdata[4:0] = cc_i[0][4:0]; + cc_re_o[0] = 1'b1; + end + 32'hc201004: begin + resp_o.rdata[4:0] = cc_i[1][4:0]; + cc_re_o[1] = 1'b1; + end + default: resp_o.error = 1'b1; + endcase + end + end +end +endmodule + diff --git a/test/type_param/corev_apu/rv_plic/rtl/plic_top.sv b/test/type_param/corev_apu/rv_plic/rtl/plic_top.sv new file mode 100644 index 00000000..2a321027 --- /dev/null +++ b/test/type_param/corev_apu/rv_plic/rtl/plic_top.sv @@ -0,0 +1,157 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// Author: Florian Zaruba +// +// Description: Platform level interrupt controller + +module plic_top #( + parameter int N_SOURCE = 30, + parameter int N_TARGET = 2, + parameter int MAX_PRIO = 7, + parameter int SRCW = $clog2(N_SOURCE+1), + parameter type reg_req_t = logic, + parameter type reg_rsp_t = logic +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + // Bus Interface + input reg_req_t req_i, + output reg_rsp_t resp_o, + input logic [N_SOURCE-1:0] le_i, // 0:level 1:edge + // Interrupt Sources + input logic [N_SOURCE-1:0] irq_sources_i, + // Interrupt notification to targets + output logic [N_TARGET-1:0] eip_targets_o +); + localparam PRIOW = $clog2(MAX_PRIO+1); + + logic [N_SOURCE-1:0] ip; + + logic [N_TARGET-1:0][PRIOW-1:0] threshold_q; + + logic [N_TARGET-1:0] claim_re; //Target read indicator + logic [N_TARGET-1:0][SRCW-1:0] claim_id; + logic [N_SOURCE-1:0] claim; //Converted from claim_re/claim_id + + logic [N_TARGET-1:0] complete_we; //Target write indicator + logic [N_TARGET-1:0][SRCW-1:0] complete_id; + logic [N_SOURCE-1:0] complete; //Converted from complete_re/complete_id + + logic [N_SOURCE-1:0][PRIOW-1:0] prio_q; + logic [N_TARGET-1:0][N_SOURCE-1:0] ie_q; + + always_comb begin + claim = '0; + complete = '0; + for (int i = 0 ; i < N_TARGET ; i++) begin + if (claim_re[i] && claim_id[i] != 0) claim[claim_id[i]-1] = 1'b1; + if (complete_we[i] && complete_id[i] != 0) complete[complete_id[i]-1] = 1'b1; + end + end + + // Gateways + rv_plic_gateway #( + .N_SOURCE (N_SOURCE) + ) i_rv_plic_gateway ( + .clk_i, + .rst_ni, + .src(irq_sources_i), + .le(le_i), + .claim(claim), + .complete(complete), + .ip(ip) + ); + + // Target interrupt notification + for (genvar i = 0 ; i < N_TARGET; i++) begin : gen_target + rv_plic_target #( + .N_SOURCE ( N_SOURCE ), + .MAX_PRIO ( MAX_PRIO ), + .ALGORITHM ( "SEQUENTIAL" ) + ) i_target ( + .clk_i, + .rst_ni, + .ip(ip), + .ie(ie_q[i]), + .prio(prio_q), + 
.threshold(threshold_q[i]), + .irq(eip_targets_o[i]), + .irq_id(claim_id[i]) + ); + end + + logic [N_TARGET-1:0] threshold_we_o; + logic [N_TARGET-1:0][PRIOW-1:0] threshold_o; + + logic [N_SOURCE:0][PRIOW-1:0] prio_i, prio_o; + logic [N_SOURCE:0] prio_we_o; + + // TODO(zarubaf): This needs more graceful handling + // it will break if the number of sources is larger than 32 + logic [N_TARGET-1:0][N_SOURCE:0] ie_i, ie_o; + logic [N_TARGET-1:0] ie_we_o; + + plic_regs #( + .reg_req_t ( reg_req_t ), + .reg_rsp_t ( reg_rsp_t ) + ) i_plic_regs ( + .prio_i(prio_i), + .prio_o(prio_o), + .prio_we_o(prio_we_o), + .prio_re_o(), // don't care + // source zero is always zero + .ip_i({ip, 1'b0}), + .ip_re_o(), // don't care + .ie_i(ie_i), + .ie_o(ie_o), + .ie_we_o(ie_we_o), + .ie_re_o(), // don't care + .threshold_i(threshold_q), + .threshold_o(threshold_o), + .threshold_we_o(threshold_we_o), + .threshold_re_o(), // don't care + .cc_i(claim_id), + .cc_o(complete_id), + .cc_we_o(complete_we), + .cc_re_o(claim_re), + .req_i, + .resp_o + ); + + assign prio_i[0] = '0; + + for (genvar i = 0; i < N_TARGET; i++) begin + assign ie_i[i] = {ie_q[i][N_SOURCE-1:0], 1'b0}; + end + + for (genvar i = 1; i < N_SOURCE + 1; i++) begin + assign prio_i[i] = prio_q[i - 1]; + end + + // registers + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + prio_q <= '0; + ie_q <= '0; + threshold_q <= '0; + end else begin + // source zero is 0 + for (int i = 0; i < N_SOURCE; i++) begin + prio_q[i] <= prio_we_o[i + 1] ? prio_o[i + 1] : prio_q[i]; + end + for (int i = 0; i < N_TARGET; i++) begin + threshold_q[i] <= threshold_we_o[i] ? threshold_o[i] : threshold_q[i]; + ie_q[i] <= ie_we_o[i] ? 
ie_o[i][N_SOURCE:1] : ie_q[i]; + end + + end + end +endmodule diff --git a/test/type_param/corev_apu/rv_plic/rtl/rv_plic_gateway.sv b/test/type_param/corev_apu/rv_plic/rtl/rv_plic_gateway.sv new file mode 100644 index 00000000..c68f78cf --- /dev/null +++ b/test/type_param/corev_apu/rv_plic/rtl/rv_plic_gateway.sv @@ -0,0 +1,60 @@ +// Copyright lowRISC contributors. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 +// +// RISC-V Platform-Level Interrupt Gateways module + +module rv_plic_gateway #( + parameter int N_SOURCE = 32 +) ( + input clk_i, + input rst_ni, + + input [N_SOURCE-1:0] src, + input [N_SOURCE-1:0] le, // Level0 Edge1 + + input [N_SOURCE-1:0] claim, // $onehot0(claim) + input [N_SOURCE-1:0] complete, // $onehot0(complete) + + output logic [N_SOURCE-1:0] ip +); + +logic [N_SOURCE-1:0] ia; // Interrupt Active + +logic [N_SOURCE-1:0] set; // Set: (le) ? src & ~src_d : src ; +logic [N_SOURCE-1:0] src_d; + +always_ff @(posedge clk_i, negedge rst_ni) begin + if (!rst_ni) src_d <= '0; + else src_d <= src; +end + +always_comb begin + for (int i = 0 ; i < N_SOURCE; i++) begin + set[i] = (le[i]) ? src[i] & ~src_d[i] : src[i] ; + end +end + +// Interrupt pending is set by source (depends on le), cleared by claim. +// Until interrupt is claimed, set doesn't affect ip. +// RISC-V PLIC spec mentioned it can have counter for edge triggered +// But skipped the feature as counter consumes substantial logic size. +always_ff @(posedge clk_i, negedge rst_ni) begin + if (!rst_ni) begin + ip <= '0; + end else begin + ip <= (ip | (set & ~ia & ~ip)) & (~claim); + end +end + +// Interrupt active is to control ip. If ip is set then until completed +// by target, ip shouldn't be set by source even claim can clear ip. 
+always_ff @(posedge clk_i, negedge rst_ni) begin + if (!rst_ni) begin + ia <= '0; + end else begin + ia <= (ia | (set & ~ia)) & (~complete); + end +end + +endmodule diff --git a/test/type_param/corev_apu/rv_plic/rtl/rv_plic_target.sv b/test/type_param/corev_apu/rv_plic/rtl/rv_plic_target.sv new file mode 100644 index 00000000..26bd69c0 --- /dev/null +++ b/test/type_param/corev_apu/rv_plic/rtl/rv_plic_target.sv @@ -0,0 +1,125 @@ +// Copyright lowRISC contributors. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 +// +// RISC-V Platform-Level Interrupt Generator for Target +// +// This module basically doing IE & IP based on priority and threshold. +// Keep in mind that increasing MAX_PRIO affects logic size a lot. + +module rv_plic_target #( + parameter int N_SOURCE = 32, + parameter int MAX_PRIO = 7, + parameter ALGORITHM = "SEQUENTIAL", // SEQUENTIAL | MATRIX + + // Local param (Do not change this through parameter + parameter int unsigned SRCW = $clog2(N_SOURCE+1), + parameter int unsigned PRIOW = $clog2(MAX_PRIO+1) // Bits to represent MAX_PRIO +) ( + input clk_i, + input rst_ni, + + input [N_SOURCE-1:0] ip, + input [N_SOURCE-1:0] ie, + + input [N_SOURCE-1:0][PRIOW-1:0] prio, + input [PRIOW-1:0] threshold, + + output logic irq, + output logic [SRCW-1:0] irq_id +); + + +//always_ff @(posedge clk_i, negedge rst_ni) begin +// if (!rst_ni) begin +// gt_th <= '0; +// end else begin +// for (int i = 0 ; i < N_SOURCE ; i++) begin +// gt_th[i] = (prio[i] > threshold) ? 1'b1 : 1'b0 ; +// end +// end +//end + + +if (ALGORITHM == "SEQUENTIAL") begin : gen_sequential + // Let first implementation be brute-force + // As N_SOURCE increasing logic depth increases O(logN) + // This approach slows down the simulation. 
+ logic [PRIOW:0] max_prio; // one bit wider than prio/threshold so that threshold + 1 cannot wrap to 0 + logic irq_next; + logic [SRCW-1:0] irq_id_next; + always_comb begin + max_prio = {1'b0, threshold} + 1'b1; // Priority strictly greater than threshold + irq_id_next = '0; // default: No Interrupt + irq_next = 1'b0; + for (int i = N_SOURCE-1 ; i >= 0 ; i--) begin + if ((ip[i] & ie[i]) == 1'b1 && {1'b0, prio[i]} >= max_prio) begin + max_prio = {1'b0, prio[i]}; + irq_id_next = SRCW'(i+1); + irq_next = 1'b1; + end + end // for i + end + + always_ff @(posedge clk_i, negedge rst_ni) begin + if (!rst_ni) begin + irq <= 1'b0; + irq_id <= '0; + end else begin + irq <= irq_next; + irq_id <= irq_id_next; + end + end +end else if (ALGORITHM == "MATRIX") begin : gen_mat + // Second trial : N X N matrix + // Set mat[i][j] to 1 if prio[i] >= prio[j] and ip[i] & ie[i] & ip[j] & ie[j] + // Comparator depth is just 1 then logN AND gate then Leading One detector + // It is to find the max value of priority + // + // This uses a lot of comparators: (N x (N-1))/2. + // So if above approach(ALGORITHM 1) meets timing, don't use this algorithm. + logic [N_SOURCE-1:0] is; + + logic [N_SOURCE-1:0][N_SOURCE-1:0] mat; + logic [N_SOURCE-1:0] merged_row; + + assign is = ip & ie; + always_comb begin + merged_row[N_SOURCE-1] = is[N_SOURCE-1] & (prio[N_SOURCE-1] > threshold); + for (int i = 0 ; i < N_SOURCE-1 ; i++) begin + merged_row[i] = 1'b1; + for (int j = i+1 ; j < N_SOURCE ; j++) begin + mat[i][j] = (prio[i] <= threshold) ? 1'b0 : // No compare if less than TH + (is[i] & is[j]) ? prio[i] >= prio[j] : + (is[i]) ? 1'b 1 : 1'b 0 ; + merged_row[i] = merged_row[i] & mat[i][j]; // all should be 1 + end // for j + end // for i + end // always_comb + + // Leading One detector + logic [N_SOURCE-1:0] lod; + assign lod = merged_row & (~merged_row + 1'b1); + always_ff @(posedge clk_i, negedge rst_ni) begin + if (!rst_ni) begin + irq <= 1'b0; + irq_id <= '0; // No interrupt + end else if (|lod) begin + // as $onehot0(lod), at most one bit set.
+ // so, safely run for loop + for (int i = N_SOURCE-1 ; i >= 0 ; i--) begin + if (lod[i] == 1'b1) begin + irq <= 1'b 1; + irq_id <= SRCW'(i + 1); + end + end // for + end else begin + // No pending interrupt + irq <= 1'b0; + irq_id <= '0; + end + end // always_ff +end // ALGORITHM + +endmodule + diff --git a/test/type_param/corev_apu/src/ariane.sv b/test/type_param/corev_apu/src/ariane.sv new file mode 100644 index 00000000..1ec15ef3 --- /dev/null +++ b/test/type_param/corev_apu/src/ariane.sv @@ -0,0 +1,86 @@ +// Copyright 2017-2019 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// Author: Florian Zaruba, ETH Zurich +// Date: 19.03.2017 +// Description: Ariane Top-level module + + +module ariane import ariane_pkg::*; #( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter bit IsRVFI = bit'(0), + parameter type rvfi_probes_t = logic, + parameter int unsigned AxiAddrWidth = ariane_axi::AddrWidth, + parameter int unsigned AxiDataWidth = ariane_axi::DataWidth, + parameter int unsigned AxiIdWidth = ariane_axi::IdWidth, + parameter type axi_ar_chan_t = ariane_axi::ar_chan_t, + parameter type axi_aw_chan_t = ariane_axi::aw_chan_t, + parameter type axi_w_chan_t = ariane_axi::w_chan_t, + parameter type noc_req_t = ariane_axi::req_t, + parameter type noc_resp_t = ariane_axi::resp_t +) ( + input logic clk_i, + input logic rst_ni, + // Core ID, Cluster ID and boot address are considered more or less static + input logic [riscv::VLEN-1:0] boot_addr_i, // reset boot address + input logic [riscv::XLEN-1:0] hart_id_i, // hart id in a multicore environment (reflected in a CSR) + + // Interrupt inputs + input logic [1:0] irq_i, // level sensitive IR lines, mip & sip (async) + input logic ipi_i, // inter-processor interrupts (async) + // Timer facilities + input logic time_irq_i, // timer interrupt in (async) + input logic debug_req_i, // debug request (async) + // RISC-V formal interface port (`rvfi`): + // Can be left open when formal tracing is not needed. 
+ output rvfi_probes_t rvfi_probes_o, + // memory side + output noc_req_t noc_req_o, + input noc_resp_t noc_resp_i +); + + cvxif_pkg::cvxif_req_t cvxif_req; + cvxif_pkg::cvxif_resp_t cvxif_resp; + + cva6 #( + .CVA6Cfg ( CVA6Cfg ), + .IsRVFI ( IsRVFI ), + .rvfi_probes_t ( rvfi_probes_t ), + .axi_ar_chan_t (axi_ar_chan_t), + .axi_aw_chan_t (axi_aw_chan_t), + .axi_w_chan_t (axi_w_chan_t), + .noc_req_t (noc_req_t), + .noc_resp_t (noc_resp_t) + ) i_cva6 ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .boot_addr_i ( boot_addr_i ), + .hart_id_i ( hart_id_i ), + .irq_i ( irq_i ), + .ipi_i ( ipi_i ), + .time_irq_i ( time_irq_i ), + .debug_req_i ( debug_req_i ), + .rvfi_probes_o ( rvfi_probes_o ), + .cvxif_req_o ( cvxif_req ), + .cvxif_resp_i ( cvxif_resp ), + .noc_req_o ( noc_req_o ), + .noc_resp_i ( noc_resp_i ) + ); + + if (CVA6Cfg.CvxifEn) begin : gen_example_coprocessor + cvxif_example_coprocessor i_cvxif_coprocessor ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .cvxif_req_i ( cvxif_req ), + .cvxif_resp_o ( cvxif_resp ) + ); + end + +endmodule // ariane diff --git a/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_res_tbl.sv b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_res_tbl.sv new file mode 100644 index 00000000..b3c56153 --- /dev/null +++ b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_res_tbl.sv @@ -0,0 +1,93 @@ +// Copyright (c) 2018 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations under the License. + +// AXI Reservation Table +module axi_res_tbl #( + parameter int unsigned AXI_ADDR_WIDTH = 0, + parameter int unsigned AXI_ID_WIDTH = 0 +) ( + input logic clk_i, + input logic rst_ni, + input logic [AXI_ADDR_WIDTH-1:0] clr_addr_i, + input logic clr_req_i, + output logic clr_gnt_o, + input logic [AXI_ADDR_WIDTH-1:0] set_addr_i, + input logic [AXI_ID_WIDTH-1:0] set_id_i, + input logic set_req_i, + output logic set_gnt_o, + input logic [AXI_ADDR_WIDTH-1:0] check_addr_i, + input logic [AXI_ID_WIDTH-1:0] check_id_i, + output logic check_res_o, + input logic check_req_i, + output logic check_gnt_o +); + + localparam integer N_IDS = 2**AXI_ID_WIDTH; + + // Declarations of Signals and Types + logic [N_IDS-1:0][AXI_ADDR_WIDTH-1:0] tbl_d, tbl_q; + logic clr, + set; + + generate for (genvar i = 0; i < N_IDS; ++i) begin: gen_tbl + always_comb begin + tbl_d[i] = tbl_q[i]; + if (set && i == set_id_i) begin + tbl_d[i] = set_addr_i; + end else if (clr && tbl_q[i] == clr_addr_i) begin + tbl_d[i] = '0; + end + end + end endgenerate + + // Table-Managing Logic + always_comb begin + clr = 1'b0; + set = 1'b0; + clr_gnt_o = 1'b0; + set_gnt_o = 1'b0; + check_res_o = 1'b0; + check_gnt_o = 1'b0; + + if (clr_req_i) begin + clr = 1'b1; + clr_gnt_o = 1'b1; + end else if (set_req_i) begin + set = 1'b1; + set_gnt_o = 1'b1; + end else if (check_req_i) begin + check_res_o = (tbl_q[check_id_i] == check_addr_i); + check_gnt_o = 1'b1; + end + end + + // Registers + always_ff @(posedge clk_i, negedge rst_ni) begin + if (~rst_ni) begin + tbl_q <= '0; + end else begin + tbl_q <= tbl_d; + end + end + + // Validate parameters. 
+// pragma translate_off +`ifndef VERILATOR + initial begin: validate_params + assert (AXI_ADDR_WIDTH > 0) + else $fatal(1, "AXI_ADDR_WIDTH must be greater than 0!"); + assert (AXI_ID_WIDTH > 0) + else $fatal(1, "AXI_ID_WIDTH must be greater than 0!"); + end +`endif +// pragma translate_on + +endmodule diff --git a/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_amos.sv b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_amos.sv new file mode 100644 index 00000000..fafdb4f6 --- /dev/null +++ b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_amos.sv @@ -0,0 +1,1004 @@ +// Copyright (c) 2018 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// AXI RISC-V Atomic Operations (AMOs) Adapter +// +// This adapter implements atomic memory operations in accordance with the RVWMO memory consistency +// model. +// +// Interface notes: +// - This module has combinational paths between AXI inputs and outputs for minimum latency. Add +// slices upstream or downstream or in both directions if combinatorial paths become too long. +// The module adheres to the AXI ready/valid dependency specification to prevent combinatorial +// loops. 
+
+module axi_riscv_amos #(
+    // AXI Parameters (the 0 defaults are placeholders; validate_params below rejects 0 for ADDR/DATA/ID)
+    parameter int unsigned AXI_ADDR_WIDTH = 0,
+    parameter int unsigned AXI_DATA_WIDTH = 0,
+    parameter int unsigned AXI_ID_WIDTH = 0,
+    parameter int unsigned AXI_USER_WIDTH = 0,
+    // Maximum number of AXI write transactions outstanding at the same time (sizes the w_cnt_* counters)
+    parameter int unsigned AXI_MAX_WRITE_TXNS = 0,
+    // Word width of the widest RISC-V processor that can issue requests to this module.
+    // 32 for RV32; 64 for RV64, where both 32-bit (.W suffix) and 64-bit (.D suffix) AMOs are
+    // supported if the strobe is set correctly (called `aw_strb` here; presumably `slv_w_strb_i`).
+    parameter int unsigned RISCV_WORD_WIDTH = 0,
+    /// Derived Parameters (do NOT change manually!)
+    localparam int unsigned AXI_STRB_WIDTH = AXI_DATA_WIDTH / 8
+) (
+    input logic clk_i,
+    input logic rst_ni,
+
+    /// Slave Interface
+    input logic [AXI_ADDR_WIDTH-1:0] slv_aw_addr_i,
+    input logic [2:0] slv_aw_prot_i,
+    input logic [3:0] slv_aw_region_i,
+    input logic [5:0] slv_aw_atop_i,
+    input logic [7:0] slv_aw_len_i,
+    input logic [2:0] slv_aw_size_i,
+    input logic [1:0] slv_aw_burst_i,
+    input logic slv_aw_lock_i,
+    input logic [3:0] slv_aw_cache_i,
+    input logic [3:0] slv_aw_qos_i,
+    input logic [AXI_ID_WIDTH-1:0] slv_aw_id_i,
+    input logic [AXI_USER_WIDTH-1:0] slv_aw_user_i,
+    output logic slv_aw_ready_o,
+    input logic slv_aw_valid_i,
+
+    input logic [AXI_ADDR_WIDTH-1:0] slv_ar_addr_i,
+    input logic [2:0] slv_ar_prot_i,
+    input logic [3:0] slv_ar_region_i,
+    input logic [7:0] slv_ar_len_i,
+    input logic [2:0] slv_ar_size_i,
+    input logic [1:0] slv_ar_burst_i,
+    input logic slv_ar_lock_i,
+    input logic [3:0] slv_ar_cache_i,
+    input logic [3:0] slv_ar_qos_i,
+    input logic [AXI_ID_WIDTH-1:0] slv_ar_id_i,
+    input logic [AXI_USER_WIDTH-1:0] slv_ar_user_i,
+    output logic slv_ar_ready_o,
+    input logic slv_ar_valid_i,
+
+    input logic [AXI_DATA_WIDTH-1:0] slv_w_data_i,
+    input logic [AXI_STRB_WIDTH-1:0] slv_w_strb_i,
+    input logic [AXI_USER_WIDTH-1:0] slv_w_user_i,
+    input logic slv_w_last_i,
+    output logic slv_w_ready_o,
+    input logic slv_w_valid_i,
+
+    output logic [AXI_DATA_WIDTH-1:0] slv_r_data_o,
+    output logic [1:0] slv_r_resp_o,
+    output logic slv_r_last_o,
+    output logic [AXI_ID_WIDTH-1:0] slv_r_id_o,
+    output logic [AXI_USER_WIDTH-1:0] slv_r_user_o,
+    input logic slv_r_ready_i,
+    output logic slv_r_valid_o,
+
+    output logic [1:0] slv_b_resp_o,
+    output logic [AXI_ID_WIDTH-1:0] slv_b_id_o,
+    output logic [AXI_USER_WIDTH-1:0] slv_b_user_o,
+    input logic slv_b_ready_i,
+    output logic slv_b_valid_o,
+
+    /// Master Interface
+    output logic [AXI_ADDR_WIDTH-1:0] mst_aw_addr_o,
+    output logic [2:0] mst_aw_prot_o,
+    output logic [3:0] mst_aw_region_o,
+    output logic [5:0] mst_aw_atop_o,
+    output logic [7:0] mst_aw_len_o,
+    output logic [2:0] mst_aw_size_o,
+    output logic [1:0] mst_aw_burst_o,
+    output logic mst_aw_lock_o,
+    output logic [3:0] mst_aw_cache_o,
+    output logic [3:0] mst_aw_qos_o,
+    output logic [AXI_ID_WIDTH-1:0] mst_aw_id_o,
+    output logic [AXI_USER_WIDTH-1:0] mst_aw_user_o,
+    input logic mst_aw_ready_i,
+    output logic mst_aw_valid_o,
+
+    output logic [AXI_ADDR_WIDTH-1:0] mst_ar_addr_o,
+    output logic [2:0] mst_ar_prot_o,
+    output logic [3:0] mst_ar_region_o,
+    output logic [7:0] mst_ar_len_o,
+    output logic [2:0] mst_ar_size_o,
+    output logic [1:0] mst_ar_burst_o,
+    output logic mst_ar_lock_o,
+    output logic [3:0] mst_ar_cache_o,
+    output logic [3:0] mst_ar_qos_o,
+    output logic [AXI_ID_WIDTH-1:0] mst_ar_id_o,
+    output logic [AXI_USER_WIDTH-1:0] mst_ar_user_o,
+    input logic mst_ar_ready_i,
+    output logic mst_ar_valid_o,
+
+    output logic [AXI_DATA_WIDTH-1:0] mst_w_data_o,
+    output logic [AXI_STRB_WIDTH-1:0] mst_w_strb_o,
+    output logic [AXI_USER_WIDTH-1:0] mst_w_user_o,
+    output logic mst_w_last_o,
+    input logic mst_w_ready_i,
+    output logic mst_w_valid_o,
+
+    input logic [AXI_DATA_WIDTH-1:0] mst_r_data_i,
+    input logic [1:0] mst_r_resp_i,
+    input logic mst_r_last_i,
+    input logic [AXI_ID_WIDTH-1:0] mst_r_id_i,
+    input logic [AXI_USER_WIDTH-1:0] mst_r_user_i,
+    output logic mst_r_ready_o,
+    input logic mst_r_valid_i,
+
+    input logic [1:0] mst_b_resp_i,
+    input logic [AXI_ID_WIDTH-1:0] mst_b_id_i,
+    input logic [AXI_USER_WIDTH-1:0] mst_b_user_i,
+    output logic mst_b_ready_o,
+    input logic mst_b_valid_i
+);
+
+    localparam int unsigned OUTSTND_BURSTS_WIDTH = $clog2(AXI_MAX_WRITE_TXNS+1); // enough bits to count 0..AXI_MAX_WRITE_TXNS
+    localparam int unsigned AXI_ALU_RATIO = AXI_DATA_WIDTH/RISCV_WORD_WIDTH; // number of ALU-width words per AXI data beat
+
+    // State types: one FSM per AXI channel; every FSM resets to its FEEDTHROUGH_* state
+    typedef enum logic [1:0] { FEEDTHROUGH_AW, WAIT_RESULT_AW, SEND_AW } aw_state_t;
+    aw_state_t aw_state_d, aw_state_q;
+
+    typedef enum logic [2:0] { FEEDTHROUGH_W, WAIT_DATA_W, WAIT_RESULT_W, WAIT_CHANNEL_W, SEND_W } w_state_t;
+    w_state_t w_state_d, w_state_q;
+
+    typedef enum logic [1:0] { FEEDTHROUGH_B, WAIT_COMPLETE_B, WAIT_CHANNEL_B, SEND_B } b_state_t;
+    b_state_t b_state_d, b_state_q;
+
+    typedef enum logic [1:0] { FEEDTHROUGH_AR, WAIT_CHANNEL_AR, SEND_AR } ar_state_t;
+    ar_state_t ar_state_d, ar_state_q;
+
+    typedef enum logic [1:0] { FEEDTHROUGH_R, WAIT_DATA_R, WAIT_CHANNEL_R, SEND_R } r_state_t;
+    r_state_t r_state_d, r_state_q;
+
+    typedef enum logic [1:0] { NONE, INVALID, LOAD, STORE } atop_req_t; // classification of the AW request seen while the adapter is ready
+    atop_req_t atop_valid_d, atop_valid_q;
+
+    // Signal declarations (`_d` = next value computed combinationally, `_q` = registered value)
+    // Transaction FF: attributes of the atomic request captured from the slave AW channel
+    logic [AXI_ADDR_WIDTH-1:0] addr_d, addr_q;
+    logic [AXI_ID_WIDTH-1:0] id_d, id_q;
+    logic [AXI_STRB_WIDTH-1:0] strb_d, strb_q;
+    logic [2:0] size_d, size_q;
+    logic [5:0] atop_d, atop_q;
+    logic [3:0] cache_d, cache_q;
+    logic [2:0] prot_d, prot_q;
+    logic [3:0] qos_d, qos_q;
+    logic [3:0] region_d, region_q;
+    logic [1:0] r_resp_d, r_resp_q;
+    logic [AXI_USER_WIDTH-1:0] aw_user_d, aw_user_q,
+                               w_user_d, w_user_q,
+                               r_user_d, r_user_q;
+    // Data FF
+    logic [AXI_DATA_WIDTH-1:0] w_data_d, w_data_q; // AMO operand
+    logic [AXI_DATA_WIDTH-1:0] r_data_d, r_data_q; // Data from memory
+    logic [AXI_DATA_WIDTH-1:0] result_d, result_q; // Result of AMO operation
+    logic w_d_valid_d, w_d_valid_q, // AMO operand valid
+          r_d_valid_d, r_d_valid_q; // Data from memory valid
+    // Counters (all decrement on a downstream W handshake with mst_w_last_o set, i.e. once per burst)
+    logic [OUTSTND_BURSTS_WIDTH-1:0] w_cnt_d, w_cnt_q; // Outstanding W bursts: +1 per accepted master AW, -1 per last W beat
+    logic [OUTSTND_BURSTS_WIDTH-1:0] w_cnt_req_d, w_cnt_req_q; // W bursts to pass through until AMO can read W
+    logic [OUTSTND_BURSTS_WIDTH-1:0] w_cnt_inj_d, w_cnt_inj_q; // W bursts to pass through until AMO can insert its W
+    // States
+    logic adapter_ready;
+    logic transaction_collision;
+    logic aw_valid, aw_ready, aw_free,
+          w_valid, w_ready, w_free,
+          b_valid, b_ready, b_free,
+          ar_valid, ar_ready, ar_free,
+          r_valid, r_ready, r_free;
+    // ALU Signals
+    logic [RISCV_WORD_WIDTH-1:0] alu_operand_a;
+    logic [RISCV_WORD_WIDTH-1:0] alu_operand_b;
+    logic [RISCV_WORD_WIDTH-1:0] alu_result;
+    logic [AXI_DATA_WIDTH-1:0] alu_result_ext;
+    logic [AXI_ALU_RATIO-1:0][RISCV_WORD_WIDTH-1:0] op_a;
+    logic [AXI_ALU_RATIO-1:0][RISCV_WORD_WIDTH-1:0] op_b;
+    logic [AXI_ALU_RATIO-1:0][RISCV_WORD_WIDTH-1:0] op_a_sign_ext;
+    logic [AXI_ALU_RATIO-1:0][RISCV_WORD_WIDTH-1:0] op_b_sign_ext;
+    logic [AXI_ALU_RATIO-1:0][RISCV_WORD_WIDTH-1:0] res;
+    logic [AXI_STRB_WIDTH-1:0][7:0] strb_ext;
+    logic sign_a;
+    logic sign_b;
+
+    /**
+     * Calculate ready signals and channel states
+     */
+
+    // Check if all state machines are ready for the next atomic request (all five in FEEDTHROUGH_*)
+    assign adapter_ready = (aw_state_q == FEEDTHROUGH_AW) &&
+                           ( w_state_q == FEEDTHROUGH_W ) &&
+                           ( b_state_q == FEEDTHROUGH_B ) &&
+                           (ar_state_q == FEEDTHROUGH_AR) &&
+                           ( r_state_q == FEEDTHROUGH_R );
+
+    // Calculate if the channels are free: previous cycle had no valid, or valid was accepted (uses the registers below)
+    assign aw_free = ~aw_valid | aw_ready;
+    assign w_free = ~ w_valid | w_ready;
+    assign b_free = ~ b_valid | b_ready;
+    assign ar_free = ~ar_valid | ar_ready;
+    assign r_free = ~ r_valid | r_ready;
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin // registers last cycle's valid/ready of each driven channel
+        if(~rst_ni) begin
+            aw_valid <= 0;
+            aw_ready <= 0;
+            w_valid <= 0;
+            w_ready <= 0;
+            b_valid <= 0;
+            b_ready <= 0;
+            ar_valid <= 0;
+            ar_ready <= 0;
+            r_valid <= 0;
+            r_ready <= 0;
+        end else begin
+            aw_valid <= mst_aw_valid_o;
+            aw_ready <= mst_aw_ready_i;
+            w_valid <= mst_w_valid_o;
+            w_ready <= mst_w_ready_i;
+            b_valid <= slv_b_valid_o;
+            b_ready <= slv_b_ready_i;
+            ar_valid <= mst_ar_valid_o;
+            ar_ready <= mst_ar_ready_i;
+            r_valid <= slv_r_valid_o;
+            r_ready <= slv_r_ready_i;
+        end
+    end
+
+    // Calculate if the request interferes with the ongoing atomic transaction
+    // The protected bytes go from addr_q up to addr_q + (1 << size_q) - 1
+    // TODO Bursts need special treatment (only the incoming AW address/size is compared; len is ignored)
+    assign transaction_collision = (slv_aw_addr_i < ( addr_q + (8'h01 << size_q))) &
+                                   ( addr_q < (slv_aw_addr_i + (8'h01 << slv_aw_size_i)));
+
+    always_comb begin : calc_atop_valid
+        atop_valid_d = atop_valid_q;
+        if (adapter_ready) begin
+            atop_valid_d = NONE;
+            if (slv_aw_valid_i && slv_aw_atop_i) begin // any non-zero atop marks an atomic request
+                // Default is invalid request
+                atop_valid_d = INVALID;
+                // Valid load operation (atomic swap is classified as LOAD as well)
+                if ((slv_aw_atop_i == axi_pkg::ATOP_ATOMICSWAP) ||
+                    (slv_aw_atop_i[5:3] == {axi_pkg::ATOP_ATOMICLOAD , axi_pkg::ATOP_LITTLE_END})) begin
+                    atop_valid_d = LOAD;
+                end
+                // Valid store operation
+                if (slv_aw_atop_i[5:3] == {axi_pkg::ATOP_ATOMICSTORE, axi_pkg::ATOP_LITTLE_END}) begin
+                    atop_valid_d = STORE;
+                end
+                // Invalidate valid request if control signals do not match
+                // Burst or exclusive access
+                if (slv_aw_len_i | slv_aw_lock_i) begin
+                    atop_valid_d = INVALID;
+                end
+                // Unsupported size (wider than one RISC-V word)
+                if (slv_aw_size_i > $clog2(RISCV_WORD_WIDTH/8)) begin
+                    atop_valid_d = INVALID;
+                end
+            end
+        end
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin : proc_atop_valid
+        if(~rst_ni) begin
+            atop_valid_q <= NONE;
+        end else begin
+            atop_valid_q <= atop_valid_d;
+        end
+    end
+
+    /**
+     * Write Channel: AW, W, B
+     */
+
+    /*====================================================================
+    =                                 AW                                 =
+    ====================================================================*/
+    always_comb begin : axi_aw_channel
+        // Defaults AXI Bus: forward the slave AW attributes to the master port
+        mst_aw_id_o = slv_aw_id_i;
+        mst_aw_addr_o = slv_aw_addr_i;
+        mst_aw_len_o = slv_aw_len_i;
+        mst_aw_size_o = slv_aw_size_i;
+        mst_aw_burst_o = slv_aw_burst_i;
+        mst_aw_lock_o = slv_aw_lock_i;
+        mst_aw_cache_o = slv_aw_cache_i;
+        mst_aw_prot_o = slv_aw_prot_i;
+        mst_aw_qos_o = slv_aw_qos_i;
+        mst_aw_region_o = slv_aw_region_i;
+        mst_aw_atop_o = 6'b0; // never overridden below: atop is not forwarded downstream
+        mst_aw_user_o = slv_aw_user_i;
+        // Defaults FF: hold the captured request
+        addr_d = addr_q;
+        id_d = id_q;
+        size_d = size_q;
+        atop_d = atop_q;
+        cache_d = cache_q;
+        prot_d = prot_q;
+        qos_d = qos_q;
+        region_d = region_q;
+        aw_user_d = aw_user_q;
+        w_cnt_inj_d = w_cnt_inj_q;
+        // State Machine
+        aw_state_d = aw_state_q;
+
+        // Default control: Block AW channel if...
+        if (slv_aw_valid_i && slv_aw_atop_i) begin
+            // Block if atomic request
+            mst_aw_valid_o = 1'b0;
+            slv_aw_ready_o = 1'b0;
+        end else if (w_cnt_q == AXI_MAX_WRITE_TXNS) begin
+            // Block if counter would overflow (outstanding-burst counter is at AXI_MAX_WRITE_TXNS)
+            mst_aw_valid_o = 1'b0;
+            slv_aw_ready_o = 1'b0;
+        end else if (slv_aw_valid_i && transaction_collision && !adapter_ready) begin
+            // Block requests to the same address as current atomic transaction
+            mst_aw_valid_o = 1'b0;
+            slv_aw_ready_o = 1'b0;
+        end else begin
+            // Forward
+            mst_aw_valid_o = slv_aw_valid_i;
+            slv_aw_ready_o = mst_aw_ready_i;
+        end
+
+        // Count W bursts (last beats) to know when to inject the W data
+        if (w_cnt_inj_q && mst_w_valid_o && mst_w_ready_i && mst_w_last_o) begin
+            w_cnt_inj_d = w_cnt_inj_q - 1;
+        end
+
+        unique case (aw_state_q)
+
+            FEEDTHROUGH_AW: begin
+                // Feedthrough slave to master until atomic operation is detected
+                if (slv_aw_valid_i && slv_aw_atop_i && adapter_ready) begin
+                    // Acknowledge atomic transaction (mst_aw_valid_o stays 0 from the default control above)
+                    slv_aw_ready_o = 1'b1;
+                    // Remember request
+                    atop_d = slv_aw_atop_i;
+                    addr_d = slv_aw_addr_i;
+                    id_d = slv_aw_id_i;
+                    size_d = slv_aw_size_i;
+                    cache_d = slv_aw_cache_i;
+                    prot_d = slv_aw_prot_i;
+                    qos_d = slv_aw_qos_i;
+                    region_d = slv_aw_region_i;
+                    aw_user_d = slv_aw_user_i;
+                    // If valid AMO --> wait for result (an INVALID request leaves this FSM in FEEDTHROUGH_AW)
+                    if (atop_valid_d != INVALID) begin
+                        aw_state_d = WAIT_RESULT_AW;
+                    end
+                end
+
+            end // FEEDTHROUGH_AW
+
+            WAIT_RESULT_AW, SEND_AW: begin
+                // If the result is ready and the channel is free --> inject AW request
+                if ((r_d_valid_q && w_d_valid_q && aw_free) || (aw_state_q == SEND_AW)) begin
+                    // Block
+                    slv_aw_ready_o = 1'b0;
+                    // Make write request (single beat, from the captured request attributes)
+                    mst_aw_valid_o = 1'b1;
+                    mst_aw_addr_o = addr_q;
+                    mst_aw_len_o = 8'h00;
+                    mst_aw_id_o = id_q;
+                    mst_aw_size_o = size_q;
+                    mst_aw_burst_o = 2'b00;
+                    mst_aw_lock_o = 1'b0;
+                    mst_aw_cache_o = cache_q;
+                    mst_aw_prot_o = prot_q;
+                    mst_aw_qos_o = qos_q;
+                    mst_aw_region_o = region_q;
+                    mst_aw_user_o = aw_user_q;
+                    // Check if request is acknowledged
+                    if (mst_aw_ready_i) begin
+                        aw_state_d = FEEDTHROUGH_AW;
+                    end else begin
+                        aw_state_d = SEND_AW;
+                    end
+                    // Remember outstanding W bursts before injected request (only on the first cycle of the injection)
+                    if (aw_state_q == WAIT_RESULT_AW) begin
+                        if (w_cnt_q && mst_w_valid_o && mst_w_ready_i && mst_w_last_o) begin
+                            w_cnt_inj_d = w_cnt_q - 1;
+                        end else begin
+                            w_cnt_inj_d = w_cnt_q;
+                        end
+                    end
+                end
+            end // WAIT_RESULT_AW, SEND_AW
+
+            default: aw_state_d = FEEDTHROUGH_AW;
+
+        endcase
+    end // axi_aw_channel
+
+    /*====================================================================
+    =                                 W                                  =
+    ====================================================================*/
+    always_comb begin : axi_w_channel
+        // Defaults AXI Bus: forward the slave W payload to the master port
+        mst_w_data_o = slv_w_data_i;
+        mst_w_strb_o = slv_w_strb_i;
+        mst_w_last_o = slv_w_last_i;
+        mst_w_user_o = slv_w_user_i;
+        // Defaults FF
+        strb_d = strb_q;
+        w_user_d = w_user_q;
+        w_data_d = w_data_q;
+        result_d = result_q;
+        w_d_valid_d = w_d_valid_q;
+        w_cnt_req_d = w_cnt_req_q;
+        // State Machine
+        w_state_d = w_state_q;
+
+        // Default control
+        // Make sure no data is sent without knowing if it's atomic
+        if (w_cnt_q == 0) begin
+            // Stall W as it precedes the AW request
+            slv_w_ready_o = 1'b0;
+            mst_w_valid_o = 1'b0;
+        end else begin
+            mst_w_valid_o = slv_w_valid_i;
+            slv_w_ready_o = mst_w_ready_i;
+        end
+
+        unique case (w_state_q)
+
+            FEEDTHROUGH_W: begin
+                if (adapter_ready) begin
+                    // Reset operand-valid flag and the stored result
+                    w_d_valid_d = 1'b0;
+                    result_d = '0;
+
+                    if (atop_valid_d != NONE) begin
+                        // Check if data is also available and does not belong to previous request
+                        if (w_cnt_q == 0) begin
+                            // Block downstream
+                            mst_w_valid_o = 1'b0;
+                            // Fetch data and wait for all data
+                            slv_w_ready_o = 1'b1;
+                            if (slv_w_valid_i) begin
+                                if (atop_valid_d != INVALID) begin // data of an INVALID request is accepted but not stored
+                                    w_data_d = slv_w_data_i;
+                                    strb_d = slv_w_strb_i;
+                                    w_user_d = slv_w_user_i;
+                                    w_d_valid_d = 1'b1;
+                                    w_state_d = WAIT_RESULT_W;
+                                end
+                            end else begin
+                                w_cnt_req_d = '0;
+                                w_state_d = WAIT_DATA_W;
+                            end
+                        end else begin
+                            // Remember the amount of outstanding bursts and count down
+                            if (mst_w_valid_o && mst_w_ready_i && mst_w_last_o) begin
+                                w_cnt_req_d = w_cnt_q - 1;
+                            end else begin
+                                w_cnt_req_d = w_cnt_q;
+                            end
+                            w_state_d = WAIT_DATA_W;
+                        end
+                    end
+                end
+            end // FEEDTHROUGH_W
+
+            WAIT_DATA_W: begin
+                // Count W bursts until data arrives that belongs to the AMO request
+                if (w_cnt_req_q == 0) begin
+                    // Block downstream
+                    mst_w_valid_o = 1'b0;
+                    // Ready upstream
+                    slv_w_ready_o = 1'b1;
+
+                    if (slv_w_valid_i) begin
+                        if (atop_valid_q == INVALID) begin // swallow the data of an invalid request
+                            w_state_d = FEEDTHROUGH_W;
+                        end else begin
+                            w_data_d = slv_w_data_i;
+                            strb_d = slv_w_strb_i;
+                            w_user_d = slv_w_user_i;
+                            w_d_valid_d = 1'b1;
+                            w_state_d = WAIT_RESULT_W;
+                        end
+                    end
+                end else if (mst_w_valid_o && mst_w_ready_i && mst_w_last_o) begin
+                    w_cnt_req_d = w_cnt_req_q - 1;
+                end
+            end // WAIT_DATA_W
+
+            WAIT_RESULT_W: begin
+                // If the result is ready, try to write it
+                if (r_d_valid_q && w_d_valid_q && aw_free) begin
+                    // Check if W channel is free and make sure data is not interleaved
+                    result_d = alu_result_ext; // latch the result so WAIT_CHANNEL_W/SEND_W can send result_q later
+                    if (w_free && w_cnt_q == 0) begin
+                        // Block
+                        slv_w_ready_o = 1'b0;
+                        // Send write data
+                        mst_w_valid_o = 1'b1;
+                        mst_w_data_o = alu_result_ext;
+                        mst_w_last_o = 1'b1;
+                        mst_w_strb_o = strb_q;
+                        mst_w_user_o = w_user_q;
+                        if (mst_w_ready_i) begin
+                            w_state_d = FEEDTHROUGH_W;
+                        end else begin
+                            w_state_d = SEND_W;
+                        end
+                    end else begin
+                        w_state_d = WAIT_CHANNEL_W;
+                    end
+                end
+            end // WAIT_RESULT_W
+
+            WAIT_CHANNEL_W, SEND_W: begin
+                // Wait to not interleave the data (all bursts counted in w_cnt_inj_q must pass first)
+                if ((w_free && w_cnt_inj_q == 0) || (w_state_q == SEND_W)) begin
+                    // Block
+                    slv_w_ready_o = 1'b0;
+                    // Send write data
+                    mst_w_valid_o = 1'b1;
+                    mst_w_data_o = result_q;
+                    mst_w_last_o = 1'b1;
+                    mst_w_strb_o = strb_q;
+                    mst_w_user_o = w_user_q;
+                    if (mst_w_ready_i) begin
+                        w_state_d = FEEDTHROUGH_W;
+                    end else begin
+                        w_state_d = SEND_W;
+                    end
+                end
+            end // WAIT_CHANNEL_W, SEND_W
+
+            default: w_state_d = FEEDTHROUGH_W;
+
+        endcase
+    end // axi_w_channel
+
+    /*====================================================================
+    =                                 B                                  =
+    ====================================================================*/
+    always_comb begin : axi_b_channel
+        // Defaults AXI Bus: forward the master B response to the slave port
+        mst_b_ready_o = slv_b_ready_i;
+        slv_b_id_o = mst_b_id_i;
+        slv_b_resp_o = mst_b_resp_i;
+        slv_b_user_o = mst_b_user_i;
+        slv_b_valid_o = mst_b_valid_i;
+        // State Machine
+        b_state_d = b_state_q;
+
+        unique case (b_state_q)
+
+            FEEDTHROUGH_B: begin
+                if (adapter_ready) begin
+                    if (atop_valid_d == LOAD || atop_valid_d == STORE) begin
+                        // Wait until write is complete
+                        b_state_d = WAIT_COMPLETE_B;
+                    end else if (atop_valid_d == INVALID) begin
+                        // Inject B error resp once the channel is free
+                        if (b_free) begin
+                            // Block downstream
+                            mst_b_ready_o = 1'b0;
+                            // Write B response (ID comes straight from the AW port: the request is not registered yet)
+                            slv_b_valid_o = 1'b1;
+                            slv_b_id_o = slv_aw_id_i;
+                            slv_b_resp_o = axi_pkg::RESP_SLVERR;
+                            slv_b_user_o = '0;
+                            if (!slv_b_ready_i) begin
+                                b_state_d = SEND_B;
+                            end
+                        end else begin
+                            b_state_d = WAIT_CHANNEL_B;
+                        end
+                    end
+                end
+            end // FEEDTHROUGH_B
+
+            WAIT_CHANNEL_B, SEND_B: begin
+                if (b_free || (b_state_q == SEND_B)) begin
+                    // Block downstream
+                    mst_b_ready_o = 1'b0;
+                    // Write B response (error response using the registered request ID)
+                    slv_b_valid_o = 1'b1;
+                    slv_b_id_o = id_q;
+                    slv_b_resp_o = axi_pkg::RESP_SLVERR;
+                    slv_b_user_o = '0;
+                    if (slv_b_ready_i) begin
+                        b_state_d = FEEDTHROUGH_B;
+                    end else begin
+                        b_state_d = SEND_B;
+                    end
+                end
+            end // WAIT_CHANNEL_B, SEND_B
+
+            WAIT_COMPLETE_B: begin
+                if (mst_b_valid_i && (mst_b_id_i == id_q)) begin // B itself is passed upstream by the defaults above
+                    b_state_d = FEEDTHROUGH_B;
+                end
+            end // WAIT_COMPLETE_B
+
+            default: b_state_d = FEEDTHROUGH_B;
+
+        endcase
+    end // axi_b_channel
+
+    // Keep track of outstanding downstream write bursts: +1 per AW handshake, -1 per last-W-beat handshake.
+    always_comb begin
+        w_cnt_d = w_cnt_q;
+        if (mst_aw_valid_o && mst_aw_ready_i) begin
+            w_cnt_d += 1;
+        end
+        if (mst_w_valid_o && mst_w_ready_i && mst_w_last_o) begin
+            w_cnt_d -= 1;
+        end
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin : axi_write_channel_ff
+        if(~rst_ni) begin
+            aw_state_q <= FEEDTHROUGH_AW;
+            w_state_q <= FEEDTHROUGH_W;
+            b_state_q <= FEEDTHROUGH_B;
+            w_cnt_q <= '0;
+            w_cnt_req_q <= '0;
+            w_cnt_inj_q <= '0;
+            addr_q <= '0;
+            id_q <= '0;
+            size_q <= '0;
+            strb_q <= '0;
+            cache_q <= '0;
+            prot_q <= '0;
+            qos_q <= '0;
+            region_q <= '0;
+            aw_user_q <= '0;
+            w_user_q <= '0;
+            w_data_q <= '0;
+            result_q <= '0;
+            w_d_valid_q <= '0;
+            atop_q <= 6'b0;
+        end else begin
+            aw_state_q <= aw_state_d;
+            w_state_q <= w_state_d;
+            b_state_q <= b_state_d;
+            w_cnt_q <= w_cnt_d;
+            w_cnt_req_q <= w_cnt_req_d;
+            w_cnt_inj_q <= w_cnt_inj_d;
+            addr_q <= addr_d;
+            id_q <= id_d;
+            size_q <= size_d;
+            strb_q <= strb_d;
+            cache_q <= cache_d;
+            prot_q <= prot_d;
+            qos_q <= qos_d;
+            region_q <= region_d;
+            aw_user_q <= aw_user_d;
+            w_user_q <= w_user_d;
+            w_data_q <= w_data_d;
+            result_q <= result_d;
+            w_d_valid_q <= w_d_valid_d;
+            atop_q <= atop_d;
+        end
+    end
+
+    /**
+     * Read Channel: AR, R
+     */
+
+    /*====================================================================
+    =                                 AR                                 =
+    ====================================================================*/
+    always_comb begin : axi_ar_channel
+        // Defaults AXI Bus: forward the slave AR attributes; valid/ready default to 0 and are set per state
+        mst_ar_id_o = slv_ar_id_i;
+        mst_ar_addr_o = slv_ar_addr_i;
+        mst_ar_len_o = slv_ar_len_i;
+        mst_ar_size_o = slv_ar_size_i;
+        mst_ar_burst_o = slv_ar_burst_i;
+        mst_ar_lock_o = slv_ar_lock_i;
+        mst_ar_cache_o = slv_ar_cache_i;
+        mst_ar_prot_o = slv_ar_prot_i;
+        mst_ar_qos_o = slv_ar_qos_i;
+        mst_ar_region_o = slv_ar_region_i;
+        mst_ar_user_o = slv_ar_user_i;
+        mst_ar_valid_o = 1'b0;
+        slv_ar_ready_o = 1'b0;
+        // State Machine
+        ar_state_d = ar_state_q;
+
+        unique case (ar_state_q)
+
+            FEEDTHROUGH_AR: begin
+                // Feed through
+                mst_ar_valid_o = slv_ar_valid_i;
+                slv_ar_ready_o = mst_ar_ready_i;
+
+                if (adapter_ready) begin
+                    if (atop_valid_d == LOAD | atop_valid_d == STORE) begin
+                        if (ar_free) begin
+                            // Acquire channel
+                            slv_ar_ready_o = 1'b0;
+                            // Immediately start read request (attributes come from the AW port: not registered yet)
+                            mst_ar_valid_o = 1'b1;
+                            mst_ar_addr_o = slv_aw_addr_i;
+                            mst_ar_id_o = slv_aw_id_i;
+                            mst_ar_len_o = 8'h00;
+                            mst_ar_size_o = slv_aw_size_i;
+                            mst_ar_burst_o = 2'b00;
+                            mst_ar_lock_o = 1'h0;
+                            mst_ar_cache_o = slv_aw_cache_i;
+                            mst_ar_prot_o = slv_aw_prot_i;
+                            mst_ar_qos_o = slv_aw_qos_i;
+                            mst_ar_region_o = slv_aw_region_i;
+                            mst_ar_user_o = slv_aw_user_i;
+                            if (!mst_ar_ready_i) begin
+                                // Hold read request but do not depend on AW (SEND_AR uses the registered copies)
+                                ar_state_d = SEND_AR;
+                            end
+                        end else begin
+                            // Wait until AR is free
+                            ar_state_d = WAIT_CHANNEL_AR;
+                        end
+                    end
+                end
+            end // FEEDTHROUGH_AR
+
+            WAIT_CHANNEL_AR, SEND_AR: begin
+                // Issue read request
+                if (ar_free || (ar_state_q == SEND_AR)) begin
+                    // Inject read request (single beat, from the registered request attributes)
+                    mst_ar_valid_o = 1'b1;
+                    mst_ar_addr_o = addr_q;
+                    mst_ar_id_o = id_q;
+                    mst_ar_len_o = 8'h00;
+                    mst_ar_size_o = size_q;
+                    mst_ar_burst_o = 2'b00;
+                    mst_ar_lock_o = 1'h0;
+                    mst_ar_cache_o = cache_q;
+                    mst_ar_prot_o = prot_q;
+                    mst_ar_qos_o = qos_q;
+                    mst_ar_region_o = region_q;
+                    mst_ar_user_o = aw_user_q;
+                    if (mst_ar_ready_i) begin
+                        // Request acknowledged
+                        ar_state_d = FEEDTHROUGH_AR;
+                    end else begin
+                        // Hold read request
+                        ar_state_d = SEND_AR;
+                    end
+                end else begin
+                    // Wait until AR is free (keep feeding the slave's own AR through meanwhile)
+                    mst_ar_valid_o = slv_ar_valid_i;
+                    slv_ar_ready_o = mst_ar_ready_i;
+                end
+            end // WAIT_CHANNEL_AR, SEND_AR
+
+            default: ar_state_d = FEEDTHROUGH_AR;
+
+        endcase
+    end // axi_ar_channel
+
+    /*====================================================================
+    =                                 R                                  =
+    ====================================================================*/
+    always_comb begin : axi_r_channel
+        // Defaults AXI Bus: forward the master R response to the slave port
+        mst_r_ready_o = slv_r_ready_i;
+        slv_r_id_o = mst_r_id_i;
+        slv_r_data_o = mst_r_data_i;
+        slv_r_resp_o = mst_r_resp_i;
+        slv_r_last_o = mst_r_last_i;
+        slv_r_user_o = mst_r_user_i;
+        slv_r_valid_o = mst_r_valid_i;
+        // Defaults FF
+        r_data_d = r_data_q;
+        r_resp_d = r_resp_q;
+        r_user_d = r_user_q;
+        r_d_valid_d = r_d_valid_q;
+        // State Machine
+        r_state_d = r_state_q;
+
+        unique case (r_state_q)
+
+            FEEDTHROUGH_R: begin
+                if (adapter_ready) begin
+                    // Reset read flag
+                    r_d_valid_d = 1'b0;
+
+                    if (atop_valid_d == LOAD || atop_valid_d == STORE) begin
+                        // Wait for R response to read data
+                        r_state_d = WAIT_DATA_R;
+                    end else if (atop_valid_d == INVALID) begin
+                        // Send R response once channel is free
+                        if (r_free) begin
+                            // Acquire the R channel
+                            // Block downstream
+                            mst_r_ready_o = 1'b0;
+                            // Send R error response (ID comes straight from the AW port: not registered yet)
+                            slv_r_valid_o = 1'b1;
+                            slv_r_data_o = '0;
+                            slv_r_id_o = slv_aw_id_i;
+                            slv_r_last_o = 1'b1;
+                            slv_r_resp_o = axi_pkg::RESP_SLVERR;
+                            slv_r_user_o = '0;
+                            if (!slv_r_ready_i) begin
+                                // Hold R response
+                                r_state_d = SEND_R;
+                            end
+                        end else begin
+                            r_state_d = WAIT_CHANNEL_R;
+                        end
+                    end
+                end
+            end // FEEDTHROUGH_R
+
+            WAIT_DATA_R: begin
+                // Read data (only a response whose ID matches the captured request is consumed)
+                if (mst_r_valid_i && (mst_r_id_i == id_q)) begin
+                    // Acknowledge downstream and block upstream
+                    mst_r_ready_o = 1'b1;
+                    slv_r_valid_o = 1'b0;
+                    // Store data
+                    r_data_d = mst_r_data_i;
+                    r_resp_d = mst_r_resp_i;
+                    r_user_d = mst_r_user_i;
+                    r_d_valid_d = 1'b1;
+                    if (atop_valid_q == STORE) begin // a STORE sends no R response upstream
+                        r_state_d = FEEDTHROUGH_R;
+                    end else begin
+                        // Wait for B resp before injecting R
+                        r_state_d = WAIT_CHANNEL_R;
+                    end
+                end
+            end // WAIT_DATA_R
+
+            WAIT_CHANNEL_R, SEND_R: begin
+                // Wait for the R channel to become free and B response to be valid
+                // TODO: Use b_state_d to be one cycle quicker
+                if ((r_free && (b_state_q != WAIT_COMPLETE_B)) || (r_state_q == SEND_R)) begin
+                    // Block downstream
+                    mst_r_ready_o = 1'b0;
+                    // Send R response (the data read from memory, stored in WAIT_DATA_R)
+                    slv_r_valid_o = 1'b1;
+                    slv_r_data_o = r_data_q;
+                    slv_r_id_o = id_q;
+                    slv_r_last_o = 1'b1;
+                    slv_r_resp_o = r_resp_q;
+                    slv_r_user_o = r_user_q;
+                    if (atop_valid_q == INVALID) begin // invalid request: answer with zero data and SLVERR instead
+                        slv_r_data_o = '0;
+                        slv_r_resp_o = axi_pkg::RESP_SLVERR;
+                        slv_r_user_o = '0;
+                    end
+                    if (slv_r_ready_i) begin
+                        r_state_d = FEEDTHROUGH_R;
+                    end else begin
+                        r_state_d = SEND_R;
+                    end
+                end
+            end // WAIT_CHANNEL_R, SEND_R
+
+            default: r_state_d = FEEDTHROUGH_R;
+
+        endcase
+    end // axi_r_channel
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin : axi_read_channel_ff
+        if(~rst_ni) begin
+            ar_state_q <= FEEDTHROUGH_AR;
+            r_state_q <= FEEDTHROUGH_R;
+            r_data_q <= '0;
+            r_resp_q <= '0;
+            r_user_q <= '0;
+            r_d_valid_q <= 1'b0;
+        end else begin
+            ar_state_q <= ar_state_d;
+            r_state_q <= r_state_d;
+            r_data_q <= r_data_d;
+            r_resp_q <= r_resp_d;
+            r_user_q <= r_user_d;
+            r_d_valid_q <= r_d_valid_d;
+        end
+    end
+
+    /**
+     * ALU: operands are the memory data (a) and the AMO operand (b), masked by the byte-expanded strobe
+     */
+
+    assign op_a = r_data_q & strb_ext;
+    assign op_b = w_data_q & strb_ext;
+    assign sign_a = |(op_a & ~(strb_ext >> 1)); // set if the topmost strobed bit of op_a is 1
+    assign sign_b = |(op_b & ~(strb_ext >> 1)); // set if the topmost strobed bit of op_b is 1
+    assign alu_result_ext = res;
+
+    generate
+        if (AXI_ALU_RATIO == 1 && RISCV_WORD_WIDTH == 32) begin
+            assign alu_operand_a = op_a;
+            assign alu_operand_b = op_b;
+            assign res = alu_result;
+        end else if (AXI_ALU_RATIO == 1 && RISCV_WORD_WIDTH == 64) begin
+            assign res = alu_result;
+            always_comb begin
+                op_a_sign_ext = op_a | ({AXI_ALU_RATIO*RISCV_WORD_WIDTH{sign_a}} & ~strb_ext);
+                op_b_sign_ext = op_b | ({AXI_ALU_RATIO*RISCV_WORD_WIDTH{sign_b}} & ~strb_ext);
+
+                if (atop_q[2:0] == axi_pkg::ATOP_SMAX || atop_q[2:0] == axi_pkg::ATOP_SMIN) begin
+                    // Sign extend (only the signed min/max comparisons need it)
+                    alu_operand_a = op_a_sign_ext;
+                    alu_operand_b = op_b_sign_ext;
+                end else begin
+                    // No sign extension necessary
+                    alu_operand_a = op_a;
+                    alu_operand_b = op_b;
+                end
+            end
+        end else begin
+            always_comb begin
+                op_a_sign_ext = op_a | ({AXI_ALU_RATIO*RISCV_WORD_WIDTH{sign_a}} & ~strb_ext);
+                op_b_sign_ext = op_b | ({AXI_ALU_RATIO*RISCV_WORD_WIDTH{sign_b}} & ~strb_ext);
+
+                if (atop_q[2:0] == axi_pkg::ATOP_SMAX || atop_q[2:0] == axi_pkg::ATOP_SMIN) begin
+                    // Sign extend; the addr_q bit slice selects which word of the AXI beat feeds the ALU
+                    alu_operand_a = op_a_sign_ext[addr_q[$clog2(AXI_DATA_WIDTH/8)-1:$clog2(RISCV_WORD_WIDTH/8)]];
+                    alu_operand_b = op_b_sign_ext[addr_q[$clog2(AXI_DATA_WIDTH/8)-1:$clog2(RISCV_WORD_WIDTH/8)]];
+                end else begin
+                    // No sign extension necessary
+                    alu_operand_a = op_a[addr_q[$clog2(AXI_DATA_WIDTH/8)-1:$clog2(RISCV_WORD_WIDTH/8)]];
+                    alu_operand_b = op_b[addr_q[$clog2(AXI_DATA_WIDTH/8)-1:$clog2(RISCV_WORD_WIDTH/8)]];
+                end
+                res = '0; // all other words of the beat are zero; only the addressed word carries the result
+                res[addr_q[$clog2(AXI_DATA_WIDTH/8)-1:$clog2(RISCV_WORD_WIDTH/8)]] = alu_result;
+            end
+        end
+    endgenerate
+
+    generate
+        for (genvar i = 0; i < AXI_STRB_WIDTH; i++) begin // expand each strobe bit into a byte-wide mask
+            always_comb begin
+                if (strb_q[i]) begin
+                    strb_ext[i] = 8'hFF;
+                end else begin
+                    strb_ext[i] = 8'h00;
+                end
+            end
+        end
+    endgenerate
+
+    axi_riscv_amos_alu #(
+        .DATA_WIDTH ( RISCV_WORD_WIDTH )
+    ) i_amo_alu (
+        .amo_op_i ( atop_q ),
+        .amo_operand_a_i ( alu_operand_a ),
+        .amo_operand_b_i ( alu_operand_b ),
+        .amo_result_o ( alu_result )
+    );
+
+    // Validate parameters (simulation-only elaboration checks; AXI_USER_WIDTH is not checked).
+// pragma translate_off
+`ifndef VERILATOR
+    initial begin: validate_params
+        assert (AXI_ADDR_WIDTH > 0)
+            else $fatal(1, "AXI_ADDR_WIDTH must be greater than 0!");
+        assert (AXI_DATA_WIDTH > 0)
+            else $fatal(1, "AXI_DATA_WIDTH must be greater than 0!");
+        assert (AXI_ID_WIDTH > 0)
+            else $fatal(1, "AXI_ID_WIDTH must be greater than 0!");
+        assert (AXI_MAX_WRITE_TXNS > 0)
+            else $fatal(1, "AXI_MAX_WRITE_TXNS must be greater than 0!");
+        assert (RISCV_WORD_WIDTH == 32 || RISCV_WORD_WIDTH == 64)
+            else $fatal(1, "RISCV_WORD_WIDTH must be 32 or 64!");
+        assert (RISCV_WORD_WIDTH <= AXI_DATA_WIDTH)
+            else $fatal(1, "RISCV_WORD_WIDTH must not be greater than AXI_DATA_WIDTH!");
+    end
+`endif
+// pragma translate_on
+
+endmodule
diff --git a/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_amos_alu.sv b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_amos_alu.sv
new file mode 100644
index 00000000..40a52b0e
--- /dev/null
+++ b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_amos_alu.sv
@@ -0,0 +1,78 @@
+// Copyright (c) 2018 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// AXI RISC-V Atomic Operations (AMOs) ALU: combinational; amo_op_i selects swap, add, bitwise or min/max of operands a and b
+module axi_riscv_amos_alu # (
+    parameter int unsigned DATA_WIDTH = 0
+) (
+    input logic [5:0] amo_op_i,
+    input logic [DATA_WIDTH-1:0] amo_operand_a_i,
+    input logic [DATA_WIDTH-1:0] amo_operand_b_i,
+    output logic [DATA_WIDTH-1:0] amo_result_o
+);
+
+    logic [DATA_WIDTH:0] adder_sum; // one bit wider than the operands; the MSB is the compare flag of the min/max arms
+    logic [DATA_WIDTH:0] adder_operand_a, adder_operand_b;
+
+    assign adder_sum = adder_operand_a + adder_operand_b;
+
+    always_comb begin
+
+        adder_operand_a = $signed(amo_operand_a_i); // defaults: sign-extended operands (overridden in some case arms)
+        adder_operand_b = $signed(amo_operand_b_i);
+
+        amo_result_o = amo_operand_a_i; // default result for opcodes matched by neither branch below
+
+        if (amo_op_i == axi_pkg::ATOP_ATOMICSWAP) begin
+            // Swap operation
+            amo_result_o = amo_operand_b_i;
+        end else if ((amo_op_i[5:4] == axi_pkg::ATOP_ATOMICLOAD) | (amo_op_i[5:4] == axi_pkg::ATOP_ATOMICSTORE)) begin
+            // Load or store operation: both use the same operation encoding in amo_op_i[2:0]
+            unique case (amo_op_i[2:0])
+                // operand_a is the result default set before the `if`; the `default` arm of this case drives '0 instead
+                axi_pkg::ATOP_ADD: amo_result_o = adder_sum[DATA_WIDTH-1:0];
+                axi_pkg::ATOP_CLR: amo_result_o = amo_operand_a_i & (~amo_operand_b_i);
+                axi_pkg::ATOP_SET: amo_result_o = amo_operand_a_i | amo_operand_b_i;
+                axi_pkg::ATOP_EOR: amo_result_o = amo_operand_a_i ^ amo_operand_b_i;
+                axi_pkg::ATOP_SMAX: begin
+                    adder_operand_b = -$signed(amo_operand_b_i); // adder computes a - b; MSB set means a < b (signed)
+                    amo_result_o = adder_sum[DATA_WIDTH] ? amo_operand_b_i : amo_operand_a_i;
+                end
+                axi_pkg::ATOP_SMIN: begin
+                    adder_operand_b = -$signed(amo_operand_b_i);
+                    amo_result_o = adder_sum[DATA_WIDTH] ? amo_operand_a_i : amo_operand_b_i;
+                end
+                axi_pkg::ATOP_UMAX: begin
+                    adder_operand_a = $unsigned(amo_operand_a_i); // zero-extend for the unsigned compare
+                    adder_operand_b = -$unsigned(amo_operand_b_i); // adder computes a - b; MSB set means a < b (unsigned)
+                    amo_result_o = adder_sum[DATA_WIDTH] ? amo_operand_b_i : amo_operand_a_i;
+                end
+                axi_pkg::ATOP_UMIN: begin
+                    adder_operand_a = $unsigned(amo_operand_a_i);
+                    adder_operand_b = -$unsigned(amo_operand_b_i);
+                    amo_result_o = adder_sum[DATA_WIDTH] ? amo_operand_a_i : amo_operand_b_i;
+                end
+                default: amo_result_o = '0; // NOTE(review): presumably unreachable if the eight axi_pkg codes cover all of [2:0] - confirm
+            endcase
+        end
+    end
+
+    // Validate parameters (simulation-only elaboration check).
+// pragma translate_off
+`ifndef VERILATOR
+    initial begin: validate_params
+        assert (DATA_WIDTH > 0)
+            else $fatal(1, "DATA_WIDTH must be greater than 0!");
+    end
+`endif
+// pragma translate_on
+
+endmodule
diff --git a/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_atomics.sv b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_atomics.sv
new file mode 100644
index 00000000..f9b72d34
--- /dev/null
+++ b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_atomics.sv
@@ -0,0 +1,400 @@
+// Copyright (c) 2018 ETH Zurich, University of Bologna
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// AXI RISC-V Atomics ("A" Extension) Adapter
+//
+// This AXI adapter implements the RISC-V "A" extension and adheres to the RVWMO memory consistency
+// model.
+//
+// Maintainer: Andreas Kurth
+
+module axi_riscv_atomics #(
+    /// AXI Parameters
+    parameter int unsigned AXI_ADDR_WIDTH = 0,
+    parameter int unsigned AXI_DATA_WIDTH = 0,
+    parameter int unsigned AXI_ID_WIDTH = 0,
+    parameter int unsigned AXI_USER_WIDTH = 0,
+    // Maximum number of AXI write bursts outstanding at the same time
+    parameter int unsigned AXI_MAX_WRITE_TXNS = 0,
+    // Word width of the widest RISC-V processor that can issue requests to this module.
+    // 32 for RV32; 64 for RV64, where both 32-bit (.W suffix) and 64-bit (.D suffix) AMOs are
+    // supported if `aw_strb` is set correctly.
+ parameter int unsigned RISCV_WORD_WIDTH = 0, + /// Derived Parameters (do NOT change manually!) + localparam int unsigned AXI_STRB_WIDTH = AXI_DATA_WIDTH / 8 +) ( + input logic clk_i, + input logic rst_ni, + + /// Slave Interface + input logic [AXI_ADDR_WIDTH-1:0] slv_aw_addr_i, + input logic [2:0] slv_aw_prot_i, + input logic [3:0] slv_aw_region_i, + input logic [5:0] slv_aw_atop_i, + input logic [7:0] slv_aw_len_i, + input logic [2:0] slv_aw_size_i, + input logic [1:0] slv_aw_burst_i, + input logic slv_aw_lock_i, + input logic [3:0] slv_aw_cache_i, + input logic [3:0] slv_aw_qos_i, + input logic [AXI_ID_WIDTH-1:0] slv_aw_id_i, + input logic [AXI_USER_WIDTH-1:0] slv_aw_user_i, + output logic slv_aw_ready_o, + input logic slv_aw_valid_i, + + input logic [AXI_ADDR_WIDTH-1:0] slv_ar_addr_i, + input logic [2:0] slv_ar_prot_i, + input logic [3:0] slv_ar_region_i, + input logic [7:0] slv_ar_len_i, + input logic [2:0] slv_ar_size_i, + input logic [1:0] slv_ar_burst_i, + input logic slv_ar_lock_i, + input logic [3:0] slv_ar_cache_i, + input logic [3:0] slv_ar_qos_i, + input logic [AXI_ID_WIDTH-1:0] slv_ar_id_i, + input logic [AXI_USER_WIDTH-1:0] slv_ar_user_i, + output logic slv_ar_ready_o, + input logic slv_ar_valid_i, + + input logic [AXI_DATA_WIDTH-1:0] slv_w_data_i, + input logic [AXI_STRB_WIDTH-1:0] slv_w_strb_i, + input logic [AXI_USER_WIDTH-1:0] slv_w_user_i, + input logic slv_w_last_i, + output logic slv_w_ready_o, + input logic slv_w_valid_i, + + output logic [AXI_DATA_WIDTH-1:0] slv_r_data_o, + output logic [1:0] slv_r_resp_o, + output logic slv_r_last_o, + output logic [AXI_ID_WIDTH-1:0] slv_r_id_o, + output logic [AXI_USER_WIDTH-1:0] slv_r_user_o, + input logic slv_r_ready_i, + output logic slv_r_valid_o, + + output logic [1:0] slv_b_resp_o, + output logic [AXI_ID_WIDTH-1:0] slv_b_id_o, + output logic [AXI_USER_WIDTH-1:0] slv_b_user_o, + input logic slv_b_ready_i, + output logic slv_b_valid_o, + + /// Master Interface + output logic [AXI_ADDR_WIDTH-1:0] 
mst_aw_addr_o, + output logic [2:0] mst_aw_prot_o, + output logic [3:0] mst_aw_region_o, + output logic [5:0] mst_aw_atop_o, + output logic [7:0] mst_aw_len_o, + output logic [2:0] mst_aw_size_o, + output logic [1:0] mst_aw_burst_o, + output logic mst_aw_lock_o, + output logic [3:0] mst_aw_cache_o, + output logic [3:0] mst_aw_qos_o, + output logic [AXI_ID_WIDTH-1:0] mst_aw_id_o, + output logic [AXI_USER_WIDTH-1:0] mst_aw_user_o, + input logic mst_aw_ready_i, + output logic mst_aw_valid_o, + + output logic [AXI_ADDR_WIDTH-1:0] mst_ar_addr_o, + output logic [2:0] mst_ar_prot_o, + output logic [3:0] mst_ar_region_o, + output logic [7:0] mst_ar_len_o, + output logic [2:0] mst_ar_size_o, + output logic [1:0] mst_ar_burst_o, + output logic mst_ar_lock_o, + output logic [3:0] mst_ar_cache_o, + output logic [3:0] mst_ar_qos_o, + output logic [AXI_ID_WIDTH-1:0] mst_ar_id_o, + output logic [AXI_USER_WIDTH-1:0] mst_ar_user_o, + input logic mst_ar_ready_i, + output logic mst_ar_valid_o, + + output logic [AXI_DATA_WIDTH-1:0] mst_w_data_o, + output logic [AXI_STRB_WIDTH-1:0] mst_w_strb_o, + output logic [AXI_USER_WIDTH-1:0] mst_w_user_o, + output logic mst_w_last_o, + input logic mst_w_ready_i, + output logic mst_w_valid_o, + + input logic [AXI_DATA_WIDTH-1:0] mst_r_data_i, + input logic [1:0] mst_r_resp_i, + input logic mst_r_last_i, + input logic [AXI_ID_WIDTH-1:0] mst_r_id_i, + input logic [AXI_USER_WIDTH-1:0] mst_r_user_i, + output logic mst_r_ready_o, + input logic mst_r_valid_i, + + input logic [1:0] mst_b_resp_i, + input logic [AXI_ID_WIDTH-1:0] mst_b_id_i, + input logic [AXI_USER_WIDTH-1:0] mst_b_user_i, + output logic mst_b_ready_o, + input logic mst_b_valid_i +); + + // Make the entire address range exclusively accessible. Since the AMO adapter does not support + // address ranges, it would not make sense to expose the address range as a parameter of this + // module. 
+ localparam longint unsigned ADDR_BEGIN = '0; + localparam longint unsigned ADDR_END = {AXI_ADDR_WIDTH{1'b1}}; + + logic [AXI_ADDR_WIDTH-1:0] int_axi_aw_addr; + logic [2:0] int_axi_aw_prot; + logic [3:0] int_axi_aw_region; + logic [5:0] int_axi_aw_atop; + logic [7:0] int_axi_aw_len; + logic [2:0] int_axi_aw_size; + logic [1:0] int_axi_aw_burst; + logic int_axi_aw_lock; + logic [3:0] int_axi_aw_cache; + logic [3:0] int_axi_aw_qos; + logic [AXI_ID_WIDTH-1:0] int_axi_aw_id; + logic [AXI_USER_WIDTH-1:0] int_axi_aw_user; + logic int_axi_aw_ready; + logic int_axi_aw_valid; + + logic [AXI_ADDR_WIDTH-1:0] int_axi_ar_addr; + logic [2:0] int_axi_ar_prot; + logic [3:0] int_axi_ar_region; + logic [7:0] int_axi_ar_len; + logic [2:0] int_axi_ar_size; + logic [1:0] int_axi_ar_burst; + logic int_axi_ar_lock; + logic [3:0] int_axi_ar_cache; + logic [3:0] int_axi_ar_qos; + logic [AXI_ID_WIDTH-1:0] int_axi_ar_id; + logic [AXI_USER_WIDTH-1:0] int_axi_ar_user; + logic int_axi_ar_ready; + logic int_axi_ar_valid; + + logic [AXI_DATA_WIDTH-1:0] int_axi_w_data; + logic [AXI_STRB_WIDTH-1:0] int_axi_w_strb; + logic [AXI_USER_WIDTH-1:0] int_axi_w_user; + logic int_axi_w_last; + logic int_axi_w_ready; + logic int_axi_w_valid; + + logic [AXI_DATA_WIDTH-1:0] int_axi_r_data; + logic [1:0] int_axi_r_resp; + logic int_axi_r_last; + logic [AXI_ID_WIDTH-1:0] int_axi_r_id; + logic [AXI_USER_WIDTH-1:0] int_axi_r_user; + logic int_axi_r_ready; + logic int_axi_r_valid; + + logic [1:0] int_axi_b_resp; + logic [AXI_ID_WIDTH-1:0] int_axi_b_id; + logic [AXI_USER_WIDTH-1:0] int_axi_b_user; + logic int_axi_b_ready; + logic int_axi_b_valid; + + axi_riscv_amos #( + .AXI_ADDR_WIDTH (AXI_ADDR_WIDTH), + .AXI_DATA_WIDTH (AXI_DATA_WIDTH), + .AXI_ID_WIDTH (AXI_ID_WIDTH), + .AXI_USER_WIDTH (AXI_USER_WIDTH), + .AXI_MAX_WRITE_TXNS (AXI_MAX_WRITE_TXNS), + .RISCV_WORD_WIDTH (RISCV_WORD_WIDTH) + ) i_amos ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .slv_aw_addr_i ( slv_aw_addr_i ), + .slv_aw_prot_i ( slv_aw_prot_i ), + 
.slv_aw_region_i ( slv_aw_region_i ), + .slv_aw_atop_i ( slv_aw_atop_i ), + .slv_aw_len_i ( slv_aw_len_i ), + .slv_aw_size_i ( slv_aw_size_i ), + .slv_aw_burst_i ( slv_aw_burst_i ), + .slv_aw_lock_i ( slv_aw_lock_i ), + .slv_aw_cache_i ( slv_aw_cache_i ), + .slv_aw_qos_i ( slv_aw_qos_i ), + .slv_aw_id_i ( slv_aw_id_i ), + .slv_aw_user_i ( slv_aw_user_i ), + .slv_aw_ready_o ( slv_aw_ready_o ), + .slv_aw_valid_i ( slv_aw_valid_i ), + .slv_ar_addr_i ( slv_ar_addr_i ), + .slv_ar_prot_i ( slv_ar_prot_i ), + .slv_ar_region_i ( slv_ar_region_i ), + .slv_ar_len_i ( slv_ar_len_i ), + .slv_ar_size_i ( slv_ar_size_i ), + .slv_ar_burst_i ( slv_ar_burst_i ), + .slv_ar_lock_i ( slv_ar_lock_i ), + .slv_ar_cache_i ( slv_ar_cache_i ), + .slv_ar_qos_i ( slv_ar_qos_i ), + .slv_ar_id_i ( slv_ar_id_i ), + .slv_ar_user_i ( slv_ar_user_i ), + .slv_ar_ready_o ( slv_ar_ready_o ), + .slv_ar_valid_i ( slv_ar_valid_i ), + .slv_w_data_i ( slv_w_data_i ), + .slv_w_strb_i ( slv_w_strb_i ), + .slv_w_user_i ( slv_w_user_i ), + .slv_w_last_i ( slv_w_last_i ), + .slv_w_ready_o ( slv_w_ready_o ), + .slv_w_valid_i ( slv_w_valid_i ), + .slv_r_data_o ( slv_r_data_o ), + .slv_r_resp_o ( slv_r_resp_o ), + .slv_r_last_o ( slv_r_last_o ), + .slv_r_id_o ( slv_r_id_o ), + .slv_r_user_o ( slv_r_user_o ), + .slv_r_ready_i ( slv_r_ready_i ), + .slv_r_valid_o ( slv_r_valid_o ), + .slv_b_resp_o ( slv_b_resp_o ), + .slv_b_id_o ( slv_b_id_o ), + .slv_b_user_o ( slv_b_user_o ), + .slv_b_ready_i ( slv_b_ready_i ), + .slv_b_valid_o ( slv_b_valid_o ), + .mst_aw_addr_o ( int_axi_aw_addr ), + .mst_aw_prot_o ( int_axi_aw_prot ), + .mst_aw_region_o ( int_axi_aw_region ), + .mst_aw_atop_o ( int_axi_aw_atop ), + .mst_aw_len_o ( int_axi_aw_len ), + .mst_aw_size_o ( int_axi_aw_size ), + .mst_aw_burst_o ( int_axi_aw_burst ), + .mst_aw_lock_o ( int_axi_aw_lock ), + .mst_aw_cache_o ( int_axi_aw_cache ), + .mst_aw_qos_o ( int_axi_aw_qos ), + .mst_aw_id_o ( int_axi_aw_id ), + .mst_aw_user_o ( int_axi_aw_user ), + .mst_aw_ready_i ( 
int_axi_aw_ready ), + .mst_aw_valid_o ( int_axi_aw_valid ), + .mst_ar_addr_o ( int_axi_ar_addr ), + .mst_ar_prot_o ( int_axi_ar_prot ), + .mst_ar_region_o ( int_axi_ar_region ), + .mst_ar_len_o ( int_axi_ar_len ), + .mst_ar_size_o ( int_axi_ar_size ), + .mst_ar_burst_o ( int_axi_ar_burst ), + .mst_ar_lock_o ( int_axi_ar_lock ), + .mst_ar_cache_o ( int_axi_ar_cache ), + .mst_ar_qos_o ( int_axi_ar_qos ), + .mst_ar_id_o ( int_axi_ar_id ), + .mst_ar_user_o ( int_axi_ar_user ), + .mst_ar_ready_i ( int_axi_ar_ready ), + .mst_ar_valid_o ( int_axi_ar_valid ), + .mst_w_data_o ( int_axi_w_data ), + .mst_w_strb_o ( int_axi_w_strb ), + .mst_w_user_o ( int_axi_w_user ), + .mst_w_last_o ( int_axi_w_last ), + .mst_w_ready_i ( int_axi_w_ready ), + .mst_w_valid_o ( int_axi_w_valid ), + .mst_r_data_i ( int_axi_r_data ), + .mst_r_resp_i ( int_axi_r_resp ), + .mst_r_last_i ( int_axi_r_last ), + .mst_r_id_i ( int_axi_r_id ), + .mst_r_user_i ( int_axi_r_user ), + .mst_r_ready_o ( int_axi_r_ready ), + .mst_r_valid_i ( int_axi_r_valid ), + .mst_b_resp_i ( int_axi_b_resp ), + .mst_b_id_i ( int_axi_b_id ), + .mst_b_user_i ( int_axi_b_user ), + .mst_b_ready_o ( int_axi_b_ready ), + .mst_b_valid_i ( int_axi_b_valid ) + ); + + axi_riscv_lrsc #( + .ADDR_BEGIN (ADDR_BEGIN), + .ADDR_END (ADDR_END), + .AXI_ADDR_WIDTH (AXI_ADDR_WIDTH), + .AXI_DATA_WIDTH (AXI_DATA_WIDTH), + .AXI_ID_WIDTH (AXI_ID_WIDTH), + .AXI_USER_WIDTH (AXI_USER_WIDTH) + ) i_lrsc ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .slv_aw_addr_i ( int_axi_aw_addr ), + .slv_aw_prot_i ( int_axi_aw_prot ), + .slv_aw_region_i ( int_axi_aw_region ), + .slv_aw_atop_i ( int_axi_aw_atop ), + .slv_aw_len_i ( int_axi_aw_len ), + .slv_aw_size_i ( int_axi_aw_size ), + .slv_aw_burst_i ( int_axi_aw_burst ), + .slv_aw_lock_i ( int_axi_aw_lock ), + .slv_aw_cache_i ( int_axi_aw_cache ), + .slv_aw_qos_i ( int_axi_aw_qos ), + .slv_aw_id_i ( int_axi_aw_id ), + .slv_aw_user_i ( int_axi_aw_user ), + .slv_aw_ready_o ( int_axi_aw_ready ), + .slv_aw_valid_i ( 
int_axi_aw_valid ), + .slv_ar_addr_i ( int_axi_ar_addr ), + .slv_ar_prot_i ( int_axi_ar_prot ), + .slv_ar_region_i ( int_axi_ar_region ), + .slv_ar_len_i ( int_axi_ar_len ), + .slv_ar_size_i ( int_axi_ar_size ), + .slv_ar_burst_i ( int_axi_ar_burst ), + .slv_ar_lock_i ( int_axi_ar_lock ), + .slv_ar_cache_i ( int_axi_ar_cache ), + .slv_ar_qos_i ( int_axi_ar_qos ), + .slv_ar_id_i ( int_axi_ar_id ), + .slv_ar_user_i ( int_axi_ar_user ), + .slv_ar_ready_o ( int_axi_ar_ready ), + .slv_ar_valid_i ( int_axi_ar_valid ), + .slv_w_data_i ( int_axi_w_data ), + .slv_w_strb_i ( int_axi_w_strb ), + .slv_w_user_i ( int_axi_w_user ), + .slv_w_last_i ( int_axi_w_last ), + .slv_w_ready_o ( int_axi_w_ready ), + .slv_w_valid_i ( int_axi_w_valid ), + .slv_r_data_o ( int_axi_r_data ), + .slv_r_resp_o ( int_axi_r_resp ), + .slv_r_last_o ( int_axi_r_last ), + .slv_r_id_o ( int_axi_r_id ), + .slv_r_user_o ( int_axi_r_user ), + .slv_r_ready_i ( int_axi_r_ready ), + .slv_r_valid_o ( int_axi_r_valid ), + .slv_b_resp_o ( int_axi_b_resp ), + .slv_b_id_o ( int_axi_b_id ), + .slv_b_user_o ( int_axi_b_user ), + .slv_b_ready_i ( int_axi_b_ready ), + .slv_b_valid_o ( int_axi_b_valid ), + .mst_aw_addr_o ( mst_aw_addr_o ), + .mst_aw_prot_o ( mst_aw_prot_o ), + .mst_aw_region_o ( mst_aw_region_o ), + .mst_aw_atop_o ( mst_aw_atop_o ), + .mst_aw_len_o ( mst_aw_len_o ), + .mst_aw_size_o ( mst_aw_size_o ), + .mst_aw_burst_o ( mst_aw_burst_o ), + .mst_aw_lock_o ( mst_aw_lock_o ), + .mst_aw_cache_o ( mst_aw_cache_o ), + .mst_aw_qos_o ( mst_aw_qos_o ), + .mst_aw_id_o ( mst_aw_id_o ), + .mst_aw_user_o ( mst_aw_user_o ), + .mst_aw_ready_i ( mst_aw_ready_i ), + .mst_aw_valid_o ( mst_aw_valid_o ), + .mst_ar_addr_o ( mst_ar_addr_o ), + .mst_ar_prot_o ( mst_ar_prot_o ), + .mst_ar_region_o ( mst_ar_region_o ), + .mst_ar_len_o ( mst_ar_len_o ), + .mst_ar_size_o ( mst_ar_size_o ), + .mst_ar_burst_o ( mst_ar_burst_o ), + .mst_ar_lock_o ( mst_ar_lock_o ), + .mst_ar_cache_o ( mst_ar_cache_o ), + .mst_ar_qos_o ( 
mst_ar_qos_o ), + .mst_ar_id_o ( mst_ar_id_o ), + .mst_ar_user_o ( mst_ar_user_o ), + .mst_ar_ready_i ( mst_ar_ready_i ), + .mst_ar_valid_o ( mst_ar_valid_o ), + .mst_w_data_o ( mst_w_data_o ), + .mst_w_strb_o ( mst_w_strb_o ), + .mst_w_user_o ( mst_w_user_o ), + .mst_w_last_o ( mst_w_last_o ), + .mst_w_ready_i ( mst_w_ready_i ), + .mst_w_valid_o ( mst_w_valid_o ), + .mst_r_data_i ( mst_r_data_i ), + .mst_r_resp_i ( mst_r_resp_i ), + .mst_r_last_i ( mst_r_last_i ), + .mst_r_id_i ( mst_r_id_i ), + .mst_r_user_i ( mst_r_user_i ), + .mst_r_ready_o ( mst_r_ready_o ), + .mst_r_valid_i ( mst_r_valid_i ), + .mst_b_resp_i ( mst_b_resp_i ), + .mst_b_id_i ( mst_b_id_i ), + .mst_b_user_i ( mst_b_user_i ), + .mst_b_ready_o ( mst_b_ready_o ), + .mst_b_valid_i ( mst_b_valid_i ) + ); + +endmodule diff --git a/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_atomics_wrap.sv b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_atomics_wrap.sv new file mode 100644 index 00000000..ad3505cf --- /dev/null +++ b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_atomics_wrap.sv @@ -0,0 +1,151 @@ +// Copyright (c) 2018 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Wrapper for the AXI RISC-V Atomics Adapter that exposes AXI SystemVerilog interfaces. +// +// See the header of `axi_riscv_atomics` for a description. 
+// +// Maintainer: Andreas Kurth + +module axi_riscv_atomics_wrap #( + /// AXI Parameters + parameter int unsigned AXI_ADDR_WIDTH = 0, + parameter int unsigned AXI_DATA_WIDTH = 0, + parameter int unsigned AXI_ID_WIDTH = 0, + parameter int unsigned AXI_USER_WIDTH = 0, + /// Maximum number of AXI bursts outstanding at the same time + parameter int unsigned AXI_MAX_WRITE_TXNS = 0, + // Word width of the widest RISC-V processor that can issue requests to this module. + // 32 for RV32; 64 for RV64, where both 32-bit (.W suffix) and 64-bit (.D suffix) AMOs are + // supported if `aw_strb` is set correctly. + parameter int unsigned RISCV_WORD_WIDTH = 0, + /// Derived Parameters (do NOT change manually!) + localparam int unsigned AXI_STRB_WIDTH = AXI_DATA_WIDTH / 8 +) ( + input logic clk_i, + input logic rst_ni, + AXI_BUS.Master mst, + AXI_BUS.Slave slv +); + + axi_riscv_atomics #( + .AXI_ADDR_WIDTH (AXI_ADDR_WIDTH), + .AXI_DATA_WIDTH (AXI_DATA_WIDTH), + .AXI_ID_WIDTH (AXI_ID_WIDTH), + .AXI_USER_WIDTH (AXI_USER_WIDTH), + .AXI_MAX_WRITE_TXNS (AXI_MAX_WRITE_TXNS), + .RISCV_WORD_WIDTH (RISCV_WORD_WIDTH) + ) i_atomics ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .slv_aw_addr_i ( slv.aw_addr ), + .slv_aw_prot_i ( slv.aw_prot ), + .slv_aw_region_i ( slv.aw_region ), + .slv_aw_atop_i ( slv.aw_atop ), + .slv_aw_len_i ( slv.aw_len ), + .slv_aw_size_i ( slv.aw_size ), + .slv_aw_burst_i ( slv.aw_burst ), + .slv_aw_lock_i ( slv.aw_lock ), + .slv_aw_cache_i ( slv.aw_cache ), + .slv_aw_qos_i ( slv.aw_qos ), + .slv_aw_id_i ( slv.aw_id ), + .slv_aw_user_i ( slv.aw_user ), + .slv_aw_ready_o ( slv.aw_ready ), + .slv_aw_valid_i ( slv.aw_valid ), + .slv_ar_addr_i ( slv.ar_addr ), + .slv_ar_prot_i ( slv.ar_prot ), + .slv_ar_region_i ( slv.ar_region ), + .slv_ar_len_i ( slv.ar_len ), + .slv_ar_size_i ( slv.ar_size ), + .slv_ar_burst_i ( slv.ar_burst ), + .slv_ar_lock_i ( slv.ar_lock ), + .slv_ar_cache_i ( slv.ar_cache ), + .slv_ar_qos_i ( slv.ar_qos ), + .slv_ar_id_i ( slv.ar_id ), + 
.slv_ar_user_i ( slv.ar_user ), + .slv_ar_ready_o ( slv.ar_ready ), + .slv_ar_valid_i ( slv.ar_valid ), + .slv_w_data_i ( slv.w_data ), + .slv_w_strb_i ( slv.w_strb ), + .slv_w_user_i ( slv.w_user ), + .slv_w_last_i ( slv.w_last ), + .slv_w_ready_o ( slv.w_ready ), + .slv_w_valid_i ( slv.w_valid ), + .slv_r_data_o ( slv.r_data ), + .slv_r_resp_o ( slv.r_resp ), + .slv_r_last_o ( slv.r_last ), + .slv_r_id_o ( slv.r_id ), + .slv_r_user_o ( slv.r_user ), + .slv_r_ready_i ( slv.r_ready ), + .slv_r_valid_o ( slv.r_valid ), + .slv_b_resp_o ( slv.b_resp ), + .slv_b_id_o ( slv.b_id ), + .slv_b_user_o ( slv.b_user ), + .slv_b_ready_i ( slv.b_ready ), + .slv_b_valid_o ( slv.b_valid ), + .mst_aw_addr_o ( mst.aw_addr ), + .mst_aw_prot_o ( mst.aw_prot ), + .mst_aw_region_o ( mst.aw_region ), + .mst_aw_atop_o ( mst.aw_atop ), + .mst_aw_len_o ( mst.aw_len ), + .mst_aw_size_o ( mst.aw_size ), + .mst_aw_burst_o ( mst.aw_burst ), + .mst_aw_lock_o ( mst.aw_lock ), + .mst_aw_cache_o ( mst.aw_cache ), + .mst_aw_qos_o ( mst.aw_qos ), + .mst_aw_id_o ( mst.aw_id ), + .mst_aw_user_o ( mst.aw_user ), + .mst_aw_ready_i ( mst.aw_ready ), + .mst_aw_valid_o ( mst.aw_valid ), + .mst_ar_addr_o ( mst.ar_addr ), + .mst_ar_prot_o ( mst.ar_prot ), + .mst_ar_region_o ( mst.ar_region ), + .mst_ar_len_o ( mst.ar_len ), + .mst_ar_size_o ( mst.ar_size ), + .mst_ar_burst_o ( mst.ar_burst ), + .mst_ar_lock_o ( mst.ar_lock ), + .mst_ar_cache_o ( mst.ar_cache ), + .mst_ar_qos_o ( mst.ar_qos ), + .mst_ar_id_o ( mst.ar_id ), + .mst_ar_user_o ( mst.ar_user ), + .mst_ar_ready_i ( mst.ar_ready ), + .mst_ar_valid_o ( mst.ar_valid ), + .mst_w_data_o ( mst.w_data ), + .mst_w_strb_o ( mst.w_strb ), + .mst_w_user_o ( mst.w_user ), + .mst_w_last_o ( mst.w_last ), + .mst_w_ready_i ( mst.w_ready ), + .mst_w_valid_o ( mst.w_valid ), + .mst_r_data_i ( mst.r_data ), + .mst_r_resp_i ( mst.r_resp ), + .mst_r_last_i ( mst.r_last ), + .mst_r_id_i ( mst.r_id ), + .mst_r_user_i ( mst.r_user ), + .mst_r_ready_o ( mst.r_ready ), + 
.mst_r_valid_i ( mst.r_valid ), + .mst_b_resp_i ( mst.b_resp ), + .mst_b_id_i ( mst.b_id ), + .mst_b_user_i ( mst.b_user ), + .mst_b_ready_o ( mst.b_ready ), + .mst_b_valid_i ( mst.b_valid ) + ); + + // Validate parameters. +// pragma translate_off +`ifndef VERILATOR + initial begin: validate_params + assert (AXI_STRB_WIDTH == AXI_DATA_WIDTH/8) + else $fatal(1, "AXI_STRB_WIDTH must equal AXI_DATA_WIDTH/8!"); + end +`endif +// pragma translate_on + +endmodule diff --git a/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_lrsc.sv b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_lrsc.sv new file mode 100644 index 00000000..82c132c7 --- /dev/null +++ b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_lrsc.sv @@ -0,0 +1,509 @@ +// Copyright (c) 2018 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// AXI RISC-V LR/SC Adapter +// +// This adapter adds support for AXI4 exclusive accesses to a slave that natively does not support +// exclusive accesses. It is to be placed between that slave and the upstream master port, so that +// the `mst` port of this module drives the slave and the `slv` port of this module is driven by +// the upstream master. +// +// Exclusive accesses are only enabled for a range of addresses specified through parameters. 
All +// addresses within that range are guaranteed to fulfill the constraints described in A7.2 of the +// AXI4 standard, both for normal and exclusive memory accesses. Addresses outside that range +// behave like a slave that does not support exclusive memory accesses (see AXI4, A7.2.5). +// +// Limitations: +// - The adapter allows at most one read and one write access to be outstanding at any given +// time. +// - The adapter does not support bursts in exclusive accessing. Only single words can be +// reserved. +// +// Maintainer: Andreas Kurth + +module axi_riscv_lrsc #( + /// Exclusively-accessible address range (closed interval from ADDR_BEGIN to ADDR_END) + parameter longint unsigned ADDR_BEGIN = 0, + parameter longint unsigned ADDR_END = 0, + /// AXI Parameters + parameter int unsigned AXI_ADDR_WIDTH = 0, + parameter int unsigned AXI_DATA_WIDTH = 0, + parameter int unsigned AXI_ID_WIDTH = 0, + parameter int unsigned AXI_USER_WIDTH = 0, + /// Derived Parameters (do NOT change manually!) 
+ localparam int unsigned AXI_STRB_WIDTH = AXI_DATA_WIDTH / 8 +) ( + input logic clk_i, + input logic rst_ni, + + /// Slave Interface + input logic [AXI_ADDR_WIDTH-1:0] slv_aw_addr_i, + input logic [2:0] slv_aw_prot_i, + input logic [3:0] slv_aw_region_i, + input logic [5:0] slv_aw_atop_i, + input logic [7:0] slv_aw_len_i, + input logic [2:0] slv_aw_size_i, + input logic [1:0] slv_aw_burst_i, + input logic slv_aw_lock_i, + input logic [3:0] slv_aw_cache_i, + input logic [3:0] slv_aw_qos_i, + input logic [AXI_ID_WIDTH-1:0] slv_aw_id_i, + input logic [AXI_USER_WIDTH-1:0] slv_aw_user_i, + output logic slv_aw_ready_o, + input logic slv_aw_valid_i, + + input logic [AXI_ADDR_WIDTH-1:0] slv_ar_addr_i, + input logic [2:0] slv_ar_prot_i, + input logic [3:0] slv_ar_region_i, + input logic [7:0] slv_ar_len_i, + input logic [2:0] slv_ar_size_i, + input logic [1:0] slv_ar_burst_i, + input logic slv_ar_lock_i, + input logic [3:0] slv_ar_cache_i, + input logic [3:0] slv_ar_qos_i, + input logic [AXI_ID_WIDTH-1:0] slv_ar_id_i, + input logic [AXI_USER_WIDTH-1:0] slv_ar_user_i, + output logic slv_ar_ready_o, + input logic slv_ar_valid_i, + + input logic [AXI_DATA_WIDTH-1:0] slv_w_data_i, + input logic [AXI_STRB_WIDTH-1:0] slv_w_strb_i, + input logic [AXI_USER_WIDTH-1:0] slv_w_user_i, + input logic slv_w_last_i, + output logic slv_w_ready_o, + input logic slv_w_valid_i, + + output logic [AXI_DATA_WIDTH-1:0] slv_r_data_o, + output logic [1:0] slv_r_resp_o, + output logic slv_r_last_o, + output logic [AXI_ID_WIDTH-1:0] slv_r_id_o, + output logic [AXI_USER_WIDTH-1:0] slv_r_user_o, + input logic slv_r_ready_i, + output logic slv_r_valid_o, + + output logic [1:0] slv_b_resp_o, + output logic [AXI_ID_WIDTH-1:0] slv_b_id_o, + output logic [AXI_USER_WIDTH-1:0] slv_b_user_o, + input logic slv_b_ready_i, + output logic slv_b_valid_o, + + /// Master Interface + output logic [AXI_ADDR_WIDTH-1:0] mst_aw_addr_o, + output logic [2:0] mst_aw_prot_o, + output logic [3:0] mst_aw_region_o, + output 
logic [5:0] mst_aw_atop_o, + output logic [7:0] mst_aw_len_o, + output logic [2:0] mst_aw_size_o, + output logic [1:0] mst_aw_burst_o, + output logic mst_aw_lock_o, + output logic [3:0] mst_aw_cache_o, + output logic [3:0] mst_aw_qos_o, + output logic [AXI_ID_WIDTH-1:0] mst_aw_id_o, + output logic [AXI_USER_WIDTH-1:0] mst_aw_user_o, + input logic mst_aw_ready_i, + output logic mst_aw_valid_o, + + output logic [AXI_ADDR_WIDTH-1:0] mst_ar_addr_o, + output logic [2:0] mst_ar_prot_o, + output logic [3:0] mst_ar_region_o, + output logic [7:0] mst_ar_len_o, + output logic [2:0] mst_ar_size_o, + output logic [1:0] mst_ar_burst_o, + output logic mst_ar_lock_o, + output logic [3:0] mst_ar_cache_o, + output logic [3:0] mst_ar_qos_o, + output logic [AXI_ID_WIDTH-1:0] mst_ar_id_o, + output logic [AXI_USER_WIDTH-1:0] mst_ar_user_o, + input logic mst_ar_ready_i, + output logic mst_ar_valid_o, + + output logic [AXI_DATA_WIDTH-1:0] mst_w_data_o, + output logic [AXI_STRB_WIDTH-1:0] mst_w_strb_o, + output logic [AXI_USER_WIDTH-1:0] mst_w_user_o, + output logic mst_w_last_o, + input logic mst_w_ready_i, + output logic mst_w_valid_o, + + input logic [AXI_DATA_WIDTH-1:0] mst_r_data_i, + input logic [1:0] mst_r_resp_i, + input logic mst_r_last_i, + input logic [AXI_ID_WIDTH-1:0] mst_r_id_i, + input logic [AXI_USER_WIDTH-1:0] mst_r_user_i, + output logic mst_r_ready_o, + input logic mst_r_valid_i, + + input logic [1:0] mst_b_resp_i, + input logic [AXI_ID_WIDTH-1:0] mst_b_id_i, + input logic [AXI_USER_WIDTH-1:0] mst_b_user_i, + output logic mst_b_ready_o, + input logic mst_b_valid_i +); + + // Declarations of Signals and Types + + logic [AXI_ID_WIDTH-1:0] art_check_id, + art_set_id, + w_id_d, w_id_q; + + logic [AXI_ADDR_WIDTH-1:0] art_check_addr, + art_clr_addr, + art_set_addr, + rd_clr_addr, + wr_clr_addr, + w_addr_d, w_addr_q; + + logic art_check_req, art_check_gnt, + art_clr_req, art_clr_gnt, + art_set_req, art_set_gnt, + rd_clr_req, rd_clr_gnt, + wr_clr_req, wr_clr_gnt; + + logic 
art_check_res; + + logic b_excl_d, b_excl_q, + r_excl_d, r_excl_q; + + typedef enum logic [1:0] {R_IDLE, R_WAIT_AR, R_WAIT_R} r_state_t; + r_state_t r_state_d, r_state_q; + + typedef enum logic [2:0] {AW_IDLE, W_FORWARD, W_BYPASS, W_WAIT_ART_CLR, W_DROP, B_FORWARD, + B_INJECT} w_state_t; + w_state_t w_state_d, w_state_q; + + // AR and R Channel + + // Time-Invariant Signal Assignments + assign mst_ar_addr_o = slv_ar_addr_i; + assign mst_ar_prot_o = slv_ar_prot_i; + assign mst_ar_region_o = slv_ar_region_i; + assign mst_ar_len_o = slv_ar_len_i; + assign mst_ar_size_o = slv_ar_size_i; + assign mst_ar_burst_o = slv_ar_burst_i; + assign mst_ar_lock_o = 1'b0; + assign mst_ar_cache_o = slv_ar_cache_i; + assign mst_ar_qos_o = slv_ar_qos_i; + assign mst_ar_id_o = slv_ar_id_i; + assign mst_ar_user_o = slv_ar_user_i; + assign slv_r_data_o = mst_r_data_i; + assign slv_r_last_o = mst_r_last_i; + assign slv_r_id_o = mst_r_id_i; + assign slv_r_user_o = mst_r_user_i; + + // FSM for Time-Variant Signal Assignments + always_comb begin + mst_ar_valid_o = 1'b0; + slv_ar_ready_o = 1'b0; + mst_r_ready_o = 1'b0; + slv_r_valid_o = 1'b0; + slv_r_resp_o = '0; + art_set_addr = '0; + art_set_id = '0; + art_set_req = 1'b0; + rd_clr_addr = '0; + rd_clr_req = 1'b0; + r_excl_d = r_excl_q; + r_state_d = r_state_q; + + case (r_state_q) + + R_IDLE: begin + if (slv_ar_valid_i) begin + if (slv_ar_addr_i >= ADDR_BEGIN && slv_ar_addr_i <= ADDR_END && slv_ar_lock_i && + slv_ar_len_i == 8'h00) begin + // Inside exclusively-accessible address range and exclusive access and no + // burst + art_set_addr = slv_ar_addr_i; + art_set_id = slv_ar_id_i; + art_set_req = 1'b1; + r_excl_d = 1'b1; + if (art_set_gnt) begin + mst_ar_valid_o = 1'b1; + if (mst_ar_ready_i) begin + slv_ar_ready_o = 1'b1; + r_state_d = R_WAIT_R; + end else begin + r_state_d = R_WAIT_AR; + end + end + end else begin + // Outside exclusively-accessible address range or regular access or burst + r_excl_d = 1'b0; + mst_ar_valid_o = 1'b1; + if 
(mst_ar_ready_i) begin + slv_ar_ready_o = 1'b1; + r_state_d = R_WAIT_R; + end else begin + r_state_d = R_WAIT_AR; + end + end + end + end + + R_WAIT_AR: begin + mst_ar_valid_o = slv_ar_valid_i; + slv_ar_ready_o = mst_ar_ready_i; + if (mst_ar_ready_i && mst_ar_valid_o) begin + r_state_d = R_WAIT_R; + end + end + + R_WAIT_R: begin + mst_r_ready_o = slv_r_ready_i; + slv_r_valid_o = mst_r_valid_i; + if (mst_r_resp_i[1] == 1'b0) begin + slv_r_resp_o = {1'b0, r_excl_q}; + end else begin + slv_r_resp_o = mst_r_resp_i; + end + if (mst_r_valid_i && mst_r_ready_o && mst_r_last_i) begin + r_excl_d = 1'b0; + r_state_d = R_IDLE; + end + end + + default: begin + r_state_d = R_IDLE; + end + endcase + end + + // AW, W and B Channel + + // Time-Invariant Signal Assignments + assign mst_aw_addr_o = slv_aw_addr_i; + assign mst_aw_prot_o = slv_aw_prot_i; + assign mst_aw_region_o = slv_aw_region_i; + assign mst_aw_atop_o = slv_aw_atop_i; + assign mst_aw_len_o = slv_aw_len_i; + assign mst_aw_size_o = slv_aw_size_i; + assign mst_aw_burst_o = slv_aw_burst_i; + assign mst_aw_lock_o = 1'b0; + assign mst_aw_cache_o = slv_aw_cache_i; + assign mst_aw_qos_o = slv_aw_qos_i; + assign mst_aw_id_o = slv_aw_id_i; + assign mst_aw_user_o = slv_aw_user_i; + assign mst_w_data_o = slv_w_data_i; + assign mst_w_strb_o = slv_w_strb_i; + assign mst_w_user_o = slv_w_user_i; + assign mst_w_last_o = slv_w_last_i; + + always_comb begin + w_addr_d = w_addr_q; + w_id_d = w_id_q; + if (slv_aw_valid_i && slv_aw_ready_o) begin + w_addr_d = slv_aw_addr_i; + w_id_d = slv_aw_id_i; + end + end + + // FSM for Time-Variant Signal Assignments + always_comb begin + mst_aw_valid_o = 1'b0; + slv_aw_ready_o = 1'b0; + mst_w_valid_o = 1'b0; + slv_w_ready_o = 1'b0; + slv_b_valid_o = 1'b0; + mst_b_ready_o = 1'b0; + slv_b_resp_o = '0; + slv_b_id_o = '0; + slv_b_user_o = '0; + art_check_addr = '0; + art_check_id = '0; + art_check_req = 1'b0; + wr_clr_addr = '0; + wr_clr_req = 1'b0; + b_excl_d = b_excl_q; + w_state_d = w_state_q; + + 
case (w_state_q) + + AW_IDLE: begin + if (slv_aw_valid_i) begin + // New AW, and W channel is idle + if (slv_aw_addr_i >= ADDR_BEGIN && slv_aw_addr_i <= ADDR_END) begin + // Inside exclusively-accessible address range + if (slv_aw_lock_i && slv_aw_len_i == 8'h00) begin + // Exclusive access and no burst, so check if reservation exists + art_check_addr = slv_aw_addr_i; + art_check_id = slv_aw_id_i; + art_check_req = 1'b1; + if (art_check_gnt) begin + if (art_check_res) begin + // Yes, so forward downstream + mst_aw_valid_o = 1'b1; + if (mst_aw_ready_i) begin + slv_aw_ready_o = 1'b1; + b_excl_d = 1'b1; + w_state_d = W_FORWARD; + end + end else begin + // No, drop in W channel. + slv_aw_ready_o = 1'b1; + w_state_d = W_DROP; + end + end + end else begin + // Non-exclusive access or burst, so forward downstream + mst_aw_valid_o = 1'b1; + if (mst_aw_ready_i) begin + slv_aw_ready_o = 1'b1; + w_state_d = W_FORWARD; + end + end + end else begin + // Outside exclusively-accessible address range, so bypass any + // modifications. 
+ mst_aw_valid_o = 1'b1; + slv_aw_ready_o = mst_aw_ready_i; + if (slv_aw_ready_o) begin + w_state_d = W_BYPASS; + end + end + end + end + + W_FORWARD: begin + mst_w_valid_o = slv_w_valid_i; + slv_w_ready_o = mst_w_ready_i; + if (slv_w_valid_i && slv_w_ready_o && slv_w_last_i) begin + wr_clr_addr = w_addr_q; + wr_clr_req = 1'b1; + if (wr_clr_gnt) begin + w_state_d = B_FORWARD; + end else begin + w_state_d = W_WAIT_ART_CLR; + end + end + end + + W_BYPASS: begin + mst_w_valid_o = slv_w_valid_i; + slv_w_ready_o = mst_w_ready_i; + if (slv_w_valid_i && slv_w_ready_o && slv_w_last_i) begin + w_state_d = B_FORWARD; + end + end + + W_WAIT_ART_CLR: begin + wr_clr_addr = w_addr_q; + wr_clr_req = 1'b1; + if (wr_clr_gnt) begin + w_state_d = B_FORWARD; + end + end + + W_DROP: begin + slv_w_ready_o = 1'b1; + if (slv_w_valid_i && slv_w_last_i) begin + w_state_d = B_INJECT; + end + end + + B_FORWARD: begin + mst_b_ready_o = slv_b_ready_i; + slv_b_valid_o = mst_b_valid_i; + slv_b_resp_o[1] = mst_b_resp_i[1]; + slv_b_resp_o[0] = (mst_b_resp_i[1] == 1'b0) ? 
b_excl_q : mst_b_resp_i[0]; + slv_b_user_o = mst_b_user_i; + slv_b_id_o = mst_b_id_i; + if (slv_b_valid_o && slv_b_ready_i) begin + b_excl_d = 1'b0; + w_state_d = AW_IDLE; + end + end + + B_INJECT: begin + slv_b_id_o = w_id_q; + slv_b_resp_o = 2'b00; + slv_b_valid_o = 1'b1; + if (slv_b_ready_i) begin + w_state_d = AW_IDLE; + end + end + + default: begin + w_state_d = AW_IDLE; + end + endcase + end + + // AXI Reservation Table + axi_res_tbl #( + .AXI_ADDR_WIDTH (AXI_ADDR_WIDTH), + .AXI_ID_WIDTH (AXI_ID_WIDTH) + ) i_art ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .clr_addr_i (art_clr_addr), + .clr_req_i (art_clr_req), + .clr_gnt_o (art_clr_gnt), + .set_addr_i (art_set_addr), + .set_id_i (art_set_id), + .set_req_i (art_set_req), + .set_gnt_o (art_set_gnt), + .check_addr_i (art_check_addr), + .check_id_i (art_check_id), + .check_res_o (art_check_res), + .check_req_i (art_check_req), + .check_gnt_o (art_check_gnt) + ); + + // ART Clear Arbiter + stream_arbiter #( + .DATA_T (logic[AXI_ADDR_WIDTH-1:0]), + .N_INP (2) + ) i_non_excl_acc_arb ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .inp_data_i ({rd_clr_addr, wr_clr_addr}), + .inp_valid_i ({rd_clr_req, wr_clr_req}), + .inp_ready_o ({rd_clr_gnt, wr_clr_gnt}), + .oup_data_o (art_clr_addr), + .oup_valid_o (art_clr_req), + .oup_ready_i (art_clr_gnt) + ); + + // Registers + always_ff @(posedge clk_i, negedge rst_ni) begin + if (~rst_ni) begin + b_excl_q <= 1'b0; + r_excl_q <= 1'b0; + r_state_q <= R_IDLE; + w_addr_q <= '0; + w_id_q <= '0; + w_state_q <= AW_IDLE; + end else begin + b_excl_q <= b_excl_d; + r_excl_q <= r_excl_d; + r_state_q <= r_state_d; + w_addr_q <= w_addr_d; + w_id_q <= w_id_d; + w_state_q <= w_state_d; + end + end + + // Validate parameters. 
+// pragma translate_off +`ifndef VERILATOR + initial begin: validate_params + assert (ADDR_END > ADDR_BEGIN) + else $fatal(1, "ADDR_END must be greater than ADDR_BEGIN!"); + assert (AXI_ADDR_WIDTH > 0) + else $fatal(1, "AXI_ADDR_WIDTH must be greater than 0!"); + assert (AXI_DATA_WIDTH > 0) + else $fatal(1, "AXI_DATA_WIDTH must be greater than 0!"); + assert (AXI_ID_WIDTH > 0) + else $fatal(1, "AXI_ID_WIDTH must be greater than 0!"); + end +`endif +// pragma translate_on + +endmodule diff --git a/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_lrsc_wrap.sv b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_lrsc_wrap.sv new file mode 100644 index 00000000..3eb409bf --- /dev/null +++ b/test/type_param/corev_apu/src/axi_riscv_atomics/src/axi_riscv_lrsc_wrap.sv @@ -0,0 +1,148 @@ +// Copyright (c) 2018 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Wrapper for the AXI RISC-V LR/SC Adapter that exposes AXI SystemVerilog interfaces. +// +// See the header of `axi_riscv_lrsc` for a description. 
+// +// Maintainer: Andreas Kurth + +module axi_riscv_lrsc_wrap #( + /// Exclusively-accessible address range (closed interval from ADDR_BEGIN to ADDR_END) + parameter longint unsigned ADDR_BEGIN = 0, + parameter longint unsigned ADDR_END = 0, + /// AXI Parameters + parameter int unsigned AXI_ADDR_WIDTH = 0, + parameter int unsigned AXI_DATA_WIDTH = 0, + parameter int unsigned AXI_ID_WIDTH = 0, + parameter int unsigned AXI_USER_WIDTH = 0, + /// Derived Parameters (do NOT change manually!) + localparam int unsigned AXI_STRB_WIDTH = AXI_DATA_WIDTH / 8 +) ( + input logic clk_i, + input logic rst_ni, + AXI_BUS.Master mst, + AXI_BUS.Slave slv +); + + axi_riscv_lrsc #( + .ADDR_BEGIN (ADDR_BEGIN), + .ADDR_END (ADDR_END), + .AXI_ADDR_WIDTH (AXI_ADDR_WIDTH), + .AXI_DATA_WIDTH (AXI_DATA_WIDTH), + .AXI_ID_WIDTH (AXI_ID_WIDTH), + .AXI_USER_WIDTH (AXI_USER_WIDTH) + ) i_lrsc ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .slv_aw_addr_i ( slv.aw_addr ), + .slv_aw_prot_i ( slv.aw_prot ), + .slv_aw_region_i ( slv.aw_region ), + .slv_aw_atop_i ( slv.aw_atop ), + .slv_aw_len_i ( slv.aw_len ), + .slv_aw_size_i ( slv.aw_size ), + .slv_aw_burst_i ( slv.aw_burst ), + .slv_aw_lock_i ( slv.aw_lock ), + .slv_aw_cache_i ( slv.aw_cache ), + .slv_aw_qos_i ( slv.aw_qos ), + .slv_aw_id_i ( slv.aw_id ), + .slv_aw_user_i ( slv.aw_user ), + .slv_aw_ready_o ( slv.aw_ready ), + .slv_aw_valid_i ( slv.aw_valid ), + .slv_ar_addr_i ( slv.ar_addr ), + .slv_ar_prot_i ( slv.ar_prot ), + .slv_ar_region_i ( slv.ar_region ), + .slv_ar_len_i ( slv.ar_len ), + .slv_ar_size_i ( slv.ar_size ), + .slv_ar_burst_i ( slv.ar_burst ), + .slv_ar_lock_i ( slv.ar_lock ), + .slv_ar_cache_i ( slv.ar_cache ), + .slv_ar_qos_i ( slv.ar_qos ), + .slv_ar_id_i ( slv.ar_id ), + .slv_ar_user_i ( slv.ar_user ), + .slv_ar_ready_o ( slv.ar_ready ), + .slv_ar_valid_i ( slv.ar_valid ), + .slv_w_data_i ( slv.w_data ), + .slv_w_strb_i ( slv.w_strb ), + .slv_w_user_i ( slv.w_user ), + .slv_w_last_i ( slv.w_last ), + .slv_w_ready_o ( 
slv.w_ready ), + .slv_w_valid_i ( slv.w_valid ), + .slv_r_data_o ( slv.r_data ), + .slv_r_resp_o ( slv.r_resp ), + .slv_r_last_o ( slv.r_last ), + .slv_r_id_o ( slv.r_id ), + .slv_r_user_o ( slv.r_user ), + .slv_r_ready_i ( slv.r_ready ), + .slv_r_valid_o ( slv.r_valid ), + .slv_b_resp_o ( slv.b_resp ), + .slv_b_id_o ( slv.b_id ), + .slv_b_user_o ( slv.b_user ), + .slv_b_ready_i ( slv.b_ready ), + .slv_b_valid_o ( slv.b_valid ), + .mst_aw_addr_o ( mst.aw_addr ), + .mst_aw_prot_o ( mst.aw_prot ), + .mst_aw_region_o ( mst.aw_region ), + .mst_aw_atop_o ( mst.aw_atop ), + .mst_aw_len_o ( mst.aw_len ), + .mst_aw_size_o ( mst.aw_size ), + .mst_aw_burst_o ( mst.aw_burst ), + .mst_aw_lock_o ( mst.aw_lock ), + .mst_aw_cache_o ( mst.aw_cache ), + .mst_aw_qos_o ( mst.aw_qos ), + .mst_aw_id_o ( mst.aw_id ), + .mst_aw_user_o ( mst.aw_user ), + .mst_aw_ready_i ( mst.aw_ready ), + .mst_aw_valid_o ( mst.aw_valid ), + .mst_ar_addr_o ( mst.ar_addr ), + .mst_ar_prot_o ( mst.ar_prot ), + .mst_ar_region_o ( mst.ar_region ), + .mst_ar_len_o ( mst.ar_len ), + .mst_ar_size_o ( mst.ar_size ), + .mst_ar_burst_o ( mst.ar_burst ), + .mst_ar_lock_o ( mst.ar_lock ), + .mst_ar_cache_o ( mst.ar_cache ), + .mst_ar_qos_o ( mst.ar_qos ), + .mst_ar_id_o ( mst.ar_id ), + .mst_ar_user_o ( mst.ar_user ), + .mst_ar_ready_i ( mst.ar_ready ), + .mst_ar_valid_o ( mst.ar_valid ), + .mst_w_data_o ( mst.w_data ), + .mst_w_strb_o ( mst.w_strb ), + .mst_w_user_o ( mst.w_user ), + .mst_w_last_o ( mst.w_last ), + .mst_w_ready_i ( mst.w_ready ), + .mst_w_valid_o ( mst.w_valid ), + .mst_r_data_i ( mst.r_data ), + .mst_r_resp_i ( mst.r_resp ), + .mst_r_last_i ( mst.r_last ), + .mst_r_id_i ( mst.r_id ), + .mst_r_user_i ( mst.r_user ), + .mst_r_ready_o ( mst.r_ready ), + .mst_r_valid_i ( mst.r_valid ), + .mst_b_resp_i ( mst.b_resp ), + .mst_b_id_i ( mst.b_id ), + .mst_b_user_i ( mst.b_user ), + .mst_b_ready_o ( mst.b_ready ), + .mst_b_valid_i ( mst.b_valid ) + ); + + // Validate parameters. 
+// pragma translate_off +`ifndef VERILATOR + initial begin: validate_params + assert (AXI_STRB_WIDTH == AXI_DATA_WIDTH/8) + else $fatal(1, "AXI_STRB_WIDTH must equal AXI_DATA_WIDTH/8!"); + end +`endif +// pragma translate_on + +endmodule diff --git a/test/type_param/corev_apu/tb/ariane_axi_pkg.sv b/test/type_param/corev_apu/tb/ariane_axi_pkg.sv new file mode 100644 index 00000000..c750336a --- /dev/null +++ b/test/type_param/corev_apu/tb/ariane_axi_pkg.sv @@ -0,0 +1,109 @@ +/* Copyright 2018 ETH Zurich and University of Bologna. + * Copyright and related rights are licensed under the Solderpad Hardware + * License, Version 0.51 (the “License”); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law + * or agreed to in writing, software, hardware and materials distributed under + * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
+ * + * File: ariane_axi_pkg.sv + * Author: Florian Zaruba + * Date: 17.8.2018 + * + * Description: Contains Ariane's AXI ports, does not contain user ports + */ + +package ariane_axi; + + localparam IdWidth = cva6_config_pkg::CVA6ConfigAxiIdWidth; // Recommended by AXI standard + localparam UserWidth = cva6_config_pkg::CVA6ConfigDataUserWidth; + localparam AddrWidth = cva6_config_pkg::CVA6ConfigAxiAddrWidth; + localparam DataWidth = cva6_config_pkg::CVA6ConfigAxiDataWidth; + localparam StrbWidth = DataWidth / 8; + + typedef logic [IdWidth-1:0] id_t; + typedef logic [AddrWidth-1:0] addr_t; + typedef logic [DataWidth-1:0] data_t; + typedef logic [StrbWidth-1:0] strb_t; + typedef logic [UserWidth-1:0] user_t; + + // AW Channel + typedef struct packed { + id_t id; + addr_t addr; + axi_pkg::len_t len; + axi_pkg::size_t size; + axi_pkg::burst_t burst; + logic lock; + axi_pkg::cache_t cache; + axi_pkg::prot_t prot; + axi_pkg::qos_t qos; + axi_pkg::region_t region; + axi_pkg::atop_t atop; + user_t user; + } aw_chan_t; + + // W Channel - AXI4 doesn't define a wid + typedef struct packed { + data_t data; + strb_t strb; + logic last; + user_t user; + } w_chan_t; + + // B Channel + typedef struct packed { + id_t id; + axi_pkg::resp_t resp; + user_t user; + } b_chan_t; + + // AR Channel + typedef struct packed { + id_t id; + addr_t addr; + axi_pkg::len_t len; + axi_pkg::size_t size; + axi_pkg::burst_t burst; + logic lock; + axi_pkg::cache_t cache; + axi_pkg::prot_t prot; + axi_pkg::qos_t qos; + axi_pkg::region_t region; + user_t user; + } ar_chan_t; + + // R Channel + typedef struct packed { + id_t id; + data_t data; + axi_pkg::resp_t resp; + logic last; + user_t user; + } r_chan_t; + + // Request/Response structs + typedef struct packed { + aw_chan_t aw; + logic aw_valid; + w_chan_t w; + logic w_valid; + logic b_ready; + ar_chan_t ar; + logic ar_valid; + logic r_ready; + } req_t; + + typedef struct packed { + logic aw_ready; + logic ar_ready; + logic w_ready; + logic b_valid; 
+ b_chan_t b; + logic r_valid; + r_chan_t r; + } resp_t; + +endpackage diff --git a/test/type_param/corev_apu/tb/ariane_axi_soc_pkg.sv b/test/type_param/corev_apu/tb/ariane_axi_soc_pkg.sv new file mode 100644 index 00000000..378b0d66 --- /dev/null +++ b/test/type_param/corev_apu/tb/ariane_axi_soc_pkg.sv @@ -0,0 +1,102 @@ +/* Copyright 2018 ETH Zurich and University of Bologna. + * Copyright and related rights are licensed under the Solderpad Hardware + * License, Version 0.51 (the “License”); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law + * or agreed to in writing, software, hardware and materials distributed under + * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + * + * File: ariane_axi_soc_pkg.sv + * Author: Florian Zaruba + * Date: 17.8.2018 + * + * Description: Contains Ariane's AXI ports on SoC, does not contain user ports + */ + +package ariane_axi_soc; + + localparam UserWidth = ariane_axi::UserWidth; + localparam AddrWidth = ariane_axi::AddrWidth; + localparam DataWidth = ariane_axi::DataWidth; + localparam StrbWidth = DataWidth / 8; + localparam IdWidth = ariane_axi::IdWidth; + localparam IdWidthSlave = IdWidth + $clog2(ariane_soc::NrSlaves); + + typedef logic [IdWidth-1:0] id_t; + typedef logic [IdWidthSlave-1:0] id_slv_t; + typedef logic [AddrWidth-1:0] addr_t; + typedef logic [DataWidth-1:0] data_t; + typedef logic [StrbWidth-1:0] strb_t; + typedef logic [UserWidth-1:0] user_t; + + // AW Channel - Slave + typedef struct packed { + id_slv_t id; + addr_t addr; + axi_pkg::len_t len; + axi_pkg::size_t size; + axi_pkg::burst_t burst; + logic lock; + axi_pkg::cache_t cache; + axi_pkg::prot_t prot; + axi_pkg::qos_t qos; + axi_pkg::region_t 
region; + axi_pkg::atop_t atop; + user_t user; + } aw_chan_slv_t; + + // B Channel - Slave + typedef struct packed { + id_slv_t id; + axi_pkg::resp_t resp; + user_t user; + } b_chan_slv_t; + + // AR Channel - Slave + typedef struct packed { + id_slv_t id; + addr_t addr; + axi_pkg::len_t len; + axi_pkg::size_t size; + axi_pkg::burst_t burst; + logic lock; + axi_pkg::cache_t cache; + axi_pkg::prot_t prot; + axi_pkg::qos_t qos; + axi_pkg::region_t region; + user_t user; + } ar_chan_slv_t; + + // R Channel - Slave + typedef struct packed { + id_slv_t id; + data_t data; + axi_pkg::resp_t resp; + logic last; + user_t user; + } r_chan_slv_t; + + typedef struct packed { + aw_chan_slv_t aw; + logic aw_valid; + ariane_axi::w_chan_t w; + logic w_valid; + logic b_ready; + ar_chan_slv_t ar; + logic ar_valid; + logic r_ready; + } req_slv_t; + + typedef struct packed { + logic aw_ready; + logic ar_ready; + logic w_ready; + logic b_valid; + b_chan_slv_t b; + logic r_valid; + r_chan_slv_t r; + } resp_slv_t; + +endpackage diff --git a/test/type_param/corev_apu/tb/ariane_peripherals.sv b/test/type_param/corev_apu/tb/ariane_peripherals.sv new file mode 100644 index 00000000..9865af46 --- /dev/null +++ b/test/type_param/corev_apu/tb/ariane_peripherals.sv @@ -0,0 +1,619 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +`include "register_interface/assign.svh" +`include "register_interface/typedef.svh" + +// Xilinx Peripherals +module ariane_peripherals #( + parameter int AxiAddrWidth = -1, + parameter int AxiDataWidth = -1, + parameter int AxiIdWidth = -1, + parameter int AxiUserWidth = 1, + parameter bit InclUART = 1, + parameter bit InclSPI = 0, + parameter bit InclEthernet = 0, + parameter bit InclGPIO = 0, + parameter bit InclTimer = 1 +) ( + input logic clk_i , // Clock + input logic rst_ni , // Asynchronous reset active low + AXI_BUS.Slave plic , + AXI_BUS.Slave uart , + AXI_BUS.Slave spi , + AXI_BUS.Slave ethernet , + AXI_BUS.Slave timer , + output logic [1:0] irq_o , + // UART + input logic rx_i , + output logic tx_o , + // Ethernet + input wire eth_txck , + input wire eth_rxck , + input wire eth_rxctl , + input wire [3:0] eth_rxd , + output wire eth_rst_n , + output wire eth_tx_en , + output wire [3:0] eth_txd , + inout wire phy_mdio , + output logic eth_mdc , + // MDIO Interface + inout mdio , + output mdc , + // SPI + output logic spi_clk_o , + output logic spi_mosi , + input logic spi_miso , + output logic spi_ss +); + + // --------------- + // 1. 
PLIC + // --------------- + logic [ariane_soc::NumSources-1:0] irq_sources; + + // Unused interrupt sources + assign irq_sources[ariane_soc::NumSources-1:7] = '0; + + REG_BUS #( + .ADDR_WIDTH ( 32 ), + .DATA_WIDTH ( 32 ) + ) reg_bus (clk_i); + + logic plic_penable; + logic plic_pwrite; + logic [31:0] plic_paddr; + logic plic_psel; + logic [31:0] plic_pwdata; + logic [31:0] plic_prdata; + logic plic_pready; + logic plic_pslverr; + + axi2apb_64_32 #( + .AXI4_ADDRESS_WIDTH ( AxiAddrWidth ), + .AXI4_RDATA_WIDTH ( AxiDataWidth ), + .AXI4_WDATA_WIDTH ( AxiDataWidth ), + .AXI4_ID_WIDTH ( AxiIdWidth ), + .AXI4_USER_WIDTH ( AxiUserWidth ), + .BUFF_DEPTH_SLAVE ( 2 ), + .APB_ADDR_WIDTH ( 32 ) + ) i_axi2apb_64_32_plic ( + .ACLK ( clk_i ), + .ARESETn ( rst_ni ), + .test_en_i ( 1'b0 ), + .AWID_i ( plic.aw_id ), + .AWADDR_i ( plic.aw_addr ), + .AWLEN_i ( plic.aw_len ), + .AWSIZE_i ( plic.aw_size ), + .AWBURST_i ( plic.aw_burst ), + .AWLOCK_i ( plic.aw_lock ), + .AWCACHE_i ( plic.aw_cache ), + .AWPROT_i ( plic.aw_prot ), + .AWREGION_i( plic.aw_region ), + .AWUSER_i ( plic.aw_user ), + .AWQOS_i ( plic.aw_qos ), + .AWVALID_i ( plic.aw_valid ), + .AWREADY_o ( plic.aw_ready ), + .WDATA_i ( plic.w_data ), + .WSTRB_i ( plic.w_strb ), + .WLAST_i ( plic.w_last ), + .WUSER_i ( plic.w_user ), + .WVALID_i ( plic.w_valid ), + .WREADY_o ( plic.w_ready ), + .BID_o ( plic.b_id ), + .BRESP_o ( plic.b_resp ), + .BVALID_o ( plic.b_valid ), + .BUSER_o ( plic.b_user ), + .BREADY_i ( plic.b_ready ), + .ARID_i ( plic.ar_id ), + .ARADDR_i ( plic.ar_addr ), + .ARLEN_i ( plic.ar_len ), + .ARSIZE_i ( plic.ar_size ), + .ARBURST_i ( plic.ar_burst ), + .ARLOCK_i ( plic.ar_lock ), + .ARCACHE_i ( plic.ar_cache ), + .ARPROT_i ( plic.ar_prot ), + .ARREGION_i( plic.ar_region ), + .ARUSER_i ( plic.ar_user ), + .ARQOS_i ( plic.ar_qos ), + .ARVALID_i ( plic.ar_valid ), + .ARREADY_o ( plic.ar_ready ), + .RID_o ( plic.r_id ), + .RDATA_o ( plic.r_data ), + .RRESP_o ( plic.r_resp ), + .RLAST_o ( plic.r_last ), + .RUSER_o 
( plic.r_user ), + .RVALID_o ( plic.r_valid ), + .RREADY_i ( plic.r_ready ), + .PENABLE ( plic_penable ), + .PWRITE ( plic_pwrite ), + .PADDR ( plic_paddr ), + .PSEL ( plic_psel ), + .PWDATA ( plic_pwdata ), + .PRDATA ( plic_prdata ), + .PREADY ( plic_pready ), + .PSLVERR ( plic_pslverr ) + ); + + apb_to_reg i_apb_to_reg ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .penable_i ( plic_penable ), + .pwrite_i ( plic_pwrite ), + .paddr_i ( plic_paddr ), + .psel_i ( plic_psel ), + .pwdata_i ( plic_pwdata ), + .prdata_o ( plic_prdata ), + .pready_o ( plic_pready ), + .pslverr_o ( plic_pslverr ), + .reg_o ( reg_bus ) + ); + + // define reg type according to REG_BUS above + `REG_BUS_TYPEDEF_ALL(plic, logic[31:0], logic[31:0], logic[3:0]) + plic_req_t plic_req; + plic_rsp_t plic_rsp; + + // assign REG_BUS.out to (req_t, rsp_t) pair + `REG_BUS_ASSIGN_TO_REQ(plic_req, reg_bus) + `REG_BUS_ASSIGN_FROM_RSP(reg_bus, plic_rsp) + + plic_top #( + .N_SOURCE ( ariane_soc::NumSources ), + .N_TARGET ( ariane_soc::NumTargets ), + .MAX_PRIO ( ariane_soc::MaxPriority ), + .reg_req_t ( plic_req_t ), + .reg_rsp_t ( plic_rsp_t ) + ) i_plic ( + .clk_i, + .rst_ni, + .req_i ( plic_req ), + .resp_o ( plic_rsp ), + .le_i ( '0 ), // 0:level 1:edge + .irq_sources_i ( irq_sources ), + .eip_targets_o ( irq_o ) + ); + + // --------------- + // 2. 
UART + // --------------- + logic uart_penable; + logic uart_pwrite; + logic [31:0] uart_paddr; + logic uart_psel; + logic [31:0] uart_pwdata; + logic [31:0] uart_prdata; + logic uart_pready; + logic uart_pslverr; + + axi2apb_64_32 #( + .AXI4_ADDRESS_WIDTH ( AxiAddrWidth ), + .AXI4_RDATA_WIDTH ( AxiDataWidth ), + .AXI4_WDATA_WIDTH ( AxiDataWidth ), + .AXI4_ID_WIDTH ( AxiIdWidth ), + .AXI4_USER_WIDTH ( AxiUserWidth ), + .BUFF_DEPTH_SLAVE ( 2 ), + .APB_ADDR_WIDTH ( 32 ) + ) i_axi2apb_64_32_uart ( + .ACLK ( clk_i ), + .ARESETn ( rst_ni ), + .test_en_i ( 1'b0 ), + .AWID_i ( uart.aw_id ), + .AWADDR_i ( uart.aw_addr ), + .AWLEN_i ( uart.aw_len ), + .AWSIZE_i ( uart.aw_size ), + .AWBURST_i ( uart.aw_burst ), + .AWLOCK_i ( uart.aw_lock ), + .AWCACHE_i ( uart.aw_cache ), + .AWPROT_i ( uart.aw_prot ), + .AWREGION_i( uart.aw_region ), + .AWUSER_i ( uart.aw_user ), + .AWQOS_i ( uart.aw_qos ), + .AWVALID_i ( uart.aw_valid ), + .AWREADY_o ( uart.aw_ready ), + .WDATA_i ( uart.w_data ), + .WSTRB_i ( uart.w_strb ), + .WLAST_i ( uart.w_last ), + .WUSER_i ( uart.w_user ), + .WVALID_i ( uart.w_valid ), + .WREADY_o ( uart.w_ready ), + .BID_o ( uart.b_id ), + .BRESP_o ( uart.b_resp ), + .BVALID_o ( uart.b_valid ), + .BUSER_o ( uart.b_user ), + .BREADY_i ( uart.b_ready ), + .ARID_i ( uart.ar_id ), + .ARADDR_i ( uart.ar_addr ), + .ARLEN_i ( uart.ar_len ), + .ARSIZE_i ( uart.ar_size ), + .ARBURST_i ( uart.ar_burst ), + .ARLOCK_i ( uart.ar_lock ), + .ARCACHE_i ( uart.ar_cache ), + .ARPROT_i ( uart.ar_prot ), + .ARREGION_i( uart.ar_region ), + .ARUSER_i ( uart.ar_user ), + .ARQOS_i ( uart.ar_qos ), + .ARVALID_i ( uart.ar_valid ), + .ARREADY_o ( uart.ar_ready ), + .RID_o ( uart.r_id ), + .RDATA_o ( uart.r_data ), + .RRESP_o ( uart.r_resp ), + .RLAST_o ( uart.r_last ), + .RUSER_o ( uart.r_user ), + .RVALID_o ( uart.r_valid ), + .RREADY_i ( uart.r_ready ), + .PENABLE ( uart_penable ), + .PWRITE ( uart_pwrite ), + .PADDR ( uart_paddr ), + .PSEL ( uart_psel ), + .PWDATA ( uart_pwdata ), + .PRDATA 
( uart_prdata ), + .PREADY ( uart_pready ), + .PSLVERR ( uart_pslverr ) + ); + + if (InclUART) begin : gen_uart + apb_uart i_apb_uart ( + .CLK ( clk_i ), + .RSTN ( rst_ni ), + .PSEL ( uart_psel ), + .PENABLE ( uart_penable ), + .PWRITE ( uart_pwrite ), + .PADDR ( uart_paddr[4:2] ), + .PWDATA ( uart_pwdata ), + .PRDATA ( uart_prdata ), + .PREADY ( uart_pready ), + .PSLVERR ( uart_pslverr ), + .INT ( irq_sources[0] ), + .OUT1N ( ), // keep open + .OUT2N ( ), // keep open + .RTSN ( ), // no flow control + .DTRN ( ), // no flow control + .CTSN ( 1'b0 ), + .DSRN ( 1'b0 ), + .DCDN ( 1'b0 ), + .RIN ( 1'b0 ), + .SIN ( rx_i ), + .SOUT ( tx_o ) + ); + end else begin + assign irq_sources[0] = 1'b0; + /* pragma translate_off */ + mock_uart i_mock_uart ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .penable_i ( uart_penable ), + .pwrite_i ( uart_pwrite ), + .paddr_i ( uart_paddr ), + .psel_i ( uart_psel ), + .pwdata_i ( uart_pwdata ), + .prdata_o ( uart_prdata ), + .pready_o ( uart_pready ), + .pslverr_o ( uart_pslverr ) + ); + /* pragma translate_on */ + end + + // --------------- + // 3. 
SPI + // --------------- + if (InclSPI) begin : gen_spi + logic [31:0] s_axi_spi_awaddr; + logic [7:0] s_axi_spi_awlen; + logic [2:0] s_axi_spi_awsize; + logic [1:0] s_axi_spi_awburst; + logic [0:0] s_axi_spi_awlock; + logic [3:0] s_axi_spi_awcache; + logic [2:0] s_axi_spi_awprot; + logic [3:0] s_axi_spi_awregion; + logic [3:0] s_axi_spi_awqos; + logic s_axi_spi_awvalid; + logic s_axi_spi_awready; + logic [31:0] s_axi_spi_wdata; + logic [3:0] s_axi_spi_wstrb; + logic s_axi_spi_wlast; + logic s_axi_spi_wvalid; + logic s_axi_spi_wready; + logic [1:0] s_axi_spi_bresp; + logic s_axi_spi_bvalid; + logic s_axi_spi_bready; + logic [31:0] s_axi_spi_araddr; + logic [7:0] s_axi_spi_arlen; + logic [2:0] s_axi_spi_arsize; + logic [1:0] s_axi_spi_arburst; + logic [0:0] s_axi_spi_arlock; + logic [3:0] s_axi_spi_arcache; + logic [2:0] s_axi_spi_arprot; + logic [3:0] s_axi_spi_arregion; + logic [3:0] s_axi_spi_arqos; + logic s_axi_spi_arvalid; + logic s_axi_spi_arready; + logic [31:0] s_axi_spi_rdata; + logic [1:0] s_axi_spi_rresp; + logic s_axi_spi_rlast; + logic s_axi_spi_rvalid; + logic s_axi_spi_rready; + + xlnx_axi_clock_converter i_xlnx_axi_clock_converter_spi ( + .s_axi_aclk ( clk_i ), + .s_axi_aresetn ( rst_ni ), + + .s_axi_awid ( spi.aw_id ), + .s_axi_awaddr ( spi.aw_addr[31:0] ), + .s_axi_awlen ( spi.aw_len ), + .s_axi_awsize ( spi.aw_size ), + .s_axi_awburst ( spi.aw_burst ), + .s_axi_awlock ( spi.aw_lock ), + .s_axi_awcache ( spi.aw_cache ), + .s_axi_awprot ( spi.aw_prot ), + .s_axi_awregion ( spi.aw_region ), + .s_axi_awqos ( spi.aw_qos ), + .s_axi_awvalid ( spi.aw_valid ), + .s_axi_awready ( spi.aw_ready ), + .s_axi_wdata ( spi.w_data ), + .s_axi_wstrb ( spi.w_strb ), + .s_axi_wlast ( spi.w_last ), + .s_axi_wvalid ( spi.w_valid ), + .s_axi_wready ( spi.w_ready ), + .s_axi_bid ( spi.b_id ), + .s_axi_bresp ( spi.b_resp ), + .s_axi_bvalid ( spi.b_valid ), + .s_axi_bready ( spi.b_ready ), + .s_axi_arid ( spi.ar_id ), + .s_axi_araddr ( spi.ar_addr[31:0] ), + .s_axi_arlen 
( spi.ar_len ), + .s_axi_arsize ( spi.ar_size ), + .s_axi_arburst ( spi.ar_burst ), + .s_axi_arlock ( spi.ar_lock ), + .s_axi_arcache ( spi.ar_cache ), + .s_axi_arprot ( spi.ar_prot ), + .s_axi_arregion ( spi.ar_region ), + .s_axi_arqos ( spi.ar_qos ), + .s_axi_arvalid ( spi.ar_valid ), + .s_axi_arready ( spi.ar_ready ), + .s_axi_rid ( spi.r_id ), + .s_axi_rdata ( spi.r_data ), + .s_axi_rresp ( spi.r_resp ), + .s_axi_rlast ( spi.r_last ), + .s_axi_rvalid ( spi.r_valid ), + .s_axi_rready ( spi.r_ready ), + + .m_axi_awaddr ( s_axi_spi_awaddr ), + .m_axi_awlen ( s_axi_spi_awlen ), + .m_axi_awsize ( s_axi_spi_awsize ), + .m_axi_awburst ( s_axi_spi_awburst ), + .m_axi_awlock ( s_axi_spi_awlock ), + .m_axi_awcache ( s_axi_spi_awcache ), + .m_axi_awprot ( s_axi_spi_awprot ), + .m_axi_awregion ( s_axi_spi_awregion ), + .m_axi_awqos ( s_axi_spi_awqos ), + .m_axi_awvalid ( s_axi_spi_awvalid ), + .m_axi_awready ( s_axi_spi_awready ), + .m_axi_wdata ( s_axi_spi_wdata ), + .m_axi_wstrb ( s_axi_spi_wstrb ), + .m_axi_wlast ( s_axi_spi_wlast ), + .m_axi_wvalid ( s_axi_spi_wvalid ), + .m_axi_wready ( s_axi_spi_wready ), + .m_axi_bresp ( s_axi_spi_bresp ), + .m_axi_bvalid ( s_axi_spi_bvalid ), + .m_axi_bready ( s_axi_spi_bready ), + .m_axi_araddr ( s_axi_spi_araddr ), + .m_axi_arlen ( s_axi_spi_arlen ), + .m_axi_arsize ( s_axi_spi_arsize ), + .m_axi_arburst ( s_axi_spi_arburst ), + .m_axi_arlock ( s_axi_spi_arlock ), + .m_axi_arcache ( s_axi_spi_arcache ), + .m_axi_arprot ( s_axi_spi_arprot ), + .m_axi_arregion ( s_axi_spi_arregion ), + .m_axi_arqos ( s_axi_spi_arqos ), + .m_axi_arvalid ( s_axi_spi_arvalid ), + .m_axi_arready ( s_axi_spi_arready ), + .m_axi_rdata ( s_axi_spi_rdata ), + .m_axi_rresp ( s_axi_spi_rresp ), + .m_axi_rlast ( s_axi_spi_rlast ), + .m_axi_rvalid ( s_axi_spi_rvalid ), + .m_axi_rready ( s_axi_spi_rready ) + ); + + xlnx_axi_quad_spi i_xlnx_axi_quad_spi ( + .ext_spi_clk ( clk_i ), + .s_axi4_aclk ( clk_i ), + .s_axi4_aresetn ( rst_ni ), + .s_axi4_awaddr ( 
s_axi_spi_awaddr[23:0] ), + .s_axi4_awlen ( s_axi_spi_awlen ), + .s_axi4_awsize ( s_axi_spi_awsize ), + .s_axi4_awburst ( s_axi_spi_awburst ), + .s_axi4_awlock ( s_axi_spi_awlock ), + .s_axi4_awcache ( s_axi_spi_awcache ), + .s_axi4_awprot ( s_axi_spi_awprot ), + .s_axi4_awvalid ( s_axi_spi_awvalid ), + .s_axi4_awready ( s_axi_spi_awready ), + .s_axi4_wdata ( s_axi_spi_wdata ), + .s_axi4_wstrb ( s_axi_spi_wstrb ), + .s_axi4_wlast ( s_axi_spi_wlast ), + .s_axi4_wvalid ( s_axi_spi_wvalid ), + .s_axi4_wready ( s_axi_spi_wready ), + .s_axi4_bresp ( s_axi_spi_bresp ), + .s_axi4_bvalid ( s_axi_spi_bvalid ), + .s_axi4_bready ( s_axi_spi_bready ), + .s_axi4_araddr ( s_axi_spi_araddr[23:0] ), + .s_axi4_arlen ( s_axi_spi_arlen ), + .s_axi4_arsize ( s_axi_spi_arsize ), + .s_axi4_arburst ( s_axi_spi_arburst ), + .s_axi4_arlock ( s_axi_spi_arlock ), + .s_axi4_arcache ( s_axi_spi_arcache ), + .s_axi4_arprot ( s_axi_spi_arprot ), + .s_axi4_arvalid ( s_axi_spi_arvalid ), + .s_axi4_arready ( s_axi_spi_arready ), + .s_axi4_rdata ( s_axi_spi_rdata ), + .s_axi4_rresp ( s_axi_spi_rresp ), + .s_axi4_rlast ( s_axi_spi_rlast ), + .s_axi4_rvalid ( s_axi_spi_rvalid ), + .s_axi4_rready ( s_axi_spi_rready ), + + .io0_i ( '0 ), + .io0_o ( spi_mosi ), + .io0_t ( '0 ), + .io1_i ( spi_miso ), + .io1_o ( ), + .io1_t ( '0 ), + .ss_i ( '0 ), + .ss_o ( spi_ss ), + .ss_t ( '0 ), + .sck_o ( spi_clk_o ), + .sck_i ( '0 ), + .sck_t ( ), + .ip2intc_irpt ( irq_sources[1] ) + // .ip2intc_irpt ( irq_sources[1] ) + ); + // assign irq_sources [1] = 1'b0; + end else begin + assign spi_clk_o = 1'b0; + assign spi_mosi = 1'b0; + assign spi_ss = 1'b0; + + assign irq_sources [1] = 1'b0; + assign spi.aw_ready = 1'b1; + assign spi.ar_ready = 1'b1; + assign spi.w_ready = 1'b1; + + assign spi.b_valid = spi.aw_valid; + assign spi.b_id = spi.aw_id; + assign spi.b_resp = axi_pkg::RESP_SLVERR; + assign spi.b_user = '0; + + assign spi.r_valid = spi.ar_valid; + assign spi.r_resp = axi_pkg::RESP_SLVERR; + assign spi.r_data = 
'hdeadbeef; + assign spi.r_last = 1'b1; + end + + + // --------------- + // 4. Ethernet + // --------------- + if (0) + begin + end + else + begin + assign irq_sources [2] = 1'b0; + assign ethernet.aw_ready = 1'b1; + assign ethernet.ar_ready = 1'b1; + assign ethernet.w_ready = 1'b1; + + assign ethernet.b_valid = ethernet.aw_valid; + assign ethernet.b_id = ethernet.aw_id; + assign ethernet.b_resp = axi_pkg::RESP_SLVERR; + assign ethernet.b_user = '0; + + assign ethernet.r_valid = ethernet.ar_valid; + assign ethernet.r_resp = axi_pkg::RESP_SLVERR; + assign ethernet.r_data = 'hdeadbeef; + assign ethernet.r_last = 1'b1; + end + + // --------------- + // 5. Timer + // --------------- + if (InclTimer) begin : gen_timer + logic timer_penable; + logic timer_pwrite; + logic [31:0] timer_paddr; + logic timer_psel; + logic [31:0] timer_pwdata; + logic [31:0] timer_prdata; + logic timer_pready; + logic timer_pslverr; + + axi2apb_64_32 #( + .AXI4_ADDRESS_WIDTH ( AxiAddrWidth ), + .AXI4_RDATA_WIDTH ( AxiDataWidth ), + .AXI4_WDATA_WIDTH ( AxiDataWidth ), + .AXI4_ID_WIDTH ( AxiIdWidth ), + .AXI4_USER_WIDTH ( AxiUserWidth ), + .BUFF_DEPTH_SLAVE ( 2 ), + .APB_ADDR_WIDTH ( 32 ) + ) i_axi2apb_64_32_timer ( + .ACLK ( clk_i ), + .ARESETn ( rst_ni ), + .test_en_i ( 1'b0 ), + .AWID_i ( timer.aw_id ), + .AWADDR_i ( timer.aw_addr ), + .AWLEN_i ( timer.aw_len ), + .AWSIZE_i ( timer.aw_size ), + .AWBURST_i ( timer.aw_burst ), + .AWLOCK_i ( timer.aw_lock ), + .AWCACHE_i ( timer.aw_cache ), + .AWPROT_i ( timer.aw_prot ), + .AWREGION_i( timer.aw_region ), + .AWUSER_i ( timer.aw_user ), + .AWQOS_i ( timer.aw_qos ), + .AWVALID_i ( timer.aw_valid ), + .AWREADY_o ( timer.aw_ready ), + .WDATA_i ( timer.w_data ), + .WSTRB_i ( timer.w_strb ), + .WLAST_i ( timer.w_last ), + .WUSER_i ( timer.w_user ), + .WVALID_i ( timer.w_valid ), + .WREADY_o ( timer.w_ready ), + .BID_o ( timer.b_id ), + .BRESP_o ( timer.b_resp ), + .BVALID_o ( timer.b_valid ), + .BUSER_o ( timer.b_user ), + .BREADY_i ( timer.b_ready ), 
+ .ARID_i ( timer.ar_id ), + .ARADDR_i ( timer.ar_addr ), + .ARLEN_i ( timer.ar_len ), + .ARSIZE_i ( timer.ar_size ), + .ARBURST_i ( timer.ar_burst ), + .ARLOCK_i ( timer.ar_lock ), + .ARCACHE_i ( timer.ar_cache ), + .ARPROT_i ( timer.ar_prot ), + .ARREGION_i( timer.ar_region ), + .ARUSER_i ( timer.ar_user ), + .ARQOS_i ( timer.ar_qos ), + .ARVALID_i ( timer.ar_valid ), + .ARREADY_o ( timer.ar_ready ), + .RID_o ( timer.r_id ), + .RDATA_o ( timer.r_data ), + .RRESP_o ( timer.r_resp ), + .RLAST_o ( timer.r_last ), + .RUSER_o ( timer.r_user ), + .RVALID_o ( timer.r_valid ), + .RREADY_i ( timer.r_ready ), + .PENABLE ( timer_penable ), + .PWRITE ( timer_pwrite ), + .PADDR ( timer_paddr ), + .PSEL ( timer_psel ), + .PWDATA ( timer_pwdata ), + .PRDATA ( timer_prdata ), + .PREADY ( timer_pready ), + .PSLVERR ( timer_pslverr ) + ); + + apb_timer #( + .APB_ADDR_WIDTH ( 32 ), + .TIMER_CNT ( 2 ) + ) i_timer ( + .HCLK ( clk_i ), + .HRESETn ( rst_ni ), + .PSEL ( timer_psel ), + .PENABLE ( timer_penable ), + .PWRITE ( timer_pwrite ), + .PADDR ( timer_paddr ), + .PWDATA ( timer_pwdata ), + .PRDATA ( timer_prdata ), + .PREADY ( timer_pready ), + .PSLVERR ( timer_pslverr ), + .irq_o ( irq_sources[6:3] ) + ); + end +endmodule diff --git a/test/type_param/corev_apu/tb/ariane_soc_pkg.sv b/test/type_param/corev_apu/tb/ariane_soc_pkg.sv new file mode 100644 index 00000000..cc57f807 --- /dev/null +++ b/test/type_param/corev_apu/tb/ariane_soc_pkg.sv @@ -0,0 +1,68 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Description: Contains SoC information as constants +package ariane_soc; + // M-Mode Hart, S-Mode Hart + localparam int unsigned NumTargets = 2; + // Uart, SPI, Ethernet, reserved + localparam int unsigned NumSources = 30; + localparam int unsigned MaxPriority = 7; + + localparam NrSlaves = 2; // actually masters, but slaves on the crossbar + + typedef enum int unsigned { + DRAM = 0, + GPIO = 1, + Ethernet = 2, + SPI = 3, + Timer = 4, + UART = 5, + PLIC = 6, + CLINT = 7, + ROM = 8, + Debug = 9 + } axi_slaves_t; + + localparam NB_PERIPHERALS = Debug + 1; + + + localparam logic[63:0] DebugLength = 64'h1000; + localparam logic[63:0] ROMLength = 64'h10000; + localparam logic[63:0] CLINTLength = 64'hC0000; + localparam logic[63:0] PLICLength = 64'h3FF_FFFF; + localparam logic[63:0] UARTLength = 64'h1000; + localparam logic[63:0] TimerLength = 64'h1000; + localparam logic[63:0] SPILength = 64'h800000; + localparam logic[63:0] EthernetLength = 64'h10000; + localparam logic[63:0] GPIOLength = 64'h1000; + localparam logic[63:0] DRAMLength = 64'h40000000; // 1GByte of DDR (split between two chips on Genesys2) + localparam logic[63:0] SRAMLength = 64'h1800000; // 24 MByte of SRAM + // Instantiate AXI protocol checkers + localparam bit GenProtocolChecker = 1'b0; + + typedef enum logic [63:0] { + DebugBase = 64'h0000_0000, + ROMBase = 64'h0001_0000, + CLINTBase = 64'h0200_0000, + PLICBase = 64'h0C00_0000, + UARTBase = 64'h1000_0000, + TimerBase = 64'h1800_0000, + SPIBase = 64'h2000_0000, + EthernetBase = 64'h3000_0000, + GPIOBase = 64'h4000_0000, + DRAMBase = 64'h8000_0000 + } 
soc_bus_start_t; + + localparam NrRegion = 1; + localparam logic [NrRegion-1:0][NB_PERIPHERALS-1:0] ValidRule = {{NrRegion * NB_PERIPHERALS}{1'b1}}; + +endpackage diff --git a/test/type_param/corev_apu/tb/ariane_testharness.sv b/test/type_param/corev_apu/tb/ariane_testharness.sv new file mode 100644 index 00000000..3530da0a --- /dev/null +++ b/test/type_param/corev_apu/tb/ariane_testharness.sv @@ -0,0 +1,807 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// Author: Florian Zaruba, ETH Zurich +// Date: 19.03.2017 +// Description: Test-harness for Ariane +// Instantiates an AXI-Bus and memories + +`include "axi/assign.svh" + +module ariane_testharness #( + parameter config_pkg::cva6_cfg_t CVA6Cfg = cva6_config_pkg::cva6_cfg, + parameter bit IsRVFI = bit'(cva6_config_pkg::CVA6ConfigRvfiTrace), + // + parameter int unsigned AXI_USER_WIDTH = ariane_pkg::AXI_USER_WIDTH, + parameter int unsigned AXI_USER_EN = ariane_pkg::AXI_USER_EN, + parameter int unsigned AXI_ADDRESS_WIDTH = 64, + parameter int unsigned AXI_DATA_WIDTH = 64, + parameter bit InclSimDTM = 1'b1, + parameter int unsigned NUM_WORDS = 2**25, // memory size + parameter bit StallRandomOutput = 1'b0, + parameter bit StallRandomInput = 1'b0 +) ( + input logic clk_i, + input logic rtc_i, + input logic rst_ni, + output logic [31:0] exit_o +); + + localparam [7:0] hart_id = '0; + + localparam type rvfi_instr_t = struct packed { + logic [config_pkg::NRET-1:0] valid; + logic [config_pkg::NRET*64-1:0] order; + logic [config_pkg::NRET*config_pkg::ILEN-1:0] insn; + logic [config_pkg::NRET-1:0] trap; + logic [config_pkg::NRET*riscv::XLEN-1:0] cause; + logic [config_pkg::NRET-1:0] halt; + logic [config_pkg::NRET-1:0] intr; + logic [config_pkg::NRET*2-1:0] mode; + logic [config_pkg::NRET*2-1:0] ixl; + logic [config_pkg::NRET*5-1:0] rs1_addr; + logic [config_pkg::NRET*5-1:0] rs2_addr; + logic [config_pkg::NRET*riscv::XLEN-1:0] rs1_rdata; + logic [config_pkg::NRET*riscv::XLEN-1:0] rs2_rdata; + logic [config_pkg::NRET*5-1:0] rd_addr; + logic [config_pkg::NRET*riscv::XLEN-1:0] rd_wdata; + logic [config_pkg::NRET*riscv::XLEN-1:0] pc_rdata; + logic [config_pkg::NRET*riscv::XLEN-1:0] pc_wdata; + logic [config_pkg::NRET*riscv::VLEN-1:0] mem_addr; + logic [config_pkg::NRET*riscv::PLEN-1:0] mem_paddr; + logic [config_pkg::NRET*(riscv::XLEN/8)-1:0] mem_rmask; + logic [config_pkg::NRET*(riscv::XLEN/8)-1:0] mem_wmask; + logic [config_pkg::NRET*riscv::XLEN-1:0] mem_rdata; + logic 
[config_pkg::NRET*riscv::XLEN-1:0] mem_wdata; + }; + + localparam type rvfi_probes_t = struct packed { + logic [ariane_pkg::TRANS_ID_BITS-1:0] issue_pointer; + logic [CVA6Cfg.NrCommitPorts-1:0][ariane_pkg::TRANS_ID_BITS-1:0] commit_pointer; + logic flush_unissued_instr; + logic decoded_instr_valid; + logic decoded_instr_ack; + logic flush; + logic issue_instr_ack; + logic fetch_entry_valid; + logic [31:0] instruction; + logic is_compressed; + riscv::xlen_t rs1_forwarding; + riscv::xlen_t rs2_forwarding; + ariane_pkg::scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr; + ariane_pkg::exception_t ex_commit; + riscv::priv_lvl_t priv_lvl; + ariane_pkg::lsu_ctrl_t lsu_ctrl; + logic [((CVA6Cfg.CvxifEn || CVA6Cfg.RVV) ? 5 : 4)-1:0][riscv::XLEN-1:0] wbdata; + logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack; + logic [riscv::PLEN-1:0] mem_paddr; + logic debug_mode; + logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata; + }; + + // disable test-enable + logic test_en; + logic ndmreset; + logic ndmreset_n; + logic debug_req_core; + + int jtag_enable; + logic init_done; + logic [31:0] jtag_exit, dmi_exit; + logic [31:0] rvfi_exit; + + logic jtag_TCK; + logic jtag_TMS; + logic jtag_TDI; + logic jtag_TRSTn; + logic jtag_TDO_data; + logic jtag_TDO_driven; + + logic debug_req_valid; + logic debug_req_ready; + logic debug_resp_valid; + logic debug_resp_ready; + + logic jtag_req_valid; + logic [6:0] jtag_req_bits_addr; + logic [1:0] jtag_req_bits_op; + logic [31:0] jtag_req_bits_data; + logic jtag_resp_ready; + logic jtag_resp_valid; + + logic dmi_req_valid; + logic dmi_resp_ready; + logic dmi_resp_valid; + + dm::dmi_req_t jtag_dmi_req; + dm::dmi_req_t dmi_req; + + dm::dmi_req_t debug_req; + dm::dmi_resp_t debug_resp; + + assign test_en = 1'b0; + + AXI_BUS #( + .AXI_ADDR_WIDTH ( AXI_ADDRESS_WIDTH ), + .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), + .AXI_ID_WIDTH ( ariane_axi_soc::IdWidth ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ) + ) slave[ariane_soc::NrSlaves-1:0](); + + AXI_BUS #( + 
.AXI_ADDR_WIDTH ( AXI_ADDRESS_WIDTH ), + .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), + .AXI_ID_WIDTH ( ariane_axi_soc::IdWidthSlave ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ) + ) master[ariane_soc::NB_PERIPHERALS-1:0](); + + rstgen i_rstgen_main ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni & (~ndmreset) ), + .test_mode_i ( test_en ), + .rst_no ( ndmreset_n ), + .init_no ( ) // keep open + ); + + // --------------- + // Debug + // --------------- + assign init_done = rst_ni; + + logic debug_enable; + initial begin + if (!$value$plusargs("jtag_rbb_enable=%b", jtag_enable)) jtag_enable = 'h0; + if ($test$plusargs("debug_disable")) debug_enable = 'h0; else debug_enable = 'h1; + if (riscv::XLEN != 32 & riscv::XLEN != 64) $error("XLEN different from 32 and 64"); + end + + // debug if MUX + assign debug_req_valid = (jtag_enable[0]) ? jtag_req_valid : dmi_req_valid; + assign debug_resp_ready = (jtag_enable[0]) ? jtag_resp_ready : dmi_resp_ready; + assign debug_req = (jtag_enable[0]) ? jtag_dmi_req : dmi_req; + if (ariane_pkg::RVFI) begin + assign exit_o = (jtag_enable[0]) ? jtag_exit : rvfi_exit; + end else begin + assign exit_o = (jtag_enable[0]) ? jtag_exit : dmi_exit; + end + assign jtag_resp_valid = (jtag_enable[0]) ? debug_resp_valid : 1'b0; + assign dmi_resp_valid = (jtag_enable[0]) ? 
1'b0 : debug_resp_valid; + + // SiFive's SimJTAG Module + // Converts to DPI calls + SimJTAG i_SimJTAG ( + .clock ( clk_i ), + .reset ( ~rst_ni ), + .enable ( jtag_enable[0] ), + .init_done ( init_done ), + .jtag_TCK ( jtag_TCK ), + .jtag_TMS ( jtag_TMS ), + .jtag_TDI ( jtag_TDI ), + .jtag_TRSTn ( jtag_TRSTn ), + .jtag_TDO_data ( jtag_TDO_data ), + .jtag_TDO_driven ( jtag_TDO_driven ), + .exit ( jtag_exit ) + ); + + dmi_jtag i_dmi_jtag ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .testmode_i ( test_en ), + .dmi_req_o ( jtag_dmi_req ), + .dmi_req_valid_o ( jtag_req_valid ), + .dmi_req_ready_i ( debug_req_ready ), + .dmi_resp_i ( debug_resp ), + .dmi_resp_ready_o ( jtag_resp_ready ), + .dmi_resp_valid_i ( jtag_resp_valid ), + .dmi_rst_no ( ), // not connected + .tck_i ( jtag_TCK ), + .tms_i ( jtag_TMS ), + .trst_ni ( jtag_TRSTn ), + .td_i ( jtag_TDI ), + .td_o ( jtag_TDO_data ), + .tdo_oe_o ( jtag_TDO_driven ) + ); + + // SiFive's SimDTM Module + // Converts to DPI calls + logic [1:0] debug_req_bits_op; + assign dmi_req.op = dm::dtm_op_e'(debug_req_bits_op); + + if (InclSimDTM) begin + SimDTM i_SimDTM ( + .clk ( clk_i ), + .reset ( ~rst_ni ), + .debug_req_valid ( dmi_req_valid ), + .debug_req_ready ( debug_req_ready ), + .debug_req_bits_addr ( dmi_req.addr ), + .debug_req_bits_op ( debug_req_bits_op ), + .debug_req_bits_data ( dmi_req.data ), + .debug_resp_valid ( dmi_resp_valid ), + .debug_resp_ready ( dmi_resp_ready ), + .debug_resp_bits_resp ( debug_resp.resp ), + .debug_resp_bits_data ( debug_resp.data ), + .exit ( dmi_exit ) + ); + end else begin + assign dmi_req_valid = '0; + assign debug_req_bits_op = '0; + assign dmi_exit = 1'b0; + end + + // this delay window allows the core to read and execute init code + // from the bootrom before the first debug request can interrupt + // core. this is needed in cases where an fsbl is involved that + // expects a0 and a1 to be initialized with the hart id and a + // pointer to the dev tree, respectively. 
+ localparam int unsigned DmiDelCycles = 500; + + logic debug_req_core_ungtd; + int dmi_del_cnt_d, dmi_del_cnt_q; + + assign dmi_del_cnt_d = (dmi_del_cnt_q) ? dmi_del_cnt_q - 1 : 0; + assign debug_req_core = (dmi_del_cnt_q) ? 1'b0 : + (!debug_enable) ? 1'b0 : debug_req_core_ungtd; + + always_ff @(posedge clk_i or negedge rst_ni) begin : p_dmi_del_cnt + if(!rst_ni) begin + dmi_del_cnt_q <= DmiDelCycles; + end else begin + dmi_del_cnt_q <= dmi_del_cnt_d; + end + end + + ariane_axi::req_t dm_axi_m_req; + ariane_axi::resp_t dm_axi_m_resp; + + logic dm_slave_req; + logic dm_slave_we; + logic [64-1:0] dm_slave_addr; + logic [64/8-1:0] dm_slave_be; + logic [64-1:0] dm_slave_wdata; + logic [64-1:0] dm_slave_rdata; + + logic dm_master_req; + logic [64-1:0] dm_master_add; + logic dm_master_we; + logic [64-1:0] dm_master_wdata; + logic [64/8-1:0] dm_master_be; + logic dm_master_gnt; + logic dm_master_r_valid; + logic [64-1:0] dm_master_r_rdata; + + // debug module + dm_top #( + .NrHarts ( 1 ), + .BusWidth ( AXI_DATA_WIDTH ), + .SelectableHarts ( 1'b1 ) + ) i_dm_top ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), // PoR + .testmode_i ( test_en ), + .ndmreset_o ( ndmreset ), + .dmactive_o ( ), // active debug session + .debug_req_o ( debug_req_core_ungtd ), + .unavailable_i ( '0 ), + .hartinfo_i ( {ariane_pkg::DebugHartInfo} ), + .slave_req_i ( dm_slave_req ), + .slave_we_i ( dm_slave_we ), + .slave_addr_i ( dm_slave_addr ), + .slave_be_i ( dm_slave_be ), + .slave_wdata_i ( dm_slave_wdata ), + .slave_rdata_o ( dm_slave_rdata ), + .master_req_o ( dm_master_req ), + .master_add_o ( dm_master_add ), + .master_we_o ( dm_master_we ), + .master_wdata_o ( dm_master_wdata ), + .master_be_o ( dm_master_be ), + .master_gnt_i ( dm_master_gnt ), + .master_r_valid_i ( dm_master_r_valid ), + .master_r_rdata_i ( dm_master_r_rdata ), + .dmi_rst_ni ( rst_ni ), + .dmi_req_valid_i ( debug_req_valid ), + .dmi_req_ready_o ( debug_req_ready ), + .dmi_req_i ( debug_req ), + .dmi_resp_valid_o ( 
debug_resp_valid ), + .dmi_resp_ready_i ( debug_resp_ready ), + .dmi_resp_o ( debug_resp ) + ); + + + axi2mem #( + .AXI_ID_WIDTH ( ariane_axi_soc::IdWidthSlave ), + .AXI_ADDR_WIDTH ( AXI_ADDRESS_WIDTH ), + .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ) + ) i_dm_axi2mem ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .slave ( master[ariane_soc::Debug] ), + .req_o ( dm_slave_req ), + .we_o ( dm_slave_we ), + .addr_o ( dm_slave_addr ), + .be_o ( dm_slave_be ), + .user_o ( ), + .data_o ( dm_slave_wdata ), + .user_i ( '0 ), + .data_i ( dm_slave_rdata ) + ); + + `AXI_ASSIGN_FROM_REQ(slave[1], dm_axi_m_req) + `AXI_ASSIGN_TO_RESP(dm_axi_m_resp, slave[1]) + + axi_adapter #( + .CVA6Cfg ( CVA6Cfg ), + .DATA_WIDTH ( AXI_DATA_WIDTH ), + .axi_req_t ( ariane_axi::req_t ), + .axi_rsp_t ( ariane_axi::resp_t ) + ) i_dm_axi_master ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .req_i ( dm_master_req ), + .type_i ( ariane_pkg::SINGLE_REQ ), + .amo_i ( ariane_pkg::AMO_NONE ), + .gnt_o ( dm_master_gnt ), + .addr_i ( dm_master_add ), + .we_i ( dm_master_we ), + .wdata_i ( dm_master_wdata ), + .be_i ( dm_master_be ), + .size_i ( 2'b11 ), // always do 64bit here and use byte enables to gate + .id_i ( '0 ), + .valid_o ( dm_master_r_valid ), + .rdata_o ( dm_master_r_rdata ), + .id_o ( ), + .critical_word_o ( ), + .critical_word_valid_o ( ), + .axi_req_o ( dm_axi_m_req ), + .axi_resp_i ( dm_axi_m_resp ) + ); + + + // --------------- + // ROM + // --------------- + logic rom_req; + logic [AXI_ADDRESS_WIDTH-1:0] rom_addr; + logic [AXI_DATA_WIDTH-1:0] rom_rdata; + + axi2mem #( + .AXI_ID_WIDTH ( ariane_axi_soc::IdWidthSlave ), + .AXI_ADDR_WIDTH ( AXI_ADDRESS_WIDTH ), + .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ) + ) i_axi2rom ( + .clk_i ( clk_i ), + .rst_ni ( ndmreset_n ), + .slave ( master[ariane_soc::ROM] ), + .req_o ( rom_req ), + .we_o ( ), + .addr_o ( rom_addr ), + .be_o ( ), + .user_o ( ), + .data_o ( ), + .user_i ( '0 ), + .data_i ( 
rom_rdata ) + ); + + bootrom i_bootrom ( + .clk_i ( clk_i ), + .req_i ( rom_req ), + .addr_i ( rom_addr ), + .rdata_o ( rom_rdata ) + ); + + // ------------------------------ + // GPIO + // ------------------------------ + + // GPIO not implemented, adding an error slave here + + ariane_axi_soc::req_slv_t gpio_req; + ariane_axi_soc::resp_slv_t gpio_resp; + `AXI_ASSIGN_TO_REQ(gpio_req, master[ariane_soc::GPIO]) + `AXI_ASSIGN_FROM_RESP(master[ariane_soc::GPIO], gpio_resp) + axi_err_slv #( + .AxiIdWidth ( ariane_axi_soc::IdWidthSlave ), + .req_t ( ariane_axi_soc::req_slv_t ), + .resp_t ( ariane_axi_soc::resp_slv_t ) + ) i_gpio_err_slv ( + .clk_i ( clk_i ), + .rst_ni ( ndmreset_n ), + .test_i ( test_en ), + .slv_req_i ( gpio_req ), + .slv_resp_o ( gpio_resp ) + ); + + + // ------------------------------ + // Memory + Exclusive Access + // ------------------------------ + AXI_BUS #( + .AXI_ADDR_WIDTH ( AXI_ADDRESS_WIDTH ), + .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), + .AXI_ID_WIDTH ( ariane_axi_soc::IdWidthSlave ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ) + ) dram(); + + logic req; + logic we; + logic [AXI_ADDRESS_WIDTH-1:0] addr; + logic [AXI_DATA_WIDTH/8-1:0] be; + logic [AXI_DATA_WIDTH-1:0] wdata; + logic [AXI_DATA_WIDTH-1:0] rdata; + logic [AXI_USER_WIDTH-1:0] wuser; + logic [AXI_USER_WIDTH-1:0] ruser; + + axi_riscv_atomics_wrap #( + .AXI_ADDR_WIDTH ( AXI_ADDRESS_WIDTH ), + .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), + .AXI_ID_WIDTH ( ariane_axi_soc::IdWidthSlave ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ), + .AXI_MAX_WRITE_TXNS ( 1 ), + .RISCV_WORD_WIDTH ( 64 ) + ) i_axi_riscv_atomics ( + .clk_i, + .rst_ni ( ndmreset_n ), + .slv ( master[ariane_soc::DRAM] ), + .mst ( dram ) + ); + + AXI_BUS #( + .AXI_ADDR_WIDTH ( AXI_ADDRESS_WIDTH ), + .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), + .AXI_ID_WIDTH ( ariane_axi_soc::IdWidthSlave ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ) + ) dram_delayed(); + + axi_delayer_intf #( + .AXI_ID_WIDTH ( ariane_axi_soc::IdWidthSlave ), + .AXI_ADDR_WIDTH ( AXI_ADDRESS_WIDTH 
), + .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ), + .STALL_RANDOM_INPUT ( StallRandomInput ), + .STALL_RANDOM_OUTPUT ( StallRandomOutput ), + .FIXED_DELAY_INPUT ( 0 ), + .FIXED_DELAY_OUTPUT ( 0 ) + ) i_axi_delayer ( + .clk_i ( clk_i ), + .rst_ni ( ndmreset_n ), + .slv ( dram ), + .mst ( dram_delayed ) + ); + + axi2mem #( + .AXI_ID_WIDTH ( ariane_axi_soc::IdWidthSlave ), + .AXI_ADDR_WIDTH ( AXI_ADDRESS_WIDTH ), + .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ) + ) i_axi2mem ( + .clk_i ( clk_i ), + .rst_ni ( ndmreset_n ), + .slave ( dram_delayed ), + .req_o ( req ), + .we_o ( we ), + .addr_o ( addr ), + .be_o ( be ), + .user_o ( wuser ), + .data_o ( wdata ), + .user_i ( ruser ), + .data_i ( rdata ) + ); + + sram #( + .DATA_WIDTH ( AXI_DATA_WIDTH ), + .USER_WIDTH ( AXI_USER_WIDTH ), + .USER_EN ( AXI_USER_EN ), +`ifdef VERILATOR + .SIM_INIT ( "none" ), +`else + .SIM_INIT ( "zeros" ), +`endif + .NUM_WORDS ( NUM_WORDS ) + ) i_sram ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .req_i ( req ), + .we_i ( we ), + .addr_i ( addr[$clog2(NUM_WORDS)-1+$clog2(AXI_DATA_WIDTH/8):$clog2(AXI_DATA_WIDTH/8)] ), + .wuser_i ( wuser ), + .wdata_i ( wdata ), + .be_i ( be ), + .ruser_o ( ruser ), + .rdata_o ( rdata ) + ); + + // --------------- + // AXI Xbar + // --------------- + + axi_pkg::xbar_rule_64_t [ariane_soc::NB_PERIPHERALS-1:0] addr_map; + + assign addr_map = '{ + '{ idx: ariane_soc::Debug, start_addr: ariane_soc::DebugBase, end_addr: ariane_soc::DebugBase + ariane_soc::DebugLength }, + '{ idx: ariane_soc::ROM, start_addr: ariane_soc::ROMBase, end_addr: ariane_soc::ROMBase + ariane_soc::ROMLength }, + '{ idx: ariane_soc::CLINT, start_addr: ariane_soc::CLINTBase, end_addr: ariane_soc::CLINTBase + ariane_soc::CLINTLength }, + '{ idx: ariane_soc::PLIC, start_addr: ariane_soc::PLICBase, end_addr: ariane_soc::PLICBase + ariane_soc::PLICLength }, + '{ idx: ariane_soc::UART, start_addr: ariane_soc::UARTBase, end_addr: 
ariane_soc::UARTBase + ariane_soc::UARTLength }, + '{ idx: ariane_soc::Timer, start_addr: ariane_soc::TimerBase, end_addr: ariane_soc::TimerBase + ariane_soc::TimerLength }, + '{ idx: ariane_soc::SPI, start_addr: ariane_soc::SPIBase, end_addr: ariane_soc::SPIBase + ariane_soc::SPILength }, + '{ idx: ariane_soc::Ethernet, start_addr: ariane_soc::EthernetBase, end_addr: ariane_soc::EthernetBase + ariane_soc::EthernetLength }, + '{ idx: ariane_soc::GPIO, start_addr: ariane_soc::GPIOBase, end_addr: ariane_soc::GPIOBase + ariane_soc::GPIOLength }, + '{ idx: ariane_soc::DRAM, start_addr: ariane_soc::DRAMBase, end_addr: ariane_soc::DRAMBase + ariane_soc::DRAMLength } + }; + + localparam axi_pkg::xbar_cfg_t AXI_XBAR_CFG = '{ + NoSlvPorts: unsigned'(ariane_soc::NrSlaves), + NoMstPorts: unsigned'(ariane_soc::NB_PERIPHERALS), + MaxMstTrans: unsigned'(1), // Probably requires update + MaxSlvTrans: unsigned'(1), // Probably requires update + FallThrough: 1'b0, + LatencyMode: axi_pkg::NO_LATENCY, + AxiIdWidthSlvPorts: unsigned'(ariane_axi_soc::IdWidth), + AxiIdUsedSlvPorts: unsigned'(ariane_axi_soc::IdWidth), + UniqueIds: 1'b0, + AxiAddrWidth: unsigned'(AXI_ADDRESS_WIDTH), + AxiDataWidth: unsigned'(AXI_DATA_WIDTH), + NoAddrRules: unsigned'(ariane_soc::NB_PERIPHERALS) + }; + + axi_xbar_intf #( + .AXI_USER_WIDTH ( AXI_USER_WIDTH ), + .Cfg ( AXI_XBAR_CFG ), + .rule_t ( axi_pkg::xbar_rule_64_t ) + ) i_axi_xbar ( + .clk_i ( clk_i ), + .rst_ni ( ndmreset_n ), + .test_i ( test_en ), + .slv_ports ( slave ), + .mst_ports ( master ), + .addr_map_i ( addr_map ), + .en_default_mst_port_i ( '0 ), + .default_mst_port_i ( '0 ) + ); + + // --------------- + // CLINT + // --------------- + logic ipi; + logic timer_irq; + + ariane_axi_soc::req_slv_t axi_clint_req; + ariane_axi_soc::resp_slv_t axi_clint_resp; + + clint #( + .AXI_ADDR_WIDTH ( AXI_ADDRESS_WIDTH ), + .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), + .AXI_ID_WIDTH ( ariane_axi_soc::IdWidthSlave ), + .NR_CORES ( 1 ), + .axi_req_t ( 
ariane_axi_soc::req_slv_t ), + .axi_resp_t ( ariane_axi_soc::resp_slv_t ) + ) i_clint ( + .clk_i ( clk_i ), + .rst_ni ( ndmreset_n ), + .testmode_i ( test_en ), + .axi_req_i ( axi_clint_req ), + .axi_resp_o ( axi_clint_resp ), + .rtc_i ( rtc_i ), + .timer_irq_o ( timer_irq ), + .ipi_o ( ipi ) + ); + + `AXI_ASSIGN_TO_REQ(axi_clint_req, master[ariane_soc::CLINT]) + `AXI_ASSIGN_FROM_RESP(master[ariane_soc::CLINT], axi_clint_resp) + + // --------------- + // Peripherals + // --------------- + logic tx, rx; + logic [1:0] irqs; + + ariane_peripherals #( + .AxiAddrWidth ( AXI_ADDRESS_WIDTH ), + .AxiDataWidth ( AXI_DATA_WIDTH ), + .AxiIdWidth ( ariane_axi_soc::IdWidthSlave ), + .AxiUserWidth ( AXI_USER_WIDTH ), +`ifndef VERILATOR + .InclUART ( 1'b1 ), +`else + .InclUART ( 1'b0 ), +`endif + .InclSPI ( 1'b0 ), + .InclEthernet ( 1'b0 ) + ) i_ariane_peripherals ( + .clk_i ( clk_i ), + .rst_ni ( ndmreset_n ), + .plic ( master[ariane_soc::PLIC] ), + .uart ( master[ariane_soc::UART] ), + .spi ( master[ariane_soc::SPI] ), + .ethernet ( master[ariane_soc::Ethernet] ), + .timer ( master[ariane_soc::Timer] ), + .irq_o ( irqs ), + .rx_i ( rx ), + .tx_o ( tx ), + .eth_txck ( ), + .eth_rxck ( ), + .eth_rxctl ( ), + .eth_rxd ( ), + .eth_rst_n ( ), + .eth_tx_en ( ), + .eth_txd ( ), + .phy_mdio ( ), + .eth_mdc ( ), + .mdio ( ), + .mdc ( ), + .spi_clk_o ( ), + .spi_mosi ( ), + .spi_miso ( ), + .spi_ss ( ) + ); + + uart_bus #(.BAUD_RATE(115200), .PARITY_EN(0)) i_uart_bus (.rx(tx), .tx(rx), .rx_en(1'b1)); + + // --------------- + // Core + // --------------- + ariane_axi::req_t axi_ariane_req; + ariane_axi::resp_t axi_ariane_resp; + rvfi_probes_t rvfi_probes; + rvfi_instr_t [CVA6Cfg.NrCommitPorts-1:0] rvfi_instr; + + ariane #( + .CVA6Cfg ( CVA6Cfg ), + .IsRVFI ( IsRVFI ), + .rvfi_probes_t ( rvfi_probes_t ), + .noc_req_t ( ariane_axi::req_t ), + .noc_resp_t ( ariane_axi::resp_t ) + ) i_ariane ( + .clk_i ( clk_i ), + .rst_ni ( ndmreset_n ), + .boot_addr_i ( ariane_soc::ROMBase ), // start 
fetching from ROM + .hart_id_i ( {56'h0, hart_id} ), + .irq_i ( irqs ), + .ipi_i ( ipi ), + .time_irq_i ( timer_irq ), + .rvfi_probes_o ( rvfi_probes ), +// Disable Debug when simulating with Spike +`ifdef SPIKE_TANDEM + .debug_req_i ( 1'b0 ), +`else + .debug_req_i ( debug_req_core ), +`endif + .noc_req_o ( axi_ariane_req ), + .noc_resp_i ( axi_ariane_resp ) + ); + + `AXI_ASSIGN_FROM_REQ(slave[0], axi_ariane_req) + `AXI_ASSIGN_TO_RESP(axi_ariane_resp, slave[0]) + + // ------------- + // Simulation Helper Functions + // ------------- + // check for response errors + always_ff @(posedge clk_i) begin : p_assert + if (axi_ariane_req.r_ready && + axi_ariane_resp.r_valid && + axi_ariane_resp.r.resp inside {axi_pkg::RESP_DECERR, axi_pkg::RESP_SLVERR}) begin + $warning("R Response Errored"); + end + if (axi_ariane_req.b_ready && + axi_ariane_resp.b_valid && + axi_ariane_resp.b.resp inside {axi_pkg::RESP_DECERR, axi_pkg::RESP_SLVERR}) begin + $warning("B Response Errored"); + end + end + + cva6_rvfi #( + .CVA6Cfg (CVA6Cfg), + .rvfi_instr_t(rvfi_instr_t), + .rvfi_probes_t(rvfi_probes_t) + ) i_cva6_rvfi ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .rvfi_probes_i(rvfi_probes), + .rvfi_o(rvfi_instr) + ); + + rvfi_tracer #( + .CVA6Cfg(CVA6Cfg), + .rvfi_instr_t(rvfi_instr_t), + // + .HART_ID(hart_id), + .DEBUG_START(0), + .DEBUG_STOP(0) + ) i_rvfi_tracer ( + .clk_i(clk_i), + .rst_ni(rst_ni), + .rvfi_i(rvfi_instr), + .end_of_test_o(rvfi_exit) + ); + +`ifdef SPIKE_TANDEM + spike #( + .CVA6Cfg ( CVA6Cfg ), + .rvfi_instr_t(rvfi_instr_t) + ) i_spike ( + .clk_i, + .rst_ni, + .clint_tick_i ( rtc_i ), + .rvfi_i ( rvfi_instr ) + ); + initial begin + $display("Running binary in tandem mode"); + end +`endif + + +`ifdef AXI_SVA + // AXI 4 Assertion IP integration - You will need to get your own copy of this IP if you want + // to use it + Axi4PC #( + .DATA_WIDTH(ariane_axi_soc::DataWidth), + .WID_WIDTH(ariane_axi_soc::IdWidthSlave), + .RID_WIDTH(ariane_axi_soc::IdWidthSlave), + 
.AWUSER_WIDTH(ariane_axi_soc::UserWidth), + .WUSER_WIDTH(ariane_axi_soc::UserWidth), + .BUSER_WIDTH(ariane_axi_soc::UserWidth), + .ARUSER_WIDTH(ariane_axi_soc::UserWidth), + .RUSER_WIDTH(ariane_axi_soc::UserWidth), + .ADDR_WIDTH(ariane_axi_soc::AddrWidth) + ) i_Axi4PC ( + .ACLK(clk_i), + .ARESETn(ndmreset_n), + .AWID(dram.aw_id), + .AWADDR(dram.aw_addr), + .AWLEN(dram.aw_len), + .AWSIZE(dram.aw_size), + .AWBURST(dram.aw_burst), + .AWLOCK(dram.aw_lock), + .AWCACHE(dram.aw_cache), + .AWPROT(dram.aw_prot), + .AWQOS(dram.aw_qos), + .AWREGION(dram.aw_region), + .AWUSER(dram.aw_user), + .AWVALID(dram.aw_valid), + .AWREADY(dram.aw_ready), + .WLAST(dram.w_last), + .WDATA(dram.w_data), + .WSTRB(dram.w_strb), + .WUSER(dram.w_user), + .WVALID(dram.w_valid), + .WREADY(dram.w_ready), + .BID(dram.b_id), + .BRESP(dram.b_resp), + .BUSER(dram.b_user), + .BVALID(dram.b_valid), + .BREADY(dram.b_ready), + .ARID(dram.ar_id), + .ARADDR(dram.ar_addr), + .ARLEN(dram.ar_len), + .ARSIZE(dram.ar_size), + .ARBURST(dram.ar_burst), + .ARLOCK(dram.ar_lock), + .ARCACHE(dram.ar_cache), + .ARPROT(dram.ar_prot), + .ARQOS(dram.ar_qos), + .ARREGION(dram.ar_region), + .ARUSER(dram.ar_user), + .ARVALID(dram.ar_valid), + .ARREADY(dram.ar_ready), + .RID(dram.r_id), + .RLAST(dram.r_last), + .RDATA(dram.r_data), + .RRESP(dram.r_resp), + .RUSER(dram.r_user), + .RVALID(dram.r_valid), + .RREADY(dram.r_ready), + .CACTIVE('0), + .CSYSREQ('0), + .CSYSACK('0) + ); +`endif +endmodule diff --git a/test/type_param/corev_apu/tb/axi_intf.sv b/test/type_param/corev_apu/tb/axi_intf.sv new file mode 100644 index 00000000..41d4b16a --- /dev/null +++ b/test/type_param/corev_apu/tb/axi_intf.sv @@ -0,0 +1,311 @@ +// Copyright (c) 2014-2018 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. 
You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Fabian Schuiki +// +// This file defines the interfaces we support. + + + +/// An AXI4 interface. +interface AXI_BUS #( + parameter AXI_ADDR_WIDTH = -1, + parameter AXI_DATA_WIDTH = -1, + parameter AXI_ID_WIDTH = -1, + parameter AXI_USER_WIDTH = -1 +); + + import axi_pkg::*; + + localparam AXI_STRB_WIDTH = AXI_DATA_WIDTH / 8; + + typedef logic [AXI_ID_WIDTH-1:0] id_t; + typedef logic [AXI_ADDR_WIDTH-1:0] addr_t; + typedef logic [AXI_DATA_WIDTH-1:0] data_t; + typedef logic [AXI_STRB_WIDTH-1:0] strb_t; + typedef logic [AXI_USER_WIDTH-1:0] user_t; + typedef logic [5:0] atop_t; + + id_t aw_id; + addr_t aw_addr; + logic [7:0] aw_len; + logic [2:0] aw_size; + burst_t aw_burst; + logic aw_lock; + cache_t aw_cache; + prot_t aw_prot; + qos_t aw_qos; + atop_t aw_atop; + region_t aw_region; + user_t aw_user; + logic aw_valid; + logic aw_ready; + + data_t w_data; + strb_t w_strb; + logic w_last; + user_t w_user; + logic w_valid; + logic w_ready; + + id_t b_id; + resp_t b_resp; + user_t b_user; + logic b_valid; + logic b_ready; + + id_t ar_id; + addr_t ar_addr; + logic [7:0] ar_len; + logic [2:0] ar_size; + burst_t ar_burst; + logic ar_lock; + cache_t ar_cache; + prot_t ar_prot; + qos_t ar_qos; + region_t ar_region; + user_t ar_user; + logic ar_valid; + logic ar_ready; + + id_t r_id; + data_t r_data; + resp_t r_resp; + logic r_last; + user_t r_user; + logic r_valid; + logic r_ready; + + modport Master ( + output aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_atop, aw_region, aw_user, aw_valid, input aw_ready, + 
output w_data, w_strb, w_last, w_user, w_valid, input w_ready, + input b_id, b_resp, b_user, b_valid, output b_ready, + output ar_id, ar_addr, ar_len, ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, ar_region, ar_user, ar_valid, input ar_ready, + input r_id, r_data, r_resp, r_last, r_user, r_valid, output r_ready + ); + + modport Slave ( + input aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_atop, aw_region, aw_user, aw_valid, output aw_ready, + input w_data, w_strb, w_last, w_user, w_valid, output w_ready, + output b_id, b_resp, b_user, b_valid, input b_ready, + input ar_id, ar_addr, ar_len, ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, ar_region, ar_user, ar_valid, output ar_ready, + output r_id, r_data, r_resp, r_last, r_user, r_valid, input r_ready + ); + +endinterface + + +/// An asynchronous AXI4 interface. +interface AXI_BUS_ASYNC +#( + parameter AXI_ADDR_WIDTH = -1, + parameter AXI_DATA_WIDTH = -1, + parameter AXI_ID_WIDTH = -1, + parameter AXI_USER_WIDTH = -1, + parameter BUFFER_WIDTH = -1 +); + + localparam AXI_STRB_WIDTH = AXI_DATA_WIDTH / 8; + + + logic [AXI_ID_WIDTH-1:0] aw_id; + logic [AXI_ADDR_WIDTH-1:0] aw_addr; + logic [7:0] aw_len; + logic [2:0] aw_size; + logic [1:0] aw_burst; + logic aw_lock; + logic [3:0] aw_cache; + logic [2:0] aw_prot; + logic [3:0] aw_qos; + logic [5:0] aw_atop; + logic [3:0] aw_region; + logic [AXI_USER_WIDTH-1:0] aw_user; + logic [BUFFER_WIDTH-1:0] aw_writetoken; + logic [BUFFER_WIDTH-1:0] aw_readpointer; + + logic [AXI_DATA_WIDTH-1:0] w_data; + logic [AXI_STRB_WIDTH-1:0] w_strb; + logic w_last; + logic [AXI_USER_WIDTH-1:0] w_user; + logic [BUFFER_WIDTH-1:0] w_writetoken; + logic [BUFFER_WIDTH-1:0] w_readpointer; + + logic [AXI_ID_WIDTH-1:0] b_id; + logic [1:0] b_resp; + logic [AXI_USER_WIDTH-1:0] b_user; + logic [BUFFER_WIDTH-1:0] b_writetoken; + logic [BUFFER_WIDTH-1:0] b_readpointer; + + logic [AXI_ID_WIDTH-1:0] ar_id; + logic [AXI_ADDR_WIDTH-1:0] ar_addr; + logic [7:0] 
ar_len; + logic [2:0] ar_size; + logic [1:0] ar_burst; + logic ar_lock; + logic [3:0] ar_cache; + logic [2:0] ar_prot; + logic [3:0] ar_qos; + logic [3:0] ar_region; + logic [AXI_USER_WIDTH-1:0] ar_user; + logic [BUFFER_WIDTH-1:0] ar_writetoken; + logic [BUFFER_WIDTH-1:0] ar_readpointer; + + logic [AXI_ID_WIDTH-1:0] r_id; + logic [AXI_DATA_WIDTH-1:0] r_data; + logic [1:0] r_resp; + logic r_last; + logic [AXI_USER_WIDTH-1:0] r_user; + logic [BUFFER_WIDTH-1:0] r_writetoken; + logic [BUFFER_WIDTH-1:0] r_readpointer; + + modport Master ( + output aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_atop, aw_region, aw_user, aw_writetoken, input aw_readpointer, + output w_data, w_strb, w_last, w_user, w_writetoken, input w_readpointer, + input b_id, b_resp, b_user, b_writetoken, output b_readpointer, + output ar_id, ar_addr, ar_len, ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, ar_region, ar_user, ar_writetoken, input ar_readpointer, + input r_id, r_data, r_resp, r_last, r_user, r_writetoken, output r_readpointer + ); + + modport Slave ( + input aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_atop, aw_region, aw_user, aw_writetoken, output aw_readpointer, + input w_data, w_strb, w_last, w_user, w_writetoken, output w_readpointer, + output b_id, b_resp, b_user, b_writetoken, input b_readpointer, + input ar_id, ar_addr, ar_len, ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, ar_region, ar_user, ar_writetoken, output ar_readpointer, + output r_id, r_data, r_resp, r_last, r_user, r_writetoken, input r_readpointer + ); + +endinterface + + +/// An AXI4-Lite interface. 
+interface AXI_LITE #( + parameter AXI_ADDR_WIDTH = -1, + parameter AXI_DATA_WIDTH = -1 +); + + import axi_pkg::*; + + localparam AXI_STRB_WIDTH = AXI_DATA_WIDTH / 8; + + typedef logic [AXI_ADDR_WIDTH-1:0] addr_t; + typedef logic [AXI_DATA_WIDTH-1:0] data_t; + typedef logic [AXI_STRB_WIDTH-1:0] strb_t; + + // AW channel + addr_t aw_addr; + prot_t aw_prot; + logic aw_valid; + logic aw_ready; + + data_t w_data; + strb_t w_strb; + logic w_valid; + logic w_ready; + + resp_t b_resp; + logic b_valid; + logic b_ready; + + addr_t ar_addr; + prot_t ar_prot; + logic ar_valid; + logic ar_ready; + + data_t r_data; + resp_t r_resp; + logic r_valid; + logic r_ready; + + modport Master ( + output aw_addr, aw_prot, aw_valid, input aw_ready, + output w_data, w_strb, w_valid, input w_ready, + input b_resp, b_valid, output b_ready, + output ar_addr, ar_prot, ar_valid, input ar_ready, + input r_data, r_resp, r_valid, output r_ready + ); + + modport Slave ( + input aw_addr, aw_prot, aw_valid, output aw_ready, + input w_data, w_strb, w_valid, output w_ready, + output b_resp, b_valid, input b_ready, + input ar_addr, ar_prot, ar_valid, output ar_ready, + output r_data, r_resp, r_valid, input r_ready + ); + + /// The interface as an output (issuing requests, initiator, master). + modport out ( + output aw_addr, aw_valid, input aw_ready, + output w_data, w_strb, w_valid, input w_ready, + input b_resp, b_valid, output b_ready, + output ar_addr, ar_valid, input ar_ready, + input r_data, r_resp, r_valid, output r_ready + ); + + /// The interface as an input (accepting requests, target, slave). + modport in ( + input aw_addr, aw_valid, output aw_ready, + input w_data, w_strb, w_valid, output w_ready, + output b_resp, b_valid, input b_ready, + input ar_addr, ar_valid, output ar_ready, + output r_data, r_resp, r_valid, input r_ready + ); + +endinterface + + +/// An AXI routing table. +/// +/// For each slave, multiple rules can be defined. Each rule consists of an +/// address mask and a base. 
Addresses are masked and then compared against the +/// base to decide where transfers need to go. +interface AXI_ROUTING_RULES #( + /// The address width. + parameter int AXI_ADDR_WIDTH = -1, + /// The number of slaves in the routing table. + parameter int NUM_SLAVE = -1, + /// The number of rules in the routing table. + parameter int NUM_RULES = -1 +); + + struct packed { + logic enabled; + logic [AXI_ADDR_WIDTH-1:0] mask; + logic [AXI_ADDR_WIDTH-1:0] base; + } [NUM_RULES-1:0] rules [NUM_SLAVE]; + + modport xbar(input rules); + modport cfg(output rules); + +endinterface + + +/// An AXI arbitration interface. +interface AXI_ARBITRATION #( + /// The number of requestors. + parameter int NUM_REQ = -1 +); + + // Incoming requests. + logic [NUM_REQ-1:0] in_req; + logic [NUM_REQ-1:0] in_ack; + + // Outgoing request. + logic out_req; + logic out_ack; + logic [$clog2(NUM_REQ)-1:0] out_sel; + + // The arbiter side of the interface. + modport arb(input in_req, out_ack, output out_req, out_sel, in_ack); + + // The requestor side of the interface. + modport req(output in_req, out_ack, input out_req, out_sel, in_ack); + +endinterface diff --git a/test/type_param/corev_apu/tb/common/mock_uart.sv b/test/type_param/corev_apu/tb/common/mock_uart.sv new file mode 100644 index 00000000..6a14904b --- /dev/null +++ b/test/type_param/corev_apu/tb/common/mock_uart.sv @@ -0,0 +1,120 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 28/09/2018 +// Description: Mock replacement for UART in testbench (not synthesizable!) + +module mock_uart ( + input logic clk_i, + input logic rst_ni, + input logic penable_i, + input logic pwrite_i, + input logic [31:0] paddr_i, + input logic psel_i, + input logic [31:0] pwdata_i, + output logic [31:0] prdata_o, + output logic pready_o, + output logic pslverr_o +); + localparam RBR = 0; + localparam THR = 0; + localparam IER = 1; + localparam IIR = 2; + localparam FCR = 2; + localparam LCR = 3; + localparam MCR = 4; + localparam LSR = 5; + localparam MSR = 6; + localparam SCR = 7; + localparam DLL = 0; + localparam DLM = 1; + + localparam THRE = 5; // transmit holding register empty + localparam TEMT = 6; // transmitter empty + + byte lcr = 0; + byte dlm = 0; + byte dll = 0; + byte mcr = 0; + byte lsr = 0; + byte ier = 0; + byte msr = 0; + byte scr = 0; + logic fifo_enabled = 1'b0; + + assign pready_o = 1'b1; + assign pslverr_o = 1'b0; + + function void uart_tx(byte ch); + $write("%c", ch); + endfunction : uart_tx + +/* verilator lint_off WIDTHTRUNC */ +/* verilator lint_off WIDTHEXPAND */ +/* verilator lint_off WIDTHCONCAT */ + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (rst_ni) begin + if (psel_i & penable_i & pwrite_i) begin + case ((paddr_i >> 'h2) & 'h7) + THR: begin + if (lcr & 'h80) dll <= byte'(pwdata_i[7:0]); + else uart_tx(byte'(pwdata_i[7:0])); + end + IER: begin + if (lcr & 'h80) dlm <= byte'(pwdata_i[7:0]); + else ier <= byte'(pwdata_i[7:0] & 'hF); + end + FCR: begin + if (pwdata_i[0]) fifo_enabled <= 1'b1; + else fifo_enabled <= 1'b0; + end + LCR: lcr <= byte'(pwdata_i[7:0]); + MCR: mcr <= byte'(pwdata_i[7:0] & 'h1F); + LSR: lsr <= byte'(pwdata_i[7:0]); + MSR: msr <= byte'(pwdata_i[7:0]); + SCR: scr <= byte'(pwdata_i[7:0]); + default:; + endcase + end + end + end + 
+ always_comb begin + prdata_o = '0; + if (psel_i & penable_i & ~pwrite_i) begin + case ((paddr_i >> 'h2) & 'h7) + THR: begin + if (lcr & 'h80) prdata_o = {24'b0, dll}; + end + IER: begin + if (lcr & 'h80) prdata_o = {24'b0, dlm}; + else prdata_o = {24'b0, ier}; + end + IIR: begin + if (fifo_enabled) prdata_o = {24'b0, 8'hc0}; + else prdata_o = {24'b0, 8'b0}; + end + LCR: prdata_o = {24'b0, lcr}; + MCR: prdata_o = {24'b0, mcr}; + LSR: prdata_o = {24'b0, (lsr | (1 << THRE) | (1 << TEMT))}; + MSR: prdata_o = {24'b0, msr}; + SCR: prdata_o = {24'b0, scr}; + default:; + endcase + end + end + +/* verilator lint_on WIDTHTRUNC */ +/* verilator lint_on WIDTHEXPAND */ +/* verilator lint_on WIDTHCONCAT */ + +endmodule diff --git a/test/type_param/corev_apu/tb/common/uart.sv b/test/type_param/corev_apu/tb/common/uart.sv new file mode 100644 index 00000000..d45f39a5 --- /dev/null +++ b/test/type_param/corev_apu/tb/common/uart.sv @@ -0,0 +1,104 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+//
+// Author: Unknown
+// Date: Unknown
+// Description: This module takes data over UART and prints them to the console
+// A string is printed to the console as soon as a '\n' character is found
+//
+// Every received byte is also written to the file "uart". The receiver and
+// the send_char task are compiled only when VERILATOR is not defined.
+
+interface uart_bus #(
+  parameter int unsigned BAUD_RATE = 115200,
+  parameter int unsigned PARITY_EN = 0
+)(
+  input  logic rx,
+  output logic tx,
+  input  logic rx_en
+);
+
+/* pragma translate_off */
+`ifndef VERILATOR
+  // Duration of one bit on the line
+  localparam time BIT_PERIOD = (1000000000 / BAUD_RATE) * 1ns;
+
+  logic [7:0] character;        // byte currently being received
+  logic [256*8-1:0] stringa;    // line buffer, filled from the most significant byte down
+  logic parity;
+  integer charnum;              // number of characters already stored in stringa
+  integer file;
+
+  initial begin
+    tx = 1'bZ;
+    file = $fopen("uart", "w");
+    // Start with an empty line buffer. Without this, charnum stays X when
+    // rx_en is already high at time 0, every indexed write into stringa is
+    // dropped and no line is ever printed.
+    charnum = 0;
+    stringa = "";
+  end
+
+  // Receiver
+  always begin
+    if (rx_en) begin
+      // Wait for the start bit, then move to the middle of the bit time so
+      // that every following sample is taken mid-bit.
+      @(negedge rx);
+      #(BIT_PERIOD/2);
+      // Data bits arrive least significant bit first
+      for (int i = 0; i <= 7; i++) begin
+        #BIT_PERIOD character[i] = rx;
+      end
+
+      if (PARITY_EN == 1) begin
+        // check parity
+        #BIT_PERIOD parity = rx;
+
+        // XOR of all data bits and the parity bit must be zero
+        for (int i=7;i>=0;i--) begin
+          parity = character[i] ^ parity;
+        end
+
+        if (parity == 1'b1) begin
+          $display("Parity error detected");
+        end
+      end
+
+      // STOP BIT
+      #BIT_PERIOD;
+
+      $fwrite(file, "%c", character);
+      stringa[(255-charnum)*8 +: 8] = character;
+      if (character == 8'h0A || charnum == 254) begin // line feed or max. chars reached
+        if (character == 8'h0A) begin
+          stringa[(255-charnum)*8 +: 8] = 8'h0; // null terminate string, replace line feed
+        end else begin
+          stringa[(255-charnum-1)*8 +: 8] = 8'h0; // null terminate string
+        end
+
+        $write("[UART]: %s\n", stringa);
+        charnum = 0;
+        stringa = "";
+      end else begin
+        charnum = charnum + 1;
+      end
+    end else begin
+      // Receiver disabled: drop any partial line and poll rx_en again later
+      charnum = 0;
+      stringa = "";
+      #10;
+    end
+  end
+
+  // Drive one character onto tx: start bit, eight data bits (least
+  // significant bit first), stop bit. No parity bit is sent.
+  task send_char(input logic [7:0] c);
+    int i;
+
+    // start bit
+    tx = 1'b0;
+
+    for (i = 0; i < 8; i++) begin
+      #(BIT_PERIOD);
+      tx = c[i];
+    end
+
+    // stop bit
+    #(BIT_PERIOD);
+    tx = 1'b1;
+    #(BIT_PERIOD);
+  endtask
+`endif
+/* pragma translate_on */
+endinterface
diff --git a/test/type_param/corev_apu/tb/rvfi_tracer.sv b/test/type_param/corev_apu/tb/rvfi_tracer.sv
new file mode 100644
index 00000000..75f68beb
--- /dev/null
+++ b/test/type_param/corev_apu/tb/rvfi_tracer.sv
@@ -0,0 +1,134 @@
+// Copyright 2020 Thales DIS design services SAS
+//
+// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
+// You may obtain a copy of the License at https://solderpad.org/licenses/
+//
+// Original Author: Jean-Roch COULON - Thales
+
+// Writes one trace record per valid RVFI commit (or trap) to the file
+// "trace_rvfi_hart_<HART_ID>.dasm" and drives end_of_test_o.
+//
+// Plusargs read at time 0:
+//   +time_out=<cycles>   cycle count after which the test is ended
+//                        (default 2000000)
+//   +tohost_addr=<hex>   address of 'tohost'; a traced store to it with bit 0
+//                        of the data set ends the test (default 0 = disabled)
+module rvfi_tracer #(
+  parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
+  parameter type rvfi_instr_t = logic,
+  //
+  parameter logic [7:0] HART_ID = '0,
+  parameter int unsigned DEBUG_START = 0,
+  parameter int unsigned DEBUG_STOP  = 0
+)(
+  input logic                                    clk_i,
+  input logic                                    rst_ni,
+  input rvfi_instr_t[CVA6Cfg.NrCommitPorts-1:0]  rvfi_i,
+  output logic[31:0]                             end_of_test_o
+);
+
+  logic[riscv::PLEN-1:0] TOHOST_ADDR;
+  int f;
+  int unsigned SIM_FINISH;
+  initial begin
+    f = $fopen($sformatf("trace_rvfi_hart_%h.dasm", HART_ID), "w");
+    if (!$value$plusargs("time_out=%d", SIM_FINISH)) SIM_FINISH = 2000000;
+    if (!$value$plusargs("tohost_addr=%h", TOHOST_ADDR)) TOHOST_ADDR = '0;
+    if (TOHOST_ADDR == '0) begin
+      $display("*** [rvfi_tracer] WARNING: No valid address of 'tohost' (tohost == 0x%h), termination possible only by timeout or Ctrl-C!\n", TOHOST_ADDR);
+      $fwrite(f, "*** [rvfi_tracer] WARNING No valid address of 'tohost' (tohost == 0x%h), termination possible only by timeout or Ctrl-C!\n", TOHOST_ADDR);
+    end
+  end
+
+  final $fclose(f);
+
+  logic [31:0] cycles;
+  // Generate the trace based on RVFI
+  logic [63:0] pc64;
+  string cause;
+  // end_of_test_q is a per-cycle scratch value (blocking assignments);
+  // end_of_test_d is the registered copy that drives the output.
+  logic[31:0] end_of_test_q;
+  logic[31:0] end_of_test_d;
+
+  assign end_of_test_o = end_of_test_d;
+  always_ff @(posedge clk_i) begin
+    // Hold a previously latched end-of-test code (bit 0 set) while out of reset
+    end_of_test_q = (rst_ni && (end_of_test_d[0] == 1'b1)) ? end_of_test_d : 0;
+    for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin
+      // Sign-extend the program counter
+      pc64 = {{riscv::XLEN-riscv::VLEN{rvfi_i[i].pc_rdata[riscv::VLEN-1]}}, rvfi_i[i].pc_rdata};
+      // print the instruction information if the instruction is valid or a trap is taken
+      if (rvfi_i[i].valid) begin
+        // Instruction information
+        $fwrite(f, "core   0: 0x%h (0x%h) DASM(%h)\n",
+          pc64, rvfi_i[i].insn, rvfi_i[i].insn);
+        // Destination register information
+        if (rvfi_i[i].insn[1:0] != 2'b11) begin
+          // Compressed instruction: print the low 16 bits only
+          $fwrite(f, "%h 0x%h (0x%h)",
+            rvfi_i[i].mode, pc64, rvfi_i[i].insn[15:0]);
+        end else begin
+          $fwrite(f, "%h 0x%h (0x%h)",
+            rvfi_i[i].mode, pc64, rvfi_i[i].insn);
+        end
+        // Decode instruction to know if destination register is FP register.
+        // Handle both uncompressed and compressed instructions.
+        if ( rvfi_i[i].insn[6:0] == 7'b1001111 ||
+             rvfi_i[i].insn[6:0] == 7'b1001011 ||
+             rvfi_i[i].insn[6:0] == 7'b1000111 ||
+             rvfi_i[i].insn[6:0] == 7'b1000011 ||
+             rvfi_i[i].insn[6:0] == 7'b0000111 ||
+            (rvfi_i[i].insn[6:0] == 7'b1010011 && rvfi_i[i].insn[31:26] != 6'b111000
+                                               && rvfi_i[i].insn[31:26] != 6'b101000
+                                               && rvfi_i[i].insn[31:26] != 6'b110000) ||
+            (rvfi_i[i].insn[0] == 1'b0 && ((rvfi_i[i].insn[15:13] == 3'b001 && riscv::XLEN == 64) ||
+                                           (rvfi_i[i].insn[15:13] == 3'b011 && riscv::XLEN == 32) ))) begin
+          $fwrite(f, " f%d 0x%h", rvfi_i[i].rd_addr, rvfi_i[i].rd_wdata);
+        end else if (rvfi_i[i].rd_addr != 0) begin
+          $fwrite(f, " x%d 0x%h", rvfi_i[i].rd_addr, rvfi_i[i].rd_wdata);
+          if (rvfi_i[i].mem_rmask != 0) begin
+            $fwrite(f, " mem 0x%h", rvfi_i[i].mem_addr);
+          end
+        end else begin
+          if (rvfi_i[i].mem_wmask != 0) begin
+            $fwrite(f, " mem 0x%h 0x%h", rvfi_i[i].mem_addr, rvfi_i[i].mem_wdata);
+            // A store to 'tohost' with bit 0 set requests the end of the test
+            if (TOHOST_ADDR != '0 &&
+                rvfi_i[i].mem_paddr == TOHOST_ADDR &&
+                rvfi_i[i].mem_wdata[0] == 1'b1) begin
+              end_of_test_q = rvfi_i[i].mem_wdata[31:0];
+            end
+          end
+        end
+        $fwrite(f, "\n");
+      end else begin
+        if (rvfi_i[i].trap) begin
+          case (rvfi_i[i].cause)
+            32'h0: cause = "INSTR_ADDR_MISALIGNED";
+            32'h1: cause = "INSTR_ACCESS_FAULT";
+            32'h2: cause = "ILLEGAL_INSTR";
+            32'h3: cause = "BREAKPOINT";
+            32'h4: cause = "LD_ADDR_MISALIGNED";
+            32'h5: cause = "LD_ACCESS_FAULT";
+            32'h6: cause = "ST_ADDR_MISALIGNED";
+            32'h7: cause = "ST_ACCESS_FAULT";
+            // Do not print the stale name left over from an earlier trap
+            default: cause = "UNKNOWN";
+          endcase
+          $fwrite(f, "%s exception @ 0x%h\n", cause, pc64);
+        end
+      end
+    end
+
+    if (~rst_ni)
+      cycles <= 0;
+    else
+      cycles <= cycles+1;
+    // Time-out: end the test once SIM_FINISH cycles have been exceeded
+    if (cycles > SIM_FINISH)
+      end_of_test_q = 32'hffff_ffff;
+
+    end_of_test_d <= end_of_test_q;
+  end
+
+
+  // Trace any custom signals
+  // Define signals to be traced by adding them into debug and name arrays
+  localparam int NUM_DEBUG_SIGNALS = 11;
+  string name[0:NUM_DEBUG_SIGNALS-1];
+  logic[63:0] debug[0:NUM_DEBUG_SIGNALS-1], debug_previous[0:NUM_DEBUG_SIGNALS-1];
+
+  // Log every debug entry whose value changed since the previous cycle,
+  // but only inside the (DEBUG_START, DEBUG_STOP) cycle window.
+  always_ff @(posedge clk_i) begin
+    if (cycles > DEBUG_START && cycles < DEBUG_STOP)
+      // Stay inside the declared array bounds
+      for (int index = 0; index < NUM_DEBUG_SIGNALS; index++)
+        if (debug_previous[index] != debug[index])
+          $fwrite(f, "%d %s %x\n", cycles, name[index], debug[index]);
+    debug_previous <= debug;
+  end
+
+endmodule // rvfi_tracer
diff --git a/test/type_param/sv2v.sh b/test/type_param/sv2v.sh
new file mode 100755
index 00000000..4d739e00
--- /dev/null
+++ b/test/type_param/sv2v.sh
@@ -0,0 +1,249 @@
+sv2v -v --top=ariane_testharness --define=VERILATOR > cva6_nonsys.v \
+--incdir=vendor/pulp-platform/common_cells/include/ \
+--incdir=vendor/pulp-platform/common_cells/src/ \
+--incdir=vendor/pulp-platform/axi/include/ \
+--incdir=common/local/util/ \
+--incdir=corev_apu/axi_node \
+--incdir=core/cache_subsystem/hpdcache/rtl/include \
+--incdir=corev_apu/register_interface/include \
+vendor/pulp-platform/fpga-support/rtl/SyncDpRam.sv \
+vendor/pulp-platform/fpga-support/rtl/AsyncDpRam.sv \
+vendor/pulp-platform/fpga-support/rtl/AsyncThreePortRam.sv \
+vendor/openhwgroup/cvfpu/src/fpnew_pkg.sv \
+vendor/openhwgroup/cvfpu/src/fpnew_cast_multi.sv \
+vendor/openhwgroup/cvfpu/src/fpnew_classifier.sv \
+vendor/openhwgroup/cvfpu/src/fpnew_divsqrt_multi.sv \
+vendor/openhwgroup/cvfpu/src/fpnew_fma_multi.sv \
+vendor/openhwgroup/cvfpu/src/fpnew_fma.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_noncomp.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_opgroup_block.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_opgroup_fmt_slice.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_opgroup_multifmt_slice.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_rounding.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_top.sv \ +vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv \ +vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv \ +vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv \ +vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv \ +vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv \ +vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv \ +vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv \ +core/include/config_pkg.sv \ +core/include/cv64a6_imafdc_sv39_config_pkg.sv \ +core/include/riscv_pkg.sv \ +core/include/ariane_pkg.sv \ +vendor/pulp-platform/axi/src/axi_pkg.sv \ +core/include/wt_cache_pkg.sv \ +core/include/std_cache_pkg.sv \ +core/include/instr_tracer_pkg.sv \ +core/include/acc_pkg.sv \ +core/include/cvxif_pkg.sv \ +core/cvxif_example/include/cvxif_instr_pkg.sv \ +core/cvxif_fu.sv \ +core/cvxif_example/cvxif_example_coprocessor.sv \ +core/cvxif_example/instr_decoder.sv \ +vendor/pulp-platform/common_cells/src/cf_math_pkg.sv \ +vendor/pulp-platform/common_cells/src/fifo_v3.sv \ +vendor/pulp-platform/common_cells/src/lfsr.sv \ +vendor/pulp-platform/common_cells/src/lfsr_8bit.sv \ +vendor/pulp-platform/common_cells/src/stream_arbiter.sv \ +vendor/pulp-platform/common_cells/src/stream_arbiter_flushable.sv \ +vendor/pulp-platform/common_cells/src/stream_mux.sv \ +vendor/pulp-platform/common_cells/src/stream_demux.sv \ +vendor/pulp-platform/common_cells/src/lzc.sv \ +vendor/pulp-platform/common_cells/src/rr_arb_tree.sv \ +vendor/pulp-platform/common_cells/src/shift_reg.sv \ 
+vendor/pulp-platform/common_cells/src/unread.sv \ +vendor/pulp-platform/common_cells/src/popcount.sv \ +vendor/pulp-platform/common_cells/src/exp_backoff.sv \ +vendor/pulp-platform/common_cells/src/counter.sv \ +vendor/pulp-platform/common_cells/src/delta_counter.sv \ +core/cva6.sv \ +core/cva6_rvfi_probes.sv \ +core/alu.sv \ +core/fpu_wrap.sv \ +core/branch_unit.sv \ +core/compressed_decoder.sv \ +core/controller.sv \ +core/csr_buffer.sv \ +core/csr_regfile.sv \ +core/decoder.sv \ +core/ex_stage.sv \ +core/instr_realign.sv \ +core/id_stage.sv \ +core/issue_read_operands.sv \ +core/issue_stage.sv \ +core/load_unit.sv \ +core/load_store_unit.sv \ +core/lsu_bypass.sv \ +core/mult.sv \ +core/multiplier.sv \ +core/serdiv.sv \ +core/perf_counters.sv \ +core/ariane_regfile_ff.sv \ +core/ariane_regfile_fpga.sv \ +core/scoreboard.sv \ +core/store_buffer.sv \ +core/amo_buffer.sv \ +core/store_unit.sv \ +core/commit_stage.sv \ +core/axi_shim.sv \ +core/cva6_accel_first_pass_decoder_stub.sv \ +core/acc_dispatcher.sv \ +core/frontend/btb.sv \ +core/frontend/bht.sv \ +core/frontend/ras.sv \ +core/frontend/instr_scan.sv \ +core/frontend/instr_queue.sv \ +core/frontend/frontend.sv \ +core/cache_subsystem/wt_dcache_ctrl.sv \ +core/cache_subsystem/wt_dcache_mem.sv \ +core/cache_subsystem/wt_dcache_missunit.sv \ +core/cache_subsystem/wt_dcache_wbuffer.sv \ +core/cache_subsystem/wt_dcache.sv \ +core/cache_subsystem/cva6_icache.sv \ +core/cache_subsystem/wt_cache_subsystem.sv \ +core/cache_subsystem/wt_axi_adapter.sv \ +core/cache_subsystem/tag_cmp.sv \ +core/cache_subsystem/axi_adapter.sv \ +core/cache_subsystem/miss_handler.sv \ +core/cache_subsystem/cache_ctrl.sv \ +core/cache_subsystem/cva6_icache_axi_wrapper.sv \ +core/cache_subsystem/std_cache_subsystem.sv \ +core/cache_subsystem/std_nbdcache.sv \ +core/include/cva6_hpdcache_default_config_pkg.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_pkg.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_demux.sv \ 
+core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sync_buffer.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_fifo_reg.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_fxarb.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_rrarb.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_mux.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_prio_1hot_encoder.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram_wbyteenable.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram_wmask.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_regbank_wbyteenable_1rw.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_regbank_wmask_1rw.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_data_downsize.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_data_upsize.sv \ +core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_pkg.sv \ +core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride.sv \ +core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_arb.sv \ +core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_snooper.sv \ +core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_wrapper.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_amo.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_cmo.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_core_arbiter.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_ctrl.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_ctrl_pe.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_memarray.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_memctrl.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_miss_handler.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_mshr.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_mshr_to_cache_set.sv \ 
+core/cache_subsystem/hpdcache/rtl/src/hpdcache_plru.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_rtab.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_uncached.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_wbuf.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_wbuf_wrapper.sv \ +core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_req_read_arbiter.sv \ +core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_req_write_arbiter.sv \ +core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_resp_demux.sv \ +core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_to_axi_read.sv \ +core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_to_axi_write.sv \ +core/cache_subsystem/cva6_hpdcache_subsystem.sv \ +core/cache_subsystem/cva6_hpdcache_subsystem_axi_arbiter.sv \ +core/cache_subsystem/cva6_hpdcache_if_adapter.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_1rw.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_wbyteenable_1rw.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_wmask_1rw.sv \ +core/pmp/src/pmp.sv \ +core/pmp/src/pmp_entry.sv \ +common/local/util/instr_tracer_if.sv \ +common/local/util/instr_tracer.sv \ +common/local/util/tc_sram_wrapper.sv \ +vendor/pulp-platform/tech_cells_generic/src/rtl/tc_sram.sv \ +common/local/util/sram.sv \ +core/mmu_sv39/mmu.sv \ +core/mmu_sv39/ptw.sv \ +core/mmu_sv39/tlb.sv \ +core/mmu_sv32/cva6_mmu_sv32.sv \ +core/mmu_sv32/cva6_ptw_sv32.sv \ +core/mmu_sv32/cva6_tlb_sv32.sv \ +core/mmu_sv32/cva6_shared_tlb_sv32.sv \ + core/cva6_rvfi.sv \ + corev_apu/tb/ariane_axi_pkg.sv \ + corev_apu/tb/axi_intf.sv \ + corev_apu/register_interface/src/reg_intf.sv \ + corev_apu/tb/ariane_soc_pkg.sv \ + corev_apu/riscv-dbg/src/dm_pkg.sv \ + corev_apu/tb/ariane_axi_soc_pkg.sv \ + corev_apu/src/ariane.sv \ + corev_apu/bootrom/bootrom.sv \ + corev_apu/clint/axi_lite_interface.sv \ + corev_apu/clint/clint.sv \ + 
corev_apu/fpga/src/axi2apb/src/axi2apb.sv \ + corev_apu/fpga/src/axi2apb/src/axi2apb_64_32.sv \ + corev_apu/fpga/src/apb_timer/apb_timer.sv \ + corev_apu/fpga/src/apb_timer/timer.sv \ + corev_apu/fpga/src/axi_slice/src/axi_ar_buffer.sv \ + corev_apu/fpga/src/axi_slice/src/axi_aw_buffer.sv \ + corev_apu/fpga/src/axi_slice/src/axi_b_buffer.sv \ + corev_apu/fpga/src/axi_slice/src/axi_r_buffer.sv \ + corev_apu/fpga/src/axi_slice/src/axi_single_slice.sv \ + corev_apu/fpga/src/axi_slice/src/axi_slice.sv \ + corev_apu/fpga/src/axi_slice/src/axi_slice_wrap.sv \ + corev_apu/fpga/src/axi_slice/src/axi_w_buffer.sv \ + corev_apu/src/axi_riscv_atomics/src/axi_res_tbl.sv \ + corev_apu/src/axi_riscv_atomics/src/axi_riscv_amos.sv \ + corev_apu/src/axi_riscv_atomics/src/axi_riscv_amos_alu.sv \ + corev_apu/src/axi_riscv_atomics/src/axi_riscv_atomics.sv \ + corev_apu/src/axi_riscv_atomics/src/axi_riscv_atomics_wrap.sv \ + corev_apu/src/axi_riscv_atomics/src/axi_riscv_lrsc.sv \ + corev_apu/src/axi_riscv_atomics/src/axi_riscv_lrsc_wrap.sv \ + corev_apu/axi_mem_if/src/axi2mem.sv \ + corev_apu/rv_plic/rtl/rv_plic_target.sv \ + corev_apu/rv_plic/rtl/rv_plic_gateway.sv \ + corev_apu/rv_plic/rtl/plic_regmap.sv \ + corev_apu/rv_plic/rtl/plic_top.sv \ + corev_apu/riscv-dbg/src/dmi_cdc.sv \ + corev_apu/riscv-dbg/src/dmi_jtag.sv \ + corev_apu/riscv-dbg/src/dmi_jtag_tap.sv \ + corev_apu/riscv-dbg/src/dm_csrs.sv \ + corev_apu/riscv-dbg/src/dm_mem.sv \ + corev_apu/riscv-dbg/src/dm_sba.sv \ + corev_apu/riscv-dbg/src/dm_top.sv \ + corev_apu/riscv-dbg/debug_rom/debug_rom.sv \ + corev_apu/register_interface/src/apb_to_reg.sv \ + vendor/pulp-platform/axi/src/axi_multicut.sv \ + vendor/pulp-platform/common_cells/src/rstgen_bypass.sv \ + vendor/pulp-platform/common_cells/src/rstgen.sv \ + vendor/pulp-platform/common_cells/src/addr_decode.sv \ + vendor/pulp-platform/common_cells/src/stream_register.sv \ + vendor/pulp-platform/axi/src/axi_cut.sv \ + vendor/pulp-platform/axi/src/axi_join.sv \ + 
vendor/pulp-platform/axi/src/axi_delayer.sv \ + vendor/pulp-platform/axi/src/axi_to_axi_lite.sv \ + vendor/pulp-platform/axi/src/axi_id_prepend.sv \ + vendor/pulp-platform/axi/src/axi_atop_filter.sv \ + vendor/pulp-platform/axi/src/axi_err_slv.sv \ + vendor/pulp-platform/axi/src/axi_mux.sv \ + vendor/pulp-platform/axi/src/axi_demux.sv \ + vendor/pulp-platform/axi/src/axi_xbar.sv \ + vendor/pulp-platform/common_cells/src/cdc_2phase.sv \ + vendor/pulp-platform/common_cells/src/spill_register_flushable.sv \ + vendor/pulp-platform/common_cells/src/spill_register.sv \ + vendor/pulp-platform/common_cells/src/deprecated/fifo_v1.sv \ + vendor/pulp-platform/common_cells/src/deprecated/fifo_v2.sv \ + vendor/pulp-platform/common_cells/src/stream_delay.sv \ + vendor/pulp-platform/common_cells/src/lfsr_16bit.sv \ + vendor/pulp-platform/tech_cells_generic/src/deprecated/cluster_clk_cells.sv \ + vendor/pulp-platform/tech_cells_generic/src/deprecated/pulp_clk_cells.sv \ + vendor/pulp-platform/tech_cells_generic/src/rtl/tc_clk.sv \ + corev_apu/tb/ariane_testharness.sv \ + corev_apu/tb/ariane_peripherals.sv \ + corev_apu/tb/rvfi_tracer.sv \ + corev_apu/tb/common/uart.sv \ + corev_apu/tb/common/mock_uart.sv \ diff --git a/test/type_param/sv2v_corrected.sh b/test/type_param/sv2v_corrected.sh new file mode 100755 index 00000000..4eb7accb --- /dev/null +++ b/test/type_param/sv2v_corrected.sh @@ -0,0 +1,249 @@ +sv2v -v --top=ariane_testharness --define=VERILATOR > cva6_nonsys.v \ +--incdir=vendor/pulp-platform/common_cells/include/ \ +--incdir=vendor/pulp-platform/common_cells/src/ \ +--incdir=vendor/pulp-platform/axi/include/ \ +--incdir=common/local/util/ \ +--incdir=corev_apu/axi_node \ +--incdir=core/cache_subsystem/hpdcache/rtl/include \ +--incdir=corev_apu/register_interface/include \ +vendor/pulp-platform/fpga-support/rtl/SyncDpRam.sv \ +vendor/pulp-platform/fpga-support/rtl/AsyncDpRam.sv \ +vendor/pulp-platform/fpga-support/rtl/AsyncThreePortRam.sv \ 
+vendor/openhwgroup/cvfpu/src/fpnew_pkg.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_cast_multi.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_classifier.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_divsqrt_multi.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_fma_multi.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_fma.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_noncomp.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_opgroup_block.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_opgroup_fmt_slice.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_opgroup_multifmt_slice.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_rounding.sv \ +vendor/openhwgroup/cvfpu/src/fpnew_top.sv \ +vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv \ +vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv \ +vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv \ +vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv \ +vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv \ +vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv \ +vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv \ +core/include/config_pkg.sv \ +core/include/cv64a6_imafdc_sv39_config_pkg.sv \ +core/include/riscv_pkg.sv \ +core/include/ariane_pkg.sv \ +vendor/pulp-platform/axi/src/axi_pkg.sv \ +core/include/wt_cache_pkg.sv \ +core/include/std_cache_pkg.sv \ +core/include/instr_tracer_pkg.sv \ +core/include/acc_pkg.sv \ +core/include/cvxif_pkg.sv \ +core/cvxif_example/include/cvxif_instr_pkg.sv \ +core/cvxif_fu.sv \ +core/cvxif_example/cvxif_example_coprocessor.sv \ +core/cvxif_example/instr_decoder.sv \ +vendor/pulp-platform/common_cells/src/cf_math_pkg.sv \ +vendor/pulp-platform/common_cells/src/fifo_v3.sv \ +vendor/pulp-platform/common_cells/src/lfsr.sv \ +vendor/pulp-platform/common_cells/src/lfsr_8bit.sv \ +vendor/pulp-platform/common_cells/src/stream_arbiter.sv \ +vendor/pulp-platform/common_cells/src/stream_arbiter_flushable.sv \ 
+vendor/pulp-platform/common_cells/src/stream_mux.sv \ +vendor/pulp-platform/common_cells/src/stream_demux.sv \ +vendor/pulp-platform/common_cells/src/lzc.sv \ +vendor/pulp-platform/common_cells/src/rr_arb_tree.sv \ +vendor/pulp-platform/common_cells/src/shift_reg.sv \ +vendor/pulp-platform/common_cells/src/unread.sv \ +vendor/pulp-platform/common_cells/src/popcount.sv \ +vendor/pulp-platform/common_cells/src/exp_backoff.sv \ +vendor/pulp-platform/common_cells/src/counter.sv \ +vendor/pulp-platform/common_cells/src/delta_counter.sv \ +core/cva6.sv \ +core/cva6_rvfi_probes.sv \ +core/alu.sv \ +core/fpu_wrap.sv \ +core/branch_unit.sv \ +core/compressed_decoder.sv \ +core/controller.sv \ +core/csr_buffer.sv \ +core/csr_regfile.sv \ +core/decoder.sv \ +core/ex_stage.sv \ +core/instr_realign.sv \ +core/id_stage.sv \ +core/issue_read_operands.sv \ +core/issue_stage.sv \ +core/load_unit.sv \ +core/load_store_unit.sv \ +core/lsu_bypass.sv \ +core/mult.sv \ +core/multiplier.sv \ +core/serdiv.sv \ +core/perf_counters.sv \ +core/ariane_regfile_ff.sv \ +core/ariane_regfile_fpga.sv \ +core/scoreboard.sv \ +core/store_buffer.sv \ +core/amo_buffer.sv \ +core/store_unit.sv \ +core/commit_stage.sv \ +core/axi_shim.sv \ +core/cva6_accel_first_pass_decoder_stub.sv \ +core/acc_dispatcher_corrected.sv \ +core/frontend/btb.sv \ +core/frontend/bht.sv \ +core/frontend/ras.sv \ +core/frontend/instr_scan.sv \ +core/frontend/instr_queue.sv \ +core/frontend/frontend.sv \ +core/cache_subsystem/wt_dcache_ctrl.sv \ +core/cache_subsystem/wt_dcache_mem.sv \ +core/cache_subsystem/wt_dcache_missunit.sv \ +core/cache_subsystem/wt_dcache_wbuffer.sv \ +core/cache_subsystem/wt_dcache.sv \ +core/cache_subsystem/cva6_icache.sv \ +core/cache_subsystem/wt_cache_subsystem.sv \ +core/cache_subsystem/wt_axi_adapter.sv \ +core/cache_subsystem/tag_cmp.sv \ +core/cache_subsystem/axi_adapter.sv \ +core/cache_subsystem/miss_handler.sv \ +core/cache_subsystem/cache_ctrl.sv \ 
+core/cache_subsystem/cva6_icache_axi_wrapper.sv \ +core/cache_subsystem/std_cache_subsystem.sv \ +core/cache_subsystem/std_nbdcache.sv \ +core/include/cva6_hpdcache_default_config_pkg.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_pkg.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_demux.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sync_buffer.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_fifo_reg.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_fxarb.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_rrarb.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_mux.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_prio_1hot_encoder.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram_wbyteenable.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_sram_wmask.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_regbank_wbyteenable_1rw.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_regbank_wmask_1rw.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_data_downsize.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/hpdcache_data_upsize.sv \ +core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_pkg.sv \ +core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride.sv \ +core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_arb.sv \ +core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_snooper.sv \ +core/cache_subsystem/hpdcache/rtl/src/hwpf_stride/hwpf_stride_wrapper.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_amo.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_cmo.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_core_arbiter.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_ctrl.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_ctrl_pe.sv \ 
+core/cache_subsystem/hpdcache/rtl/src/hpdcache_memarray.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_memctrl.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_miss_handler.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_mshr.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_mshr_to_cache_set.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_plru.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_rtab.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_uncached.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_wbuf.sv \ +core/cache_subsystem/hpdcache/rtl/src/hpdcache_wbuf_wrapper.sv \ +core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_req_read_arbiter.sv \ +core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_req_write_arbiter.sv \ +core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_resp_demux.sv \ +core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_to_axi_read.sv \ +core/cache_subsystem/hpdcache/rtl/src/utils/hpdcache_mem_to_axi_write.sv \ +core/cache_subsystem/cva6_hpdcache_subsystem.sv \ +core/cache_subsystem/cva6_hpdcache_subsystem_axi_arbiter.sv \ +core/cache_subsystem/cva6_hpdcache_if_adapter.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_1rw.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_wbyteenable_1rw.sv \ +core/cache_subsystem/hpdcache/rtl/src/common/macros/behav/hpdcache_sram_wmask_1rw.sv \ +core/pmp/src/pmp.sv \ +core/pmp/src/pmp_entry.sv \ +common/local/util/instr_tracer_if.sv \ +common/local/util/instr_tracer.sv \ +common/local/util/tc_sram_wrapper.sv \ +vendor/pulp-platform/tech_cells_generic/src/rtl/tc_sram.sv \ +common/local/util/sram.sv \ +core/mmu_sv39/mmu.sv \ +core/mmu_sv39/ptw.sv \ +core/mmu_sv39/tlb.sv \ +core/mmu_sv32/cva6_mmu_sv32.sv \ +core/mmu_sv32/cva6_ptw_sv32.sv \ +core/mmu_sv32/cva6_tlb_sv32.sv \ +core/mmu_sv32/cva6_shared_tlb_sv32.sv \ + core/cva6_rvfi.sv \ + corev_apu/tb/ariane_axi_pkg.sv \ + corev_apu/tb/axi_intf.sv \ + 
corev_apu/register_interface/src/reg_intf.sv \ + corev_apu/tb/ariane_soc_pkg.sv \ + corev_apu/riscv-dbg/src/dm_pkg.sv \ + corev_apu/tb/ariane_axi_soc_pkg.sv \ + corev_apu/src/ariane.sv \ + corev_apu/bootrom/bootrom.sv \ + corev_apu/clint/axi_lite_interface.sv \ + corev_apu/clint/clint.sv \ + corev_apu/fpga/src/axi2apb/src/axi2apb.sv \ + corev_apu/fpga/src/axi2apb/src/axi2apb_64_32.sv \ + corev_apu/fpga/src/apb_timer/apb_timer.sv \ + corev_apu/fpga/src/apb_timer/timer.sv \ + corev_apu/fpga/src/axi_slice/src/axi_ar_buffer.sv \ + corev_apu/fpga/src/axi_slice/src/axi_aw_buffer.sv \ + corev_apu/fpga/src/axi_slice/src/axi_b_buffer.sv \ + corev_apu/fpga/src/axi_slice/src/axi_r_buffer.sv \ + corev_apu/fpga/src/axi_slice/src/axi_single_slice.sv \ + corev_apu/fpga/src/axi_slice/src/axi_slice.sv \ + corev_apu/fpga/src/axi_slice/src/axi_slice_wrap.sv \ + corev_apu/fpga/src/axi_slice/src/axi_w_buffer.sv \ + corev_apu/src/axi_riscv_atomics/src/axi_res_tbl.sv \ + corev_apu/src/axi_riscv_atomics/src/axi_riscv_amos.sv \ + corev_apu/src/axi_riscv_atomics/src/axi_riscv_amos_alu.sv \ + corev_apu/src/axi_riscv_atomics/src/axi_riscv_atomics.sv \ + corev_apu/src/axi_riscv_atomics/src/axi_riscv_atomics_wrap.sv \ + corev_apu/src/axi_riscv_atomics/src/axi_riscv_lrsc.sv \ + corev_apu/src/axi_riscv_atomics/src/axi_riscv_lrsc_wrap.sv \ + corev_apu/axi_mem_if/src/axi2mem.sv \ + corev_apu/rv_plic/rtl/rv_plic_target.sv \ + corev_apu/rv_plic/rtl/rv_plic_gateway.sv \ + corev_apu/rv_plic/rtl/plic_regmap.sv \ + corev_apu/rv_plic/rtl/plic_top.sv \ + corev_apu/riscv-dbg/src/dmi_cdc.sv \ + corev_apu/riscv-dbg/src/dmi_jtag.sv \ + corev_apu/riscv-dbg/src/dmi_jtag_tap.sv \ + corev_apu/riscv-dbg/src/dm_csrs.sv \ + corev_apu/riscv-dbg/src/dm_mem.sv \ + corev_apu/riscv-dbg/src/dm_sba.sv \ + corev_apu/riscv-dbg/src/dm_top.sv \ + corev_apu/riscv-dbg/debug_rom/debug_rom.sv \ + corev_apu/register_interface/src/apb_to_reg.sv \ + vendor/pulp-platform/axi/src/axi_multicut.sv \ + 
vendor/pulp-platform/common_cells/src/rstgen_bypass.sv \ + vendor/pulp-platform/common_cells/src/rstgen.sv \ + vendor/pulp-platform/common_cells/src/addr_decode.sv \ + vendor/pulp-platform/common_cells/src/stream_register.sv \ + vendor/pulp-platform/axi/src/axi_cut.sv \ + vendor/pulp-platform/axi/src/axi_join.sv \ + vendor/pulp-platform/axi/src/axi_delayer.sv \ + vendor/pulp-platform/axi/src/axi_to_axi_lite.sv \ + vendor/pulp-platform/axi/src/axi_id_prepend.sv \ + vendor/pulp-platform/axi/src/axi_atop_filter.sv \ + vendor/pulp-platform/axi/src/axi_err_slv.sv \ + vendor/pulp-platform/axi/src/axi_mux.sv \ + vendor/pulp-platform/axi/src/axi_demux.sv \ + vendor/pulp-platform/axi/src/axi_xbar.sv \ + vendor/pulp-platform/common_cells/src/cdc_2phase.sv \ + vendor/pulp-platform/common_cells/src/spill_register_flushable.sv \ + vendor/pulp-platform/common_cells/src/spill_register.sv \ + vendor/pulp-platform/common_cells/src/deprecated/fifo_v1.sv \ + vendor/pulp-platform/common_cells/src/deprecated/fifo_v2.sv \ + vendor/pulp-platform/common_cells/src/stream_delay.sv \ + vendor/pulp-platform/common_cells/src/lfsr_16bit.sv \ + vendor/pulp-platform/tech_cells_generic/src/deprecated/cluster_clk_cells.sv \ + vendor/pulp-platform/tech_cells_generic/src/deprecated/pulp_clk_cells.sv \ + vendor/pulp-platform/tech_cells_generic/src/rtl/tc_clk.sv \ + corev_apu/tb/ariane_testharness.sv \ + corev_apu/tb/ariane_peripherals.sv \ + corev_apu/tb/rvfi_tracer.sv \ + corev_apu/tb/common/uart.sv \ + corev_apu/tb/common/mock_uart.sv \ diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_cast_multi.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_cast_multi.sv new file mode 100644 index 00000000..e166d0bf --- /dev/null +++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_cast_multi.sv @@ -0,0 +1,794 @@ +// Copyright 2019 ETH Zurich and University of Bologna. 
+// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// SPDX-License-Identifier: SHL-0.51 + +// Author: Stefan Mach + +`include "common_cells/registers.svh" + +module fpnew_cast_multi #( + parameter fpnew_pkg::fmt_logic_t FpFmtConfig = '1, + parameter fpnew_pkg::ifmt_logic_t IntFmtConfig = '1, + // FPU configuration + parameter int unsigned NumPipeRegs = 0, + parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, + parameter type TagType = logic, + parameter type AuxType = logic, + // Do not change + localparam int unsigned WIDTH = fpnew_pkg::maximum(fpnew_pkg::max_fp_width(FpFmtConfig), + fpnew_pkg::max_int_width(IntFmtConfig)), + localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS +) ( + input logic clk_i, + input logic rst_ni, + // Input signals + input logic [WIDTH-1:0] operands_i, // 1 operand + input logic [NUM_FORMATS-1:0] is_boxed_i, // 1 operand + input fpnew_pkg::roundmode_e rnd_mode_i, + input fpnew_pkg::operation_e op_i, + input logic op_mod_i, + input fpnew_pkg::fp_format_e src_fmt_i, + input fpnew_pkg::fp_format_e dst_fmt_i, + input fpnew_pkg::int_format_e int_fmt_i, + input TagType tag_i, + input logic mask_i, + input AuxType aux_i, + // Input Handshake + input logic in_valid_i, + output logic in_ready_o, + input logic flush_i, + // Output signals + output logic [WIDTH-1:0] result_o, + output fpnew_pkg::status_t status_o, + output logic extension_bit_o, + output 
TagType tag_o, + output logic mask_o, + output AuxType aux_o, + // Output handshake + output logic out_valid_o, + input logic out_ready_i, + // Indication of valid data in flight + output logic busy_o +); + + // ---------- + // Constants + // ---------- + localparam int unsigned NUM_INT_FORMATS = fpnew_pkg::NUM_INT_FORMATS; + localparam int unsigned MAX_INT_WIDTH = fpnew_pkg::max_int_width(IntFmtConfig); + + localparam fpnew_pkg::fp_encoding_t SUPER_FORMAT = fpnew_pkg::super_format(FpFmtConfig); + + localparam int unsigned SUPER_EXP_BITS = SUPER_FORMAT.exp_bits; + localparam int unsigned SUPER_MAN_BITS = SUPER_FORMAT.man_bits; + localparam int unsigned SUPER_BIAS = 2**(SUPER_EXP_BITS - 1) - 1; + + // The internal mantissa includes normal bit or an entire integer + localparam int unsigned INT_MAN_WIDTH = fpnew_pkg::maximum(SUPER_MAN_BITS + 1, MAX_INT_WIDTH); + // If needed, there will be a LZC for renormalization + localparam int unsigned LZC_RESULT_WIDTH = $clog2(INT_MAN_WIDTH); + // The internal exponent must be able to represent the smallest denormal input value as signed + // or the number of bits in an integer + localparam int unsigned INT_EXP_WIDTH = fpnew_pkg::maximum($clog2(MAX_INT_WIDTH), + fpnew_pkg::maximum(SUPER_EXP_BITS, $clog2(SUPER_BIAS + SUPER_MAN_BITS))) + 1; + // Pipelines + localparam NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 1) / 3) // Second to get distributed regs + : 0); // no regs here otherwise + localparam NUM_MID_REGS = PipeConfig == fpnew_pkg::INSIDE + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 2) / 3) // First to get distributed regs + : 0); // no regs here otherwise + localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? 
(NumPipeRegs / 3) // Last to get distributed regs + : 0); // no regs here otherwise + + // --------------- + // Input pipeline + // --------------- + // Selected pipeline output signals as non-arrays + logic [WIDTH-1:0] operands_q; + logic [NUM_FORMATS-1:0] is_boxed_q; + logic op_mod_q; + fpnew_pkg::fp_format_e src_fmt_q; + fpnew_pkg::fp_format_e dst_fmt_q; + fpnew_pkg::int_format_e int_fmt_q; + + // Input pipeline signals, index i holds signal after i register stages + logic [0:NUM_INP_REGS][WIDTH-1:0] inp_pipe_operands_q; + logic [0:NUM_INP_REGS][NUM_FORMATS-1:0] inp_pipe_is_boxed_q; + fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; + fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; + logic [0:NUM_INP_REGS] inp_pipe_op_mod_q; + fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_src_fmt_q; + fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; + fpnew_pkg::int_format_e [0:NUM_INP_REGS] inp_pipe_int_fmt_q; + TagType [0:NUM_INP_REGS] inp_pipe_tag_q; + logic [0:NUM_INP_REGS] inp_pipe_mask_q; + AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; + logic [0:NUM_INP_REGS] inp_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_INP_REGS] inp_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign inp_pipe_operands_q[0] = operands_i; + assign inp_pipe_is_boxed_q[0] = is_boxed_i; + assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; + assign inp_pipe_op_q[0] = op_i; + assign inp_pipe_op_mod_q[0] = op_mod_i; + assign inp_pipe_src_fmt_q[0] = src_fmt_i; + assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; + assign inp_pipe_int_fmt_q[0] = int_fmt_i; + assign inp_pipe_tag_q[0] = tag_i; + assign inp_pipe_mask_q[0] = mask_i; + assign inp_pipe_aux_q[0] = aux_i; + assign inp_pipe_valid_q[0] = in_valid_i; + // Input stage: Propagate pipeline ready signal to upstream circuitry + assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline + //
Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. if the next stage only holds a bubble (not valid) -> we can pop it + assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipeline ready and a valid data item is present + assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) + `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) + `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) + `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0) + `FFL(inp_pipe_src_fmt_q[i+1], inp_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(inp_pipe_int_fmt_q[i+1], inp_pipe_int_fmt_q[i], reg_ena, fpnew_pkg::int_format_e'(0)) + `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) + `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + // Output stage: assign selected pipe outputs to signals for later use + assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; + assign is_boxed_q = inp_pipe_is_boxed_q[NUM_INP_REGS]; + assign op_mod_q = inp_pipe_op_mod_q[NUM_INP_REGS]; + assign src_fmt_q = inp_pipe_src_fmt_q[NUM_INP_REGS]; + assign dst_fmt_q = inp_pipe_dst_fmt_q[NUM_INP_REGS]; + assign int_fmt_q = inp_pipe_int_fmt_q[NUM_INP_REGS]; + + // ----------------- +
// Input processing + // ----------------- + logic src_is_int, dst_is_int; // if 0, it's a float + + assign src_is_int = (inp_pipe_op_q[NUM_INP_REGS] == fpnew_pkg::I2F); + assign dst_is_int = (inp_pipe_op_q[NUM_INP_REGS] == fpnew_pkg::F2I); + + logic [INT_MAN_WIDTH-1:0] encoded_mant; // input mantissa with implicit bit + + logic [NUM_FORMATS-1:0] fmt_sign; + logic signed [NUM_FORMATS-1:0][INT_EXP_WIDTH-1:0] fmt_exponent; + logic [NUM_FORMATS-1:0][INT_MAN_WIDTH-1:0] fmt_mantissa; + logic signed [NUM_FORMATS-1:0][INT_EXP_WIDTH-1:0] fmt_shift_compensation; // for LZC + + fpnew_pkg::fp_info_t [NUM_FORMATS-1:0] info; + + logic [NUM_INT_FORMATS-1:0][INT_MAN_WIDTH-1:0] ifmt_input_val; + logic int_sign; + logic [INT_MAN_WIDTH-1:0] int_value, int_mantissa; + + // FP Input initialization + for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : fmt_init_inputs + // Set up some constants + localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt)); + + if (FpFmtConfig[fmt]) begin : active_format + // Classify input + fpnew_classifier #( + .FpFormat ( fpnew_pkg::fp_format_e'(fmt) ), + .NumOperands ( 1 ) + ) i_fpnew_classifier ( + .operands_i ( operands_q[FP_WIDTH-1:0] ), + .is_boxed_i ( is_boxed_q[fmt] ), + .info_o ( info[fmt] ) + ); + + assign fmt_sign[fmt] = operands_q[FP_WIDTH-1]; + assign fmt_exponent[fmt] = signed'({1'b0, operands_q[MAN_BITS+:EXP_BITS]}); + assign fmt_mantissa[fmt] = {info[fmt].is_normal, operands_q[MAN_BITS-1:0]}; // zero pad + // Compensation for the difference in mantissa widths used for leading-zero count + assign fmt_shift_compensation[fmt] = signed'(INT_MAN_WIDTH - 1 - MAN_BITS); + end else begin : inactive_format + assign info[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled + assign fmt_sign[fmt] = fpnew_pkg::DONT_CARE; // format 
disabled + assign fmt_exponent[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled + assign fmt_mantissa[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled + assign fmt_shift_compensation[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled + end + end + + // Sign-extend INT input + for (genvar ifmt = 0; ifmt < int'(NUM_INT_FORMATS); ifmt++) begin : gen_sign_extend_int + // Set up some constants + localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt)); + + if (IntFmtConfig[ifmt]) begin : active_format // only active formats + always_comb begin : sign_ext_input + // sign-extend value only if it's signed + ifmt_input_val[ifmt] = '{default: operands_q[INT_WIDTH-1] & ~op_mod_q}; + ifmt_input_val[ifmt][INT_WIDTH-1:0] = operands_q[INT_WIDTH-1:0]; + end + end else begin : inactive_format + assign ifmt_input_val[ifmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled + end + end + + // Construct input mantissa from integer + assign int_value = ifmt_input_val[int_fmt_q]; + assign int_sign = int_value[INT_MAN_WIDTH-1] & ~op_mod_q; // only signed ints are negative + assign int_mantissa = int_sign ? unsigned'(-int_value) : int_value; // get magnitude of negative + + // select mantissa with source format + assign encoded_mant = src_is_int ? 
int_mantissa : fmt_mantissa[src_fmt_q]; + + // -------------- + // Normalization + // -------------- + logic signed [INT_EXP_WIDTH-1:0] src_bias; // src format bias + logic signed [INT_EXP_WIDTH-1:0] src_exp; // src format exponent (biased) + logic signed [INT_EXP_WIDTH-1:0] src_subnormal; // src is subnormal + logic signed [INT_EXP_WIDTH-1:0] src_offset; // src offset within mantissa + + assign src_bias = signed'(fpnew_pkg::bias(src_fmt_q)); + assign src_exp = fmt_exponent[src_fmt_q]; + assign src_subnormal = signed'({1'b0, info[src_fmt_q].is_subnormal}); + assign src_offset = fmt_shift_compensation[src_fmt_q]; + + logic input_sign; // input sign + logic signed [INT_EXP_WIDTH-1:0] input_exp; // unbiased true exponent + logic [INT_MAN_WIDTH-1:0] input_mant; // normalized input mantissa + logic mant_is_zero; // for integer zeroes + + logic signed [INT_EXP_WIDTH-1:0] fp_input_exp; + logic signed [INT_EXP_WIDTH-1:0] int_input_exp; + + // Input mantissa needs to be normalized + logic [LZC_RESULT_WIDTH-1:0] renorm_shamt; // renormalization shift amount + logic [LZC_RESULT_WIDTH:0] renorm_shamt_sgn; // signed form for calculations + + // Leading-zero counter is needed for renormalization + lzc #( + .WIDTH ( INT_MAN_WIDTH ), + .MODE ( 1 ) // MODE = 1 counts leading zeroes + ) i_lzc ( + .in_i ( encoded_mant ), + .cnt_o ( renorm_shamt ), + .empty_o ( mant_is_zero ) + ); + assign renorm_shamt_sgn = signed'({1'b0, renorm_shamt}); + + // Get the sign from the proper source + assign input_sign = src_is_int ? int_sign : fmt_sign[src_fmt_q]; + // Realign input mantissa, append zeroes if destination is wider + assign input_mant = encoded_mant << renorm_shamt; + // Unbias exponent and compensate for shift + assign fp_input_exp = signed'(src_exp + src_subnormal - src_bias - + renorm_shamt_sgn + src_offset); // compensate for shift + assign int_input_exp = signed'(INT_MAN_WIDTH - 1 - renorm_shamt_sgn); + + assign input_exp = src_is_int ? 
int_input_exp : fp_input_exp; + + logic signed [INT_EXP_WIDTH-1:0] destination_exp; // re-biased exponent for destination + + // Rebias the exponent + assign destination_exp = input_exp + signed'(fpnew_pkg::bias(dst_fmt_q)); + + // --------------- + // Internal pipeline + // --------------- + // Pipeline output signals as non-arrays + logic input_sign_q; + logic signed [INT_EXP_WIDTH-1:0] input_exp_q; + logic [INT_MAN_WIDTH-1:0] input_mant_q; + logic signed [INT_EXP_WIDTH-1:0] destination_exp_q; + logic src_is_int_q; + logic dst_is_int_q; + fpnew_pkg::fp_info_t info_q; + logic mant_is_zero_q; + logic op_mod_q2; + fpnew_pkg::roundmode_e rnd_mode_q; + fpnew_pkg::fp_format_e src_fmt_q2; + fpnew_pkg::fp_format_e dst_fmt_q2; + fpnew_pkg::int_format_e int_fmt_q2; + // Internal pipeline signals, index i holds signal after i register stages + + + logic [0:NUM_MID_REGS] mid_pipe_input_sign_q; + logic signed [0:NUM_MID_REGS][INT_EXP_WIDTH-1:0] mid_pipe_input_exp_q; + logic [0:NUM_MID_REGS][INT_MAN_WIDTH-1:0] mid_pipe_input_mant_q; + logic signed [0:NUM_MID_REGS][INT_EXP_WIDTH-1:0] mid_pipe_dest_exp_q; + logic [0:NUM_MID_REGS] mid_pipe_src_is_int_q; + logic [0:NUM_MID_REGS] mid_pipe_dst_is_int_q; + fpnew_pkg::fp_info_t [0:NUM_MID_REGS] mid_pipe_info_q; + logic [0:NUM_MID_REGS] mid_pipe_mant_zero_q; + logic [0:NUM_MID_REGS] mid_pipe_op_mod_q; + fpnew_pkg::roundmode_e [0:NUM_MID_REGS] mid_pipe_rnd_mode_q; + fpnew_pkg::fp_format_e [0:NUM_MID_REGS] mid_pipe_src_fmt_q; + fpnew_pkg::fp_format_e [0:NUM_MID_REGS] mid_pipe_dst_fmt_q; + fpnew_pkg::int_format_e [0:NUM_MID_REGS] mid_pipe_int_fmt_q; + TagType [0:NUM_MID_REGS] mid_pipe_tag_q; + logic [0:NUM_MID_REGS] mid_pipe_mask_q; + AuxType [0:NUM_MID_REGS] mid_pipe_aux_q; + logic [0:NUM_MID_REGS] mid_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_MID_REGS] mid_pipe_ready; + + // Input stage: First element of pipeline is taken from upstream logic + assign mid_pipe_input_sign_q[0] = input_sign; + assign 
mid_pipe_input_exp_q[0] = input_exp; + assign mid_pipe_input_mant_q[0] = input_mant; + assign mid_pipe_dest_exp_q[0] = destination_exp; + assign mid_pipe_src_is_int_q[0] = src_is_int; + assign mid_pipe_dst_is_int_q[0] = dst_is_int; + assign mid_pipe_info_q[0] = info[src_fmt_q]; + assign mid_pipe_mant_zero_q[0] = mant_is_zero; + assign mid_pipe_op_mod_q[0] = op_mod_q; + assign mid_pipe_rnd_mode_q[0] = inp_pipe_rnd_mode_q[NUM_INP_REGS]; + assign mid_pipe_src_fmt_q[0] = src_fmt_q; + assign mid_pipe_dst_fmt_q[0] = dst_fmt_q; + assign mid_pipe_int_fmt_q[0] = int_fmt_q; + assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; + assign mid_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS]; + assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; + assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; + // Input stage: Propagate pipeline ready signal to input pipe + assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0]; + + // Generate the register stages + for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipeline ready and a valid data item is present + assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(mid_pipe_input_sign_q[i+1], mid_pipe_input_sign_q[i], reg_ena, '0) + `FFL(mid_pipe_input_exp_q[i+1], mid_pipe_input_exp_q[i], reg_ena, '0) + `FFL(mid_pipe_input_mant_q[i+1], mid_pipe_input_mant_q[i], reg_ena, '0) + `FFL(mid_pipe_dest_exp_q[i+1], mid_pipe_dest_exp_q[i], reg_ena, '0) + `FFL(mid_pipe_src_is_int_q[i+1], mid_pipe_src_is_int_q[i], reg_ena, '0) + `FFL(mid_pipe_dst_is_int_q[i+1], mid_pipe_dst_is_int_q[i], reg_ena, '0) + `FFL(mid_pipe_info_q[i+1], mid_pipe_info_q[i], reg_ena, '0) + `FFL(mid_pipe_mant_zero_q[i+1], mid_pipe_mant_zero_q[i], reg_ena, '0) + `FFL(mid_pipe_op_mod_q[i+1], mid_pipe_op_mod_q[i], reg_ena, '0) + `FFL(mid_pipe_rnd_mode_q[i+1], mid_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(mid_pipe_src_fmt_q[i+1], mid_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(mid_pipe_dst_fmt_q[i+1], mid_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(mid_pipe_int_fmt_q[i+1], mid_pipe_int_fmt_q[i], reg_ena, fpnew_pkg::int_format_e'(0)) + `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(mid_pipe_mask_q[i+1], mid_pipe_mask_q[i], reg_ena, '0) + `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + // Output stage: assign selected pipe outputs to signals for later use + assign input_sign_q = mid_pipe_input_sign_q[NUM_MID_REGS]; + assign input_exp_q = mid_pipe_input_exp_q[NUM_MID_REGS]; + assign input_mant_q = mid_pipe_input_mant_q[NUM_MID_REGS]; + assign
destination_exp_q = mid_pipe_dest_exp_q[NUM_MID_REGS]; + assign src_is_int_q = mid_pipe_src_is_int_q[NUM_MID_REGS]; + assign dst_is_int_q = mid_pipe_dst_is_int_q[NUM_MID_REGS]; + assign info_q = mid_pipe_info_q[NUM_MID_REGS]; + assign mant_is_zero_q = mid_pipe_mant_zero_q[NUM_MID_REGS]; + assign op_mod_q2 = mid_pipe_op_mod_q[NUM_MID_REGS]; + assign rnd_mode_q = mid_pipe_rnd_mode_q[NUM_MID_REGS]; + assign src_fmt_q2 = mid_pipe_src_fmt_q[NUM_MID_REGS]; + assign dst_fmt_q2 = mid_pipe_dst_fmt_q[NUM_MID_REGS]; + assign int_fmt_q2 = mid_pipe_int_fmt_q[NUM_MID_REGS]; + + // -------- + // Casting + // -------- + logic [INT_EXP_WIDTH-1:0] final_exp; // after eventual adjustments + + logic [2*INT_MAN_WIDTH:0] preshift_mant; // mantissa before final shift + logic [2*INT_MAN_WIDTH:0] destination_mant; // mantissa from shifter, with rnd bit + logic [SUPER_MAN_BITS-1:0] final_mant; // mantissa after adjustments + logic [MAX_INT_WIDTH-1:0] final_int; // integer shifted in position + + logic [$clog2(INT_MAN_WIDTH+1)-1:0] denorm_shamt; // shift amount for denormalization + + logic [1:0] fp_round_sticky_bits, int_round_sticky_bits, round_sticky_bits; + logic of_before_round, uf_before_round; + + + // Perform adjustments to mantissa and exponent + always_comb begin : cast_value + // Default assignment + final_exp = unsigned'(destination_exp_q); // take exponent as is, only look at lower bits + preshift_mant = '0; // initialize mantissa container with zeroes + denorm_shamt = SUPER_MAN_BITS - fpnew_pkg::man_bits(dst_fmt_q2); // right of mantissa + of_before_round = 1'b0; + uf_before_round = 1'b0; + + // Place mantissa to the left of the shifter + preshift_mant = input_mant_q << (INT_MAN_WIDTH + 1); + + // Handle INT casts + if (dst_is_int_q) begin + // By default right shift mantissa to be an integer + denorm_shamt = unsigned'(MAX_INT_WIDTH - 1 - input_exp_q); + // overflow: when converting to unsigned the range is larger by one + if (input_exp_q >= 
signed'(fpnew_pkg::int_width(int_fmt_q2) - 1 + op_mod_q2)) begin + denorm_shamt = '0; // prevent shifting + of_before_round = 1'b1; + // underflow + end else if (input_exp_q < -1) begin + denorm_shamt = MAX_INT_WIDTH + 1; // all bits go to the sticky + uf_before_round = 1'b1; + end + // Handle FP over-/underflows + end else begin + // Overflow or infinities (for proper rounding) + if ((destination_exp_q >= signed'(2**fpnew_pkg::exp_bits(dst_fmt_q2))-1) || + (~src_is_int_q && info_q.is_inf)) begin + final_exp = unsigned'(2**fpnew_pkg::exp_bits(dst_fmt_q2)-2); // largest normal value + preshift_mant = '1; // largest normal value and RS bits set + of_before_round = 1'b1; + // Denormalize underflowing values + end else if (destination_exp_q < 1 && + destination_exp_q >= -signed'(fpnew_pkg::man_bits(dst_fmt_q2))) begin + final_exp = '0; // denormal result + denorm_shamt = unsigned'(denorm_shamt + 1 - destination_exp_q); // adjust right shifting + uf_before_round = 1'b1; + // Limit the shift to retain sticky bits + end else if (destination_exp_q < -signed'(fpnew_pkg::man_bits(dst_fmt_q2))) begin + final_exp = '0; // denormal result + denorm_shamt = unsigned'(denorm_shamt + 2 + fpnew_pkg::man_bits(dst_fmt_q2)); // to sticky + uf_before_round = 1'b1; + end + end + end + + localparam NUM_FP_STICKY = 2 * INT_MAN_WIDTH - SUPER_MAN_BITS - 1; // removed mantissa, 1. 
and R + localparam NUM_INT_STICKY = 2 * INT_MAN_WIDTH - MAX_INT_WIDTH; // removed int and R + + // Mantissa adjustment shift + assign destination_mant = preshift_mant >> denorm_shamt; + // Extract final mantissa and round bit, discard the normal bit (for FP) + assign {final_mant, fp_round_sticky_bits[1]} = + destination_mant[2*INT_MAN_WIDTH-1-:SUPER_MAN_BITS+1]; + assign {final_int, int_round_sticky_bits[1]} = destination_mant[2*INT_MAN_WIDTH-:MAX_INT_WIDTH+1]; + // Collapse sticky bits + assign fp_round_sticky_bits[0] = (| {destination_mant[NUM_FP_STICKY-1:0]}); + assign int_round_sticky_bits[0] = (| {destination_mant[NUM_INT_STICKY-1:0]}); + + // select RS bits for destination operation + assign round_sticky_bits = dst_is_int_q ? int_round_sticky_bits : fp_round_sticky_bits; + + // ---------------------------- + // Rounding and classification + // ---------------------------- + logic [WIDTH-1:0] pre_round_abs; // absolute value of result before rnd + logic of_after_round; // overflow + logic uf_after_round; // underflow + + logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_pre_round_abs; // per format + logic [NUM_FORMATS-1:0] fmt_of_after_round; + logic [NUM_FORMATS-1:0] fmt_uf_after_round; + + logic [NUM_INT_FORMATS-1:0][WIDTH-1:0] ifmt_pre_round_abs; // per format + logic [NUM_INT_FORMATS-1:0] ifmt_of_after_round; + + logic rounded_sign; + logic [WIDTH-1:0] rounded_abs; // absolute value of result after rounding + logic result_true_zero; + + logic [WIDTH-1:0] rounded_int_res; // after possible inversion + logic rounded_int_res_zero; // after rounding + + + // Pack exponent and mantissa into proper rounding form + for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_res_assemble + // Set up some constants + localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt)); + + if (FpFmtConfig[fmt]) begin : active_format + always_comb begin : 
assemble_result + fmt_pre_round_abs[fmt] = {final_exp[EXP_BITS-1:0], final_mant[MAN_BITS-1:0]}; // 0-extend + end + end else begin : inactive_format + assign fmt_pre_round_abs[fmt] = '{default: fpnew_pkg::DONT_CARE}; + end + end + + // Sign-extend integer result + for (genvar ifmt = 0; ifmt < int'(NUM_INT_FORMATS); ifmt++) begin : gen_int_res_sign_ext + // Set up some constants + localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt)); + + if (IntFmtConfig[ifmt]) begin : active_format + always_comb begin : assemble_result + // sign-extend result + ifmt_pre_round_abs[ifmt] = '{default: final_int[INT_WIDTH-1]}; + ifmt_pre_round_abs[ifmt][INT_WIDTH-1:0] = final_int[INT_WIDTH-1:0]; + end + end else begin : inactive_format + assign ifmt_pre_round_abs[ifmt] = '{default: fpnew_pkg::DONT_CARE}; + end + end + + // Select output with destination format and operation + assign pre_round_abs = dst_is_int_q ? ifmt_pre_round_abs[int_fmt_q2] : fmt_pre_round_abs[dst_fmt_q2]; + + fpnew_rounding #( + .AbsWidth ( WIDTH ) + ) i_fpnew_rounding ( + .abs_value_i ( pre_round_abs ), + .sign_i ( input_sign_q ), // source format + .round_sticky_bits_i ( round_sticky_bits ), + .rnd_mode_i ( rnd_mode_q ), + .effective_subtraction_i ( 1'b0 ), // no operation happened + .abs_rounded_o ( rounded_abs ), + .sign_o ( rounded_sign ), + .exact_zero_o ( result_true_zero ) + ); + + logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_result; + + // Detect overflows and inject sign + for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_sign_inject + // Set up some constants + localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt)); + + if (FpFmtConfig[fmt]) begin : active_format + always_comb begin : post_process + // detect overflow (of) / underflow (uf) + fmt_uf_after_round[fmt] =
rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0; // denormal + fmt_of_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '1; // inf exp. + + // Assemble regular result, NaN-box short ones. Int zeroes need to be detected + fmt_result[fmt] = '1; + fmt_result[fmt][FP_WIDTH-1:0] = src_is_int_q & mant_is_zero_q + ? '0 + : {rounded_sign, rounded_abs[EXP_BITS+MAN_BITS-1:0]}; + end + end else begin : inactive_format + assign fmt_uf_after_round[fmt] = fpnew_pkg::DONT_CARE; + assign fmt_of_after_round[fmt] = fpnew_pkg::DONT_CARE; + assign fmt_result[fmt] = '{default: fpnew_pkg::DONT_CARE}; + end + end + + // Negative integer result needs to be brought into two's complement + assign rounded_int_res = rounded_sign ? unsigned'(-rounded_abs) : rounded_abs; + assign rounded_int_res_zero = (rounded_int_res == '0); + + // Detect integer overflows after rounding (only positives) + for (genvar ifmt = 0; ifmt < int'(NUM_INT_FORMATS); ifmt++) begin : gen_int_overflow + // Set up some constants + localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt)); + + if (IntFmtConfig[ifmt]) begin : active_format + always_comb begin : detect_overflow + ifmt_of_after_round[ifmt] = 1'b0; + // Int result can overflow if we're at the max exponent + if (!rounded_sign && input_exp_q == signed'(INT_WIDTH - 2 + op_mod_q2)) begin + // Check whether the rounded MSB differs from unrounded MSB + ifmt_of_after_round[ifmt] = ~rounded_int_res[INT_WIDTH-2+op_mod_q2]; + end + end + end else begin : inactive_format + assign ifmt_of_after_round[ifmt] = fpnew_pkg::DONT_CARE; + end + end + + // Classification after rounding, selected by destination format + assign uf_after_round = fmt_uf_after_round[dst_fmt_q2]; + assign of_after_round = dst_is_int_q ?
ifmt_of_after_round[int_fmt_q2] : fmt_of_after_round[dst_fmt_q2]; + + // ------------------------- + // FP Special case handling + // ------------------------- + logic [WIDTH-1:0] fp_special_result; + fpnew_pkg::status_t fp_special_status; + logic fp_result_is_special; + + logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_special_result; + + // Special result construction + for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_special_results + // Set up some constants + localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt)); + + localparam logic [EXP_BITS-1:0] QNAN_EXPONENT = '1; + localparam logic [MAN_BITS-1:0] QNAN_MANTISSA = 2**(MAN_BITS-1); + + if (FpFmtConfig[fmt]) begin : active_format + always_comb begin : special_results + logic [FP_WIDTH-1:0] special_res; + special_res = info_q.is_zero + ? 
input_sign_q << FP_WIDTH-1 // signed zero + : {1'b0, QNAN_EXPONENT, QNAN_MANTISSA}; // qNaN + + // Initialize special result with ones (NaN-box) + fmt_special_result[fmt] = '1; + fmt_special_result[fmt][FP_WIDTH-1:0] = special_res; + end + end else begin : inactive_format + assign fmt_special_result[fmt] = '{default: fpnew_pkg::DONT_CARE}; + end + end + + // Detect special case from source format, I2F casts don't produce a special result + assign fp_result_is_special = ~src_is_int_q & (info_q.is_zero | + info_q.is_nan | + ~info_q.is_boxed); + + // Signalling input NaNs raise invalid flag, otherwise no flags set + assign fp_special_status = '{NV: info_q.is_signalling, default: 1'b0}; + + // Assemble result according to destination format + assign fp_special_result = fmt_special_result[dst_fmt_q2]; // destination format + + // -------------------------- + // INT Special case handling + // -------------------------- + logic [WIDTH-1:0] int_special_result; + fpnew_pkg::status_t int_special_status; + logic int_result_is_special; + + logic [NUM_INT_FORMATS-1:0][WIDTH-1:0] ifmt_special_result; + + // Special result construction + for (genvar ifmt = 0; ifmt < int'(NUM_INT_FORMATS); ifmt++) begin : gen_special_results_int + // Set up some constants + localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt)); + + if (IntFmtConfig[ifmt]) begin : active_format + always_comb begin : special_results + automatic logic [INT_WIDTH-1:0] special_res; + + // Default is overflow to positive max, which is 2**INT_WIDTH-1 or 2**(INT_WIDTH-1)-1 + special_res[INT_WIDTH-2:0] = '1; // alone yields 2**(INT_WIDTH-1)-1 + special_res[INT_WIDTH-1] = op_mod_q2; // for unsigned casts yields 2**INT_WIDTH-1 + + // Negative special case (except for nans) tie to -max or 0 + if (input_sign_q && !info_q.is_nan) + special_res = ~special_res; + + // Initialize special result with sign-extension + ifmt_special_result[ifmt] = '{default: special_res[INT_WIDTH-1]}; + 
ifmt_special_result[ifmt][INT_WIDTH-1:0] = special_res; + end + end else begin : inactive_format + assign ifmt_special_result[ifmt] = '{default: fpnew_pkg::DONT_CARE}; + end + end + + // Detect special case from source format (inf, nan, overflow, nan-boxing or negative unsigned) + assign int_result_is_special = info_q.is_nan | info_q.is_inf | + of_before_round | of_after_round | ~info_q.is_boxed | + (input_sign_q & op_mod_q2 & ~rounded_int_res_zero); + + // All integer special cases are invalid + assign int_special_status = '{NV: 1'b1, default: 1'b0}; + + // Assemble result according to destination format + assign int_special_result = ifmt_special_result[int_fmt_q2]; // destination format + + // ----------------- + // Result selection + // ----------------- + fpnew_pkg::status_t int_regular_status, fp_regular_status; + + logic [WIDTH-1:0] fp_result, int_result; + fpnew_pkg::status_t fp_status, int_status; + + assign fp_regular_status.NV = src_is_int_q & (of_before_round | of_after_round); // overflow is invalid for I2F casts + assign fp_regular_status.DZ = 1'b0; // no divisions + assign fp_regular_status.OF = ~src_is_int_q & (~info_q.is_inf & (of_before_round | of_after_round)); // inf casts no OF + assign fp_regular_status.UF = uf_after_round & fp_regular_status.NX; + assign fp_regular_status.NX = src_is_int_q ? (| fp_round_sticky_bits) // overflow is invalid in i2f + : (| fp_round_sticky_bits) | (~info_q.is_inf & (of_before_round | of_after_round)); + assign int_regular_status = '{NX: (| int_round_sticky_bits), default: 1'b0}; + + assign fp_result = fp_result_is_special ? fp_special_result : fmt_result[dst_fmt_q2]; + assign fp_status = fp_result_is_special ? fp_special_status : fp_regular_status; + assign int_result = int_result_is_special ? int_special_result : rounded_int_res; + assign int_status = int_result_is_special ? 
int_special_status : int_regular_status; + + // Final results for output pipeline + logic [WIDTH-1:0] result_d; + fpnew_pkg::status_t status_d; + logic extension_bit; + + // Select output depending on special case detection + assign result_d = dst_is_int_q ? int_result : fp_result; + assign status_d = dst_is_int_q ? int_status : fp_status; + + // MSB of int result decides extension, otherwise NaN box + assign extension_bit = dst_is_int_q ? int_result[WIDTH-1] : 1'b1; + + // ---------------- + // Output Pipeline + // ---------------- + // Output pipeline signals, index i holds signal after i register stages + logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; + fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; + logic [0:NUM_OUT_REGS] out_pipe_ext_bit_q; + TagType [0:NUM_OUT_REGS] out_pipe_tag_q; + logic [0:NUM_OUT_REGS] out_pipe_mask_q; + AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; + logic [0:NUM_OUT_REGS] out_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_OUT_REGS] out_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign out_pipe_result_q[0] = result_d; + assign out_pipe_status_q[0] = status_d; + assign out_pipe_ext_bit_q[0] = extension_bit; + assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS]; + assign out_pipe_mask_q[0] = mid_pipe_mask_q[NUM_MID_REGS]; + assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS]; + assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS]; + // Input stage: Propagate pipeline ready signal to inside pipe + assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) + `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) + `FFL(out_pipe_ext_bit_q[i+1], out_pipe_ext_bit_q[i], reg_ena, '0) + `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) + `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + // Output stage: Ready travels backwards from output side, driven by downstream circuitry + assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs + assign result_o = out_pipe_result_q[NUM_OUT_REGS]; + assign status_o = out_pipe_status_q[NUM_OUT_REGS]; + assign extension_bit_o = out_pipe_ext_bit_q[NUM_OUT_REGS]; + assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; + assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; + assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; + assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; + assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); +endmodule diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_classifier.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_classifier.sv new file mode 100644 index 00000000..a322946d --- /dev/null +++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_classifier.sv @@ -0,0 +1,74 @@ +// Copyright 2019 ETH Zurich and University of Bologna. 
+// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// SPDX-License-Identifier: SHL-0.51 + +// Author: Stefan Mach + +module fpnew_classifier #( + parameter fpnew_pkg::fp_format_e FpFormat = fpnew_pkg::fp_format_e'(0), + parameter int unsigned NumOperands = 1, + // Do not change + localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat) +) ( + input logic [NumOperands-1:0][WIDTH-1:0] operands_i, + input logic [NumOperands-1:0] is_boxed_i, + output fpnew_pkg::fp_info_t [NumOperands-1:0] info_o +); + + localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(FpFormat); + localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(FpFormat); + + // Type definition + typedef struct packed { + logic sign; + logic [EXP_BITS-1:0] exponent; + logic [MAN_BITS-1:0] mantissa; + } fp_t; + + // Iterate through all operands + for (genvar op = 0; op < int'(NumOperands); op++) begin : gen_num_values + + fp_t value; + logic is_boxed; + logic is_normal; + logic is_inf; + logic is_nan; + logic is_signalling; + logic is_quiet; + logic is_zero; + logic is_subnormal; + + // --------------- + // Classify Input + // --------------- + always_comb begin : classify_input + value = operands_i[op]; + is_boxed = is_boxed_i[op]; + is_normal = is_boxed && (value.exponent != '0) && (value.exponent != '1); + is_zero = is_boxed && (value.exponent == '0) && (value.mantissa == '0); + is_subnormal = is_boxed && (value.exponent == '0) && 
!is_zero; + is_inf = is_boxed && ((value.exponent == '1) && (value.mantissa == '0)); + is_nan = !is_boxed || ((value.exponent == '1) && (value.mantissa != '0)); + is_signalling = is_boxed && is_nan && (value.mantissa[MAN_BITS-1] == 1'b0); + is_quiet = is_nan && !is_signalling; + // Assign output for current input + info_o[op].is_normal = is_normal; + info_o[op].is_subnormal = is_subnormal; + info_o[op].is_zero = is_zero; + info_o[op].is_inf = is_inf; + info_o[op].is_nan = is_nan; + info_o[op].is_signalling = is_signalling; + info_o[op].is_quiet = is_quiet; + info_o[op].is_boxed = is_boxed; + end + end +endmodule diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_divsqrt_multi.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_divsqrt_multi.sv new file mode 100644 index 00000000..0f7ea5d5 --- /dev/null +++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_divsqrt_multi.sv @@ -0,0 +1,366 @@ +// Copyright 2019 ETH Zurich and University of Bologna. +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// SPDX-License-Identifier: SHL-0.51 + +// Author: Stefan Mach + +`include "common_cells/registers.svh" + +module fpnew_divsqrt_multi #( + parameter fpnew_pkg::fmt_logic_t FpFmtConfig = '1, + // FPU configuration + parameter int unsigned NumPipeRegs = 0, + parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::AFTER, + parameter type TagType = logic, + parameter type AuxType = logic, + // Do not change + localparam int unsigned WIDTH = fpnew_pkg::max_fp_width(FpFmtConfig), + localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS +) ( + input logic clk_i, + input logic rst_ni, + // Input signals + input logic [1:0][WIDTH-1:0] operands_i, // 2 operands + input logic [NUM_FORMATS-1:0][1:0] is_boxed_i, // 2 operands + input fpnew_pkg::roundmode_e rnd_mode_i, + input fpnew_pkg::operation_e op_i, + input fpnew_pkg::fp_format_e dst_fmt_i, + input TagType tag_i, + input logic mask_i, + input AuxType aux_i, + // Input Handshake + input logic in_valid_i, + output logic in_ready_o, + output logic divsqrt_done_o, + input logic simd_synch_done_i, + output logic divsqrt_ready_o, + input logic simd_synch_rdy_i, + input logic flush_i, + // Output signals + output logic [WIDTH-1:0] result_o, + output fpnew_pkg::status_t status_o, + output logic extension_bit_o, + output TagType tag_o, + output logic mask_o, + output AuxType aux_o, + // Output handshake + output logic out_valid_o, + input logic out_ready_i, + // Indication of valid data in flight + output logic busy_o +); + + // ---------- + // Constants + // ---------- + // Pipelines + localparam NUM_INP_REGS = (PipeConfig == fpnew_pkg::BEFORE) + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? (NumPipeRegs / 2) // Last to get distributed regs + : 0); // no regs here otherwise + localparam NUM_OUT_REGS = (PipeConfig == fpnew_pkg::AFTER || PipeConfig == fpnew_pkg::INSIDE) + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? 
((NumPipeRegs + 1) / 2) // First to get distributed regs + : 0); // no regs here otherwise + + // --------------- + // Input pipeline + // --------------- + // Selected pipeline output signals as non-arrays + logic [1:0][WIDTH-1:0] operands_q; + fpnew_pkg::roundmode_e rnd_mode_q; + fpnew_pkg::operation_e op_q; + fpnew_pkg::fp_format_e dst_fmt_q; + logic in_valid_q; + + // Input pipeline signals, index i holds signal after i register stages + logic [0:NUM_INP_REGS][1:0][WIDTH-1:0] inp_pipe_operands_q; + fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; + fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; + fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; + TagType [0:NUM_INP_REGS] inp_pipe_tag_q; + logic [0:NUM_INP_REGS] inp_pipe_mask_q; + AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; + logic [0:NUM_INP_REGS] inp_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_INP_REGS] inp_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign inp_pipe_operands_q[0] = operands_i; + assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; + assign inp_pipe_op_q[0] = op_i; + assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; + assign inp_pipe_tag_q[0] = tag_i; + assign inp_pipe_mask_q[0] = mask_i; + assign inp_pipe_aux_q[0] = aux_i; + assign inp_pipe_valid_q[0] = in_valid_i; + // Input stage: Propagate pipeline ready signal to updtream circuitry + assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) + `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) + `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) + `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + // Output stage: assign selected pipe outputs to signals for later use + assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; + assign rnd_mode_q = inp_pipe_rnd_mode_q[NUM_INP_REGS]; + assign op_q = inp_pipe_op_q[NUM_INP_REGS]; + assign dst_fmt_q = inp_pipe_dst_fmt_q[NUM_INP_REGS]; + assign in_valid_q = inp_pipe_valid_q[NUM_INP_REGS]; + + // ----------------- + // Input processing + // ----------------- + logic [1:0] divsqrt_fmt; + logic [1:0][63:0] divsqrt_operands; // those are fixed to 64bit + logic input_is_fp8; + + // Translate fpnew formats into divsqrt formats + always_comb begin : translate_fmt + unique case (dst_fmt_q) + fpnew_pkg::FP32: divsqrt_fmt = 2'b00; + fpnew_pkg::FP64: divsqrt_fmt = 2'b01; + fpnew_pkg::FP16: divsqrt_fmt = 2'b10; + fpnew_pkg::FP16ALT: divsqrt_fmt = 2'b11; + default: divsqrt_fmt = 2'b10; // maps also FP8 to FP16 + endcase + + // Only if FP8 is enabled + input_is_fp8 = 
FpFmtConfig[fpnew_pkg::FP8] & (dst_fmt_q == fpnew_pkg::FP8); + + // If FP8 is supported, map it to an FP16 value + divsqrt_operands[0] = input_is_fp8 ? operands_q[0] << 8 : operands_q[0]; + divsqrt_operands[1] = input_is_fp8 ? operands_q[1] << 8 : operands_q[1]; + end + + // ------------ + // Control FSM + // ------------ + + logic in_ready; // input handshake with upstream + logic div_valid, sqrt_valid; // input signalling with unit + logic unit_ready, unit_done, unit_done_q; // status signals from unit instance + logic op_starting; // high in the cycle a new operation starts + logic out_valid, out_ready; // output handshake with downstream + logic unit_busy; // valid data in flight + // FSM states + typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e; + fsm_state_e state_q, state_d; + + // Ready synch with other lanes + // Bring the FSM-generated ready outside the unit, to synchronize it with the other lanes + assign divsqrt_ready_o = in_ready; + // Upstream ready comes from sanitization FSM, and it is synched among all the lanes + assign inp_pipe_ready[NUM_INP_REGS] = simd_synch_rdy_i; + + // Valid synch with other lanes + // When one divsqrt unit completes an operation, keep its done high, waiting for the other lanes + // As soon as all the lanes are over, we can clear this FF and start with a new operation + `FFLARNC(unit_done_q, unit_done, unit_done, simd_synch_done_i, 1'b0, clk_i, rst_ni); + // Tell the other units that this unit has finished now or in the past + assign divsqrt_done_o = unit_done_q | unit_done; + + // Valids are gated by the FSM ready. Invalid input ops run a sqrt to not lose illegal instr. 
+ assign div_valid = in_valid_q & (op_q == fpnew_pkg::DIV) & in_ready & ~flush_i; + assign sqrt_valid = in_valid_q & (op_q != fpnew_pkg::DIV) & in_ready & ~flush_i; + assign op_starting = div_valid | sqrt_valid; + + // FSM to safely apply and receive data from DIVSQRT unit + always_comb begin : flag_fsm + // Default assignments + in_ready = 1'b0; + out_valid = 1'b0; + unit_busy = 1'b0; + state_d = state_q; + + unique case (state_q) + // Waiting for work + IDLE: begin + in_ready = 1'b1; // we're ready + if (in_valid_q && unit_ready) begin // New work arrives + state_d = BUSY; // go into processing state + end + end + // Operation in progress + BUSY: begin + unit_busy = 1'b1; // data in flight + // If all the lanes are done with processing + if (simd_synch_done_i) begin + out_valid = 1'b1; // try to commit result downstream + // If downstream accepts our result + if (out_ready) begin + state_d = IDLE; // we anticipate going back to idling.. + if (in_valid_q && unit_ready) begin // ..unless new work comes in + in_ready = 1'b1; // we acknowledge the instruction + state_d = BUSY; // and stay busy with it + end + // Otherwise if downstream is not ready for the result + end else begin + state_d = HOLD; // wait for the pipeline to take the data + end + end + end + // Waiting with valid result for downstream + HOLD: begin + unit_busy = 1'b1; // data in flight + out_valid = 1'b1; // try to commit result downstream + // If the result is accepted by downstream + if (out_ready) begin + state_d = IDLE; // go back to idle.. 
+ if (in_valid_q && unit_ready) begin // ..unless new work comes in + in_ready = 1'b1; // acknowledge the new transaction + state_d = BUSY; // will be busy with the next instruction + end + end + end + // fall into idle state otherwise + default: state_d = IDLE; + endcase + + // Flushing overrides the other actions + if (flush_i) begin + unit_busy = 1'b0; // data is invalidated + out_valid = 1'b0; // cancel any valid data + state_d = IDLE; // go to default state + end + end + + // FSM status register (asynch active low reset) + `FF(state_q, state_d, IDLE) + + // Hold additional information while the operation is in progress + logic result_is_fp8_q; + TagType result_tag_q; + logic result_mask_q; + AuxType result_aux_q; + + // Fill the registers everytime a valid operation arrives (load FF, active low asynch rst) + `FFL(result_is_fp8_q, input_is_fp8, op_starting, '0) + `FFL(result_tag_q, inp_pipe_tag_q[NUM_INP_REGS], op_starting, '0) + `FFL(result_mask_q, inp_pipe_mask_q[NUM_INP_REGS],op_starting, '0) + `FFL(result_aux_q, inp_pipe_aux_q[NUM_INP_REGS], op_starting, '0) + + // ----------------- + // DIVSQRT instance + // ----------------- + logic [63:0] unit_result; + logic [WIDTH-1:0] adjusted_result, held_result_q; + fpnew_pkg::status_t unit_status, held_status_q; + logic hold_en; + + div_sqrt_top_mvp i_divsqrt_lei ( + .Clk_CI ( clk_i ), + .Rst_RBI ( rst_ni ), + .Div_start_SI ( div_valid ), + .Sqrt_start_SI ( sqrt_valid ), + .Operand_a_DI ( divsqrt_operands[0] ), + .Operand_b_DI ( divsqrt_operands[1] ), + .RM_SI ( rnd_mode_q ), + .Precision_ctl_SI ( '0 ), + .Format_sel_SI ( divsqrt_fmt ), + .Kill_SI ( flush_i ), + .Result_DO ( unit_result ), + .Fflags_SO ( unit_status ), + .Ready_SO ( unit_ready ), + .Done_SO ( unit_done ) + ); + + // Adjust result width and fix FP8 + assign adjusted_result = result_is_fp8_q ? 
unit_result >> 8 : unit_result; + + // Hold the result when one lane has finished execution, except when all the lanes finish together + // and the result can be accepted downstream + assign hold_en = unit_done & (~simd_synch_done_i | ~out_ready); + // The Hold register (load, no reset) + `FFLNR(held_result_q, adjusted_result, hold_en, clk_i) + `FFLNR(held_status_q, unit_status, hold_en, clk_i) + + // -------------- + // Output Select + // -------------- + logic [WIDTH-1:0] result_d; + fpnew_pkg::status_t status_d; + // Prioritize hold register data + assign result_d = unit_done_q ? held_result_q : adjusted_result; + assign status_d = unit_done_q ? held_status_q : unit_status; + + // ---------------- + // Output Pipeline + // ---------------- + // Output pipeline signals, index i holds signal after i register stages + logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; + fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; + TagType [0:NUM_OUT_REGS] out_pipe_tag_q; + logic [0:NUM_OUT_REGS] out_pipe_mask_q; + AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; + logic [0:NUM_OUT_REGS] out_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_OUT_REGS] out_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign out_pipe_result_q[0] = result_d; + assign out_pipe_status_q[0] = status_d; + assign out_pipe_tag_q[0] = result_tag_q; + assign out_pipe_mask_q[0] = result_mask_q; + assign out_pipe_aux_q[0] = result_aux_q; + assign out_pipe_valid_q[0] = out_valid; + // Input stage: Propagate pipeline ready signal to inside pipe + assign out_ready = out_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) + `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) + `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) + `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + // Output stage: Ready travels backwards from output side, driven by downstream circuitry + assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs + assign result_o = out_pipe_result_q[NUM_OUT_REGS]; + assign status_o = out_pipe_status_q[NUM_OUT_REGS]; + assign extension_bit_o = 1'b1; // always NaN-Box result + assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; + assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; + assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; + assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; + assign busy_o = (| {inp_pipe_valid_q, unit_busy, out_pipe_valid_q}); +endmodule diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_fma.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_fma.sv new file mode 100644 index 00000000..c29e7b3e --- /dev/null +++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_fma.sv @@ -0,0 +1,690 @@ +// Copyright 2019 ETH Zurich and University of Bologna. 
+// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// SPDX-License-Identifier: SHL-0.51 + +// Author: Stefan Mach + +`include "common_cells/registers.svh" + +module fpnew_fma #( + parameter fpnew_pkg::fp_format_e FpFormat = fpnew_pkg::fp_format_e'(0), + parameter int unsigned NumPipeRegs = 0, + parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, + parameter type TagType = logic, + parameter type AuxType = logic, + + localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat) // do not change +) ( + input logic clk_i, + input logic rst_ni, + // Input signals + input logic [2:0][WIDTH-1:0] operands_i, // 3 operands + input logic [2:0] is_boxed_i, // 3 operands + input fpnew_pkg::roundmode_e rnd_mode_i, + input fpnew_pkg::operation_e op_i, + input logic op_mod_i, + input TagType tag_i, + input logic mask_i, + input AuxType aux_i, + // Input Handshake + input logic in_valid_i, + output logic in_ready_o, + input logic flush_i, + // Output signals + output logic [WIDTH-1:0] result_o, + output fpnew_pkg::status_t status_o, + output logic extension_bit_o, + output TagType tag_o, + output logic mask_o, + output AuxType aux_o, + // Output handshake + output logic out_valid_o, + input logic out_ready_i, + // Indication of valid data in flight + output logic busy_o +); + + // ---------- + // Constants + // ---------- + localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(FpFormat); + 
localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(FpFormat); + localparam int unsigned BIAS = fpnew_pkg::bias(FpFormat); + // Precision bits 'p' include the implicit bit + localparam int unsigned PRECISION_BITS = MAN_BITS + 1; + // The lower 2p+3 bits of the internal FMA result will be needed for leading-zero detection + localparam int unsigned LOWER_SUM_WIDTH = 2 * PRECISION_BITS + 3; + localparam int unsigned LZC_RESULT_WIDTH = $clog2(LOWER_SUM_WIDTH); + // Internal exponent width of FMA must accomodate all meaningful exponent values in order to avoid + // datapath leakage. This is either given by the exponent bits or the width of the LZC result. + // In most reasonable FP formats the internal exponent will be wider than the LZC result. + localparam int unsigned EXP_WIDTH = unsigned'(fpnew_pkg::maximum(EXP_BITS + 2, LZC_RESULT_WIDTH)); + // Shift amount width: maximum internal mantissa size is 3p+4 bits + localparam int unsigned SHIFT_AMOUNT_WIDTH = $clog2(3 * PRECISION_BITS + 5); + // Pipelines + localparam NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 1) / 3) // Second to get distributed regs + : 0); // no regs here otherwise + localparam NUM_MID_REGS = PipeConfig == fpnew_pkg::INSIDE + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 2) / 3) // First to get distributed regs + : 0); // no regs here otherwise + localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? 
(NumPipeRegs / 3) // Last to get distributed regs + : 0); // no regs here otherwise + + // ---------------- + // Type definition + // ---------------- + typedef struct packed { + logic sign; + logic [EXP_BITS-1:0] exponent; + logic [MAN_BITS-1:0] mantissa; + } fp_t; + + // --------------- + // Input pipeline + // --------------- + // Input pipeline signals, index i holds signal after i register stages + logic [0:NUM_INP_REGS][2:0][WIDTH-1:0] inp_pipe_operands_q; + logic [0:NUM_INP_REGS][2:0] inp_pipe_is_boxed_q; + fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; + fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; + logic [0:NUM_INP_REGS] inp_pipe_op_mod_q; + TagType [0:NUM_INP_REGS] inp_pipe_tag_q; + logic [0:NUM_INP_REGS] inp_pipe_mask_q; + AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; + logic [0:NUM_INP_REGS] inp_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_INP_REGS] inp_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign inp_pipe_operands_q[0] = operands_i; + assign inp_pipe_is_boxed_q[0] = is_boxed_i; + assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; + assign inp_pipe_op_q[0] = op_i; + assign inp_pipe_op_mod_q[0] = op_mod_i; + assign inp_pipe_tag_q[0] = tag_i; + assign inp_pipe_mask_q[0] = mask_i; + assign inp_pipe_aux_q[0] = aux_i; + assign inp_pipe_valid_q[0] = in_valid_i; + // Input stage: Propagate pipeline ready signal to updtream circuitry + assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) + `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) + `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) + `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0) + `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) + `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + + // ----------------- + // Input processing + // ----------------- + fpnew_pkg::fp_info_t [2:0] info_q; + + // Classify input + fpnew_classifier #( + .FpFormat ( FpFormat ), + .NumOperands ( 3 ) + ) i_class_inputs ( + .operands_i ( inp_pipe_operands_q[NUM_INP_REGS] ), + .is_boxed_i ( inp_pipe_is_boxed_q[NUM_INP_REGS] ), + .info_o ( info_q ) + ); + + fp_t operand_a, operand_b, operand_c; + fpnew_pkg::fp_info_t info_a, info_b, info_c; + + // Operation selection and operand adjustment + // | \c op_q | \c op_mod_q | Operation Adjustment + // |:--------:|:-----------:|--------------------- + // | FMADD | \c 0 | FMADD: none + // | FMADD | \c 1 | FMSUB: Invert sign of operand C + // | FNMSUB | \c 0 | FNMSUB: Invert sign of operand A + // | FNMSUB | \c 1 | FNMADD: Invert sign of operands A and C + // | ADD | \c 0 | ADD: Set operand A to +1.0 + // | ADD | \c 1 | SUB: Set operand A to +1.0, 
invert sign of operand C + // | MUL | \c 0 | MUL: Set operand C to +0.0 or -0.0 depending on the rounding mode + // | *others* | \c - | *invalid* + // \note \c op_mod_q always inverts the sign of the addend. + always_comb begin : op_select + + // Default assignments - packing-order-agnostic + operand_a = inp_pipe_operands_q[NUM_INP_REGS][0]; + operand_b = inp_pipe_operands_q[NUM_INP_REGS][1]; + operand_c = inp_pipe_operands_q[NUM_INP_REGS][2]; + info_a = info_q[0]; + info_b = info_q[1]; + info_c = info_q[2]; + + // op_mod_q inverts sign of operand C + operand_c.sign = operand_c.sign ^ inp_pipe_op_mod_q[NUM_INP_REGS]; + + unique case (inp_pipe_op_q[NUM_INP_REGS]) + fpnew_pkg::FMADD: ; // do nothing + fpnew_pkg::FNMSUB: operand_a.sign = ~operand_a.sign; // invert sign of product + fpnew_pkg::ADD: begin // Set multiplicand to +1 + operand_a = '{sign: 1'b0, exponent: BIAS, mantissa: '0}; + info_a = '{is_normal: 1'b1, is_boxed: 1'b1, default: 1'b0}; //normal, boxed value. + end + fpnew_pkg::MUL: begin // Set addend to +0 or -0, depending whether the rounding mode is RDN + if (inp_pipe_rnd_mode_q[NUM_INP_REGS] == fpnew_pkg::RDN) + operand_c = '{sign: 1'b0, exponent: '0, mantissa: '0}; + else + operand_c = '{sign: 1'b1, exponent: '0, mantissa: '0}; + info_c = '{is_zero: 1'b1, is_boxed: 1'b1, default: 1'b0}; //zero, boxed value. 
+ end + default: begin // propagate don't cares + operand_a = '{default: fpnew_pkg::DONT_CARE}; + operand_b = '{default: fpnew_pkg::DONT_CARE}; + operand_c = '{default: fpnew_pkg::DONT_CARE}; + info_a = '{default: fpnew_pkg::DONT_CARE}; + info_b = '{default: fpnew_pkg::DONT_CARE}; + info_c = '{default: fpnew_pkg::DONT_CARE}; + end + endcase + end + + // --------------------- + // Input classification + // --------------------- + logic any_operand_inf; + logic any_operand_nan; + logic signalling_nan; + logic effective_subtraction; + logic tentative_sign; + + // Reduction for special case handling + assign any_operand_inf = (| {info_a.is_inf, info_b.is_inf, info_c.is_inf}); + assign any_operand_nan = (| {info_a.is_nan, info_b.is_nan, info_c.is_nan}); + assign signalling_nan = (| {info_a.is_signalling, info_b.is_signalling, info_c.is_signalling}); + // Effective subtraction in FMA occurs when product and addend signs differ + assign effective_subtraction = operand_a.sign ^ operand_b.sign ^ operand_c.sign; + // The tentative sign of the FMA shall be the sign of the product + assign tentative_sign = operand_a.sign ^ operand_b.sign; + + // ---------------------- + // Special case handling + // ---------------------- + fp_t special_result; + fpnew_pkg::status_t special_status; + logic result_is_special; + + always_comb begin : special_cases + // Default assignments + special_result = '{sign: 1'b0, exponent: '1, mantissa: 2**(MAN_BITS-1)}; // canonical qNaN + special_status = '0; + result_is_special = 1'b0; + + // Handle potentially mixed nan & infinity input => important for the case where infinity and + // zero are multiplied and added to a qnan. 
+ // RISC-V mandates raising the NV exception in these cases: + // (inf * 0) + c or (0 * inf) + c INVALID, no matter c (even quiet NaNs) + if ((info_a.is_inf && info_b.is_zero) || (info_a.is_zero && info_b.is_inf)) begin + result_is_special = 1'b1; // bypass FMA, output is the canonical qNaN + special_status.NV = 1'b1; // invalid operation + // NaN Inputs cause canonical quiet NaN at the output and maybe invalid OP + end else if (any_operand_nan) begin + result_is_special = 1'b1; // bypass FMA, output is the canonical qNaN + special_status.NV = signalling_nan; // raise the invalid operation flag if signalling + // Special cases involving infinity + end else if (any_operand_inf) begin + result_is_special = 1'b1; // bypass FMA + // Effective addition of opposite infinities (±inf - ±inf) is invalid! + if ((info_a.is_inf || info_b.is_inf) && info_c.is_inf && effective_subtraction) + special_status.NV = 1'b1; // invalid operation + // Handle cases where output will be inf because of inf product input + else if (info_a.is_inf || info_b.is_inf) begin + // Result is infinity with the sign of the product + special_result = '{sign: operand_a.sign ^ operand_b.sign, exponent: '1, mantissa: '0}; + // Handle cases where the addend is inf + end else if (info_c.is_inf) begin + // Result is inifinity with sign of the addend (= operand_c) + special_result = '{sign: operand_c.sign, exponent: '1, mantissa: '0}; + end + end + end + + // --------------------------- + // Initial exponent data path + // --------------------------- + logic signed [EXP_WIDTH-1:0] exponent_a, exponent_b, exponent_c; + logic signed [EXP_WIDTH-1:0] exponent_addend, exponent_product, exponent_difference; + logic signed [EXP_WIDTH-1:0] tentative_exponent; + + // Zero-extend exponents into signed container - implicit width extension + assign exponent_a = signed'({1'b0, operand_a.exponent}); + assign exponent_b = signed'({1'b0, operand_b.exponent}); + assign exponent_c = signed'({1'b0, operand_c.exponent}); + + // 
Calculate internal exponents from encoded values. Real exponents are (ex = Ex - bias + 1 - nx) + // with Ex the encoded exponent and nx the implicit bit. Internal exponents stay biased. + assign exponent_addend = signed'(exponent_c + $signed({1'b0, ~info_c.is_normal})); // 0 as subnorm + // Biased product exponent is the sum of encoded exponents minus the bias. + assign exponent_product = (info_a.is_zero || info_b.is_zero) + ? 2 - signed'(BIAS) // in case the product is zero, set minimum exp. + : signed'(exponent_a + info_a.is_subnormal + + exponent_b + info_b.is_subnormal + - signed'(BIAS)); + // Exponent difference is the addend exponent minus the product exponent + assign exponent_difference = exponent_addend - exponent_product; + // The tentative exponent will be the larger of the product or addend exponent + assign tentative_exponent = (exponent_difference > 0) ? exponent_addend : exponent_product; + + // Shift amount for addend based on exponents (unsigned as only right shifts) + logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt; + + always_comb begin : addend_shift_amount + // Product-anchored case, saturated shift (addend is only in the sticky bit) + if (exponent_difference <= signed'(-2 * PRECISION_BITS - 1)) + addend_shamt = 3 * PRECISION_BITS + 4; + // Addend and product will have mutual bits to add + else if (exponent_difference <= signed'(PRECISION_BITS + 2)) + addend_shamt = unsigned'(signed'(PRECISION_BITS) + 3 - exponent_difference); + // Addend-anchored case, saturated shift (product is only in the sticky bit) + else + addend_shamt = 0; + end + + // ------------------ + // Product data path + // ------------------ + logic [PRECISION_BITS-1:0] mantissa_a, mantissa_b, mantissa_c; + logic [2*PRECISION_BITS-1:0] product; // the p*p product is 2p bits wide + logic [3*PRECISION_BITS+3:0] product_shifted; // addends are 3p+4 bit wide (including G/R) + + // Add implicit bits to mantissae + assign mantissa_a = {info_a.is_normal, operand_a.mantissa}; + assign 
mantissa_b = {info_b.is_normal, operand_b.mantissa}; + assign mantissa_c = {info_c.is_normal, operand_c.mantissa}; + + // Mantissa multiplier (a*b) + assign product = mantissa_a * mantissa_b; + + // Product is placed into a 3p+4 bit wide vector, padded with 2 bits for round and sticky: + // | 000...000 | product | RS | + // <- p+2 -> <- 2p -> < 2> + assign product_shifted = product << 2; // constant shift + + // ----------------- + // Addend data path + // ----------------- + logic [3*PRECISION_BITS+3:0] addend_after_shift; // upper 3p+4 bits are needed to go on + logic [PRECISION_BITS-1:0] addend_sticky_bits; // up to p bit of shifted addend are sticky + logic sticky_before_add; // they are compressed into a single sticky bit + logic [3*PRECISION_BITS+3:0] addend_shifted; // addends are 3p+4 bit wide (including G/R) + logic inject_carry_in; // inject carry for subtractions if needed + + // In parallel, the addend is right-shifted according to the exponent difference. Up to p bits + // are shifted out and compressed into a sticky bit. + // BEFORE THE SHIFT: + // | mantissa_c | 000..000 | + // <- p -> <- 3p+4 -> + // AFTER THE SHIFT: + // | 000..........000 | mantissa_c | 000...............0GR | sticky bits | + // <- addend_shamt -> <- p -> <- 2p+4-addend_shamt -> <- up to p -> + assign {addend_after_shift, addend_sticky_bits} = + (mantissa_c << (3 * PRECISION_BITS + 4)) >> addend_shamt; + + assign sticky_before_add = (| addend_sticky_bits); + // assign addend_after_shift[0] = sticky_before_add; + + // In case of a subtraction, the addend is inverted + assign addend_shifted = (effective_subtraction) ? 
~addend_after_shift : addend_after_shift; + assign inject_carry_in = effective_subtraction & ~sticky_before_add; + + // ------ + // Adder + // ------ + logic [3*PRECISION_BITS+4:0] sum_raw; // added one bit for the carry + logic sum_carry; // observe carry bit from sum for sign fixing + logic [3*PRECISION_BITS+3:0] sum; // discard carry as sum won't overflow + logic final_sign; + + //Mantissa adder (ab+c). In normal addition, it cannot overflow. + assign sum_raw = product_shifted + addend_shifted + inject_carry_in; + assign sum_carry = sum_raw[3*PRECISION_BITS+4]; + + // Complement negative sum (can only happen in subtraction -> overflows for positive results) + assign sum = (effective_subtraction && ~sum_carry) ? -sum_raw : sum_raw; + + // In case of a mispredicted subtraction result, do a sign flip + assign final_sign = (effective_subtraction && (sum_carry == tentative_sign)) + ? 1'b1 + : (effective_subtraction ? 1'b0 : tentative_sign); + + // --------------- + // Internal pipeline + // --------------- + // Pipeline output signals as non-arrays + logic effective_subtraction_q; + logic signed [EXP_WIDTH-1:0] exponent_product_q; + logic signed [EXP_WIDTH-1:0] exponent_difference_q; + logic signed [EXP_WIDTH-1:0] tentative_exponent_q; + logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt_q; + logic sticky_before_add_q; + logic [3*PRECISION_BITS+3:0] sum_q; + logic final_sign_q; + fpnew_pkg::roundmode_e rnd_mode_q; + logic result_is_special_q; + fp_t special_result_q; + fpnew_pkg::status_t special_status_q; + // Internal pipeline signals, index i holds signal after i register stages + logic [0:NUM_MID_REGS] mid_pipe_eff_sub_q; + logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_exp_prod_q; + logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_exp_diff_q; + logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_tent_exp_q; + logic [0:NUM_MID_REGS][SHIFT_AMOUNT_WIDTH-1:0] mid_pipe_add_shamt_q; + logic [0:NUM_MID_REGS] mid_pipe_sticky_q; + logic 
[0:NUM_MID_REGS][3*PRECISION_BITS+3:0] mid_pipe_sum_q; + logic [0:NUM_MID_REGS] mid_pipe_final_sign_q; + fpnew_pkg::roundmode_e [0:NUM_MID_REGS] mid_pipe_rnd_mode_q; + logic [0:NUM_MID_REGS] mid_pipe_res_is_spec_q; + fp_t [0:NUM_MID_REGS] mid_pipe_spec_res_q; + fpnew_pkg::status_t [0:NUM_MID_REGS] mid_pipe_spec_stat_q; + TagType [0:NUM_MID_REGS] mid_pipe_tag_q; + logic [0:NUM_MID_REGS] mid_pipe_mask_q; + AuxType [0:NUM_MID_REGS] mid_pipe_aux_q; + logic [0:NUM_MID_REGS] mid_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_MID_REGS] mid_pipe_ready; + + // Input stage: First element of pipeline is taken from upstream logic + assign mid_pipe_eff_sub_q[0] = effective_subtraction; + assign mid_pipe_exp_prod_q[0] = exponent_product; + assign mid_pipe_exp_diff_q[0] = exponent_difference; + assign mid_pipe_tent_exp_q[0] = tentative_exponent; + assign mid_pipe_add_shamt_q[0] = addend_shamt; + assign mid_pipe_sticky_q[0] = sticky_before_add; + assign mid_pipe_sum_q[0] = sum; + assign mid_pipe_final_sign_q[0] = final_sign; + assign mid_pipe_rnd_mode_q[0] = inp_pipe_rnd_mode_q[NUM_INP_REGS]; + assign mid_pipe_res_is_spec_q[0] = result_is_special; + assign mid_pipe_spec_res_q[0] = special_result; + assign mid_pipe_spec_stat_q[0] = special_status; + assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; + assign mid_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS]; + assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; + assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; + // Input stage: Propagate pipeline ready signal to input pipe + assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0]; + + // Generate the register stages + for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(mid_pipe_eff_sub_q[i+1], mid_pipe_eff_sub_q[i], reg_ena, '0) + `FFL(mid_pipe_exp_prod_q[i+1], mid_pipe_exp_prod_q[i], reg_ena, '0) + `FFL(mid_pipe_exp_diff_q[i+1], mid_pipe_exp_diff_q[i], reg_ena, '0) + `FFL(mid_pipe_tent_exp_q[i+1], mid_pipe_tent_exp_q[i], reg_ena, '0) + `FFL(mid_pipe_add_shamt_q[i+1], mid_pipe_add_shamt_q[i], reg_ena, '0) + `FFL(mid_pipe_sticky_q[i+1], mid_pipe_sticky_q[i], reg_ena, '0) + `FFL(mid_pipe_sum_q[i+1], mid_pipe_sum_q[i], reg_ena, '0) + `FFL(mid_pipe_final_sign_q[i+1], mid_pipe_final_sign_q[i], reg_ena, '0) + `FFL(mid_pipe_rnd_mode_q[i+1], mid_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(mid_pipe_res_is_spec_q[i+1], mid_pipe_res_is_spec_q[i], reg_ena, '0) + `FFL(mid_pipe_spec_res_q[i+1], mid_pipe_spec_res_q[i], reg_ena, '0) + `FFL(mid_pipe_spec_stat_q[i+1], mid_pipe_spec_stat_q[i], reg_ena, '0) + `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(mid_pipe_mask_q[i+1], mid_pipe_mask_q[i], reg_ena, '0) + `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + // Output stage: assign selected pipe outputs to signals for later use + assign effective_subtraction_q = mid_pipe_eff_sub_q[NUM_MID_REGS]; + assign exponent_product_q = mid_pipe_exp_prod_q[NUM_MID_REGS]; + assign exponent_difference_q = mid_pipe_exp_diff_q[NUM_MID_REGS]; + assign tentative_exponent_q = mid_pipe_tent_exp_q[NUM_MID_REGS]; + assign addend_shamt_q = mid_pipe_add_shamt_q[NUM_MID_REGS]; + 
assign sticky_before_add_q = mid_pipe_sticky_q[NUM_MID_REGS]; + assign sum_q = mid_pipe_sum_q[NUM_MID_REGS]; + assign final_sign_q = mid_pipe_final_sign_q[NUM_MID_REGS]; + assign rnd_mode_q = mid_pipe_rnd_mode_q[NUM_MID_REGS]; + assign result_is_special_q = mid_pipe_res_is_spec_q[NUM_MID_REGS]; + assign special_result_q = mid_pipe_spec_res_q[NUM_MID_REGS]; + assign special_status_q = mid_pipe_spec_stat_q[NUM_MID_REGS]; + + // -------------- + // Normalization + // -------------- + logic [LOWER_SUM_WIDTH-1:0] sum_lower; // lower 2p+3 bits of sum are searched + logic [LZC_RESULT_WIDTH-1:0] leading_zero_count; // the number of leading zeroes + logic signed [LZC_RESULT_WIDTH:0] leading_zero_count_sgn; // signed leading-zero count + logic lzc_zeroes; // in case only zeroes found + + logic [SHIFT_AMOUNT_WIDTH-1:0] norm_shamt; // Normalization shift amount + logic signed [EXP_WIDTH-1:0] normalized_exponent; + + logic [3*PRECISION_BITS+4:0] sum_shifted; // result after first normalization shift + logic [PRECISION_BITS:0] final_mantissa; // final mantissa before rounding with round bit + logic [2*PRECISION_BITS+2:0] sum_sticky_bits; // remaining 2p+3 sticky bits after normalization + logic sticky_after_norm; // sticky bit after normalization + + logic signed [EXP_WIDTH-1:0] final_exponent; + + assign sum_lower = sum_q[LOWER_SUM_WIDTH-1:0]; + + // Leading zero counter for cancellations + lzc #( + .WIDTH ( LOWER_SUM_WIDTH ), + .MODE ( 1 ) // MODE = 1 counts leading zeroes + ) i_lzc ( + .in_i ( sum_lower ), + .cnt_o ( leading_zero_count ), + .empty_o ( lzc_zeroes ) + ); + + assign leading_zero_count_sgn = signed'({1'b0, leading_zero_count}); + + // Normalization shift amount based on exponents and LZC (unsigned as only left shifts) + always_comb begin : norm_shift_amount + // Product-anchored case or cancellations require LZC + if ((exponent_difference_q <= 0) || (effective_subtraction_q && (exponent_difference_q <= 2))) begin + // Normal result (biased exponent > 0 and not a 
zero) + if ((exponent_product_q - leading_zero_count_sgn + 1 >= 0) && !lzc_zeroes) begin + // Undo initial product shift, remove the counted zeroes + norm_shamt = PRECISION_BITS + 2 + leading_zero_count; + normalized_exponent = exponent_product_q - leading_zero_count_sgn + 1; // account for shift + // Subnormal result + end else begin + // Cap the shift distance to align mantissa with minimum exponent + norm_shamt = unsigned'(signed'(PRECISION_BITS) + 2 + exponent_product_q); + normalized_exponent = 0; // subnormals encoded as 0 + end + // Addend-anchored case + end else begin + norm_shamt = addend_shamt_q; // Undo the initial shift + normalized_exponent = tentative_exponent_q; + end + end + + // Do the large normalization shift + assign sum_shifted = sum_q << norm_shamt; + + // The addend-anchored case needs a 1-bit normalization since the leading-one can be to the left + // or right of the (non-carry) MSB of the sum. + always_comb begin : small_norm + // Default assignment, discarding carry bit + {final_mantissa, sum_sticky_bits} = sum_shifted; + final_exponent = normalized_exponent; + + // The normalized sum has overflown, align right and fix exponent + if (sum_shifted[3*PRECISION_BITS+4]) begin // check the carry bit + {final_mantissa, sum_sticky_bits} = sum_shifted >> 1; + final_exponent = normalized_exponent + 1; + // The normalized sum is normal, nothing to do + end else if (sum_shifted[3*PRECISION_BITS+3]) begin // check the sum MSB + // do nothing + // The normalized sum is still denormal, align left - unless the result is not already subnormal + end else if (normalized_exponent > 1) begin + {final_mantissa, sum_sticky_bits} = sum_shifted << 1; + final_exponent = normalized_exponent - 1; + // Otherwise we're denormal + end else begin + final_exponent = '0; + end + end + + // Update the sticky bit with the shifted-out bits + assign sticky_after_norm = (| {sum_sticky_bits}) | sticky_before_add_q; + + // ---------------------------- + // Rounding and 
classification + // ---------------------------- + logic pre_round_sign; + logic [EXP_BITS-1:0] pre_round_exponent; + logic [MAN_BITS-1:0] pre_round_mantissa; + logic [EXP_BITS+MAN_BITS-1:0] pre_round_abs; // absolute value of result before rounding + logic [1:0] round_sticky_bits; + + logic of_before_round, of_after_round; // overflow + logic uf_before_round, uf_after_round; // underflow + logic result_zero; + + logic rounded_sign; + logic [EXP_BITS+MAN_BITS-1:0] rounded_abs; // absolute value of result after rounding + + // Classification before round. RISC-V mandates checking underflow AFTER rounding! + assign of_before_round = final_exponent >= 2**(EXP_BITS)-1; // infinity exponent is all ones + assign uf_before_round = final_exponent == 0; // exponent for subnormals capped to 0 + + // Assemble result before rounding. In case of overflow, the largest normal value is set. + assign pre_round_sign = final_sign_q; + assign pre_round_exponent = (of_before_round) ? 2**EXP_BITS-2 : unsigned'(final_exponent[EXP_BITS-1:0]); + assign pre_round_mantissa = (of_before_round) ? '1 : final_mantissa[MAN_BITS:1]; // bit 0 is R bit + assign pre_round_abs = {pre_round_exponent, pre_round_mantissa}; + + // In case of overflow, the round and sticky bits are set for proper rounding + assign round_sticky_bits = (of_before_round) ? 
2'b11 : {final_mantissa[0], sticky_after_norm}; + + // Perform the rounding + fpnew_rounding #( + .AbsWidth ( EXP_BITS + MAN_BITS ) + ) i_fpnew_rounding ( + .abs_value_i ( pre_round_abs ), + .sign_i ( pre_round_sign ), + .round_sticky_bits_i ( round_sticky_bits ), + .rnd_mode_i ( rnd_mode_q ), + .effective_subtraction_i ( effective_subtraction_q ), + .abs_rounded_o ( rounded_abs ), + .sign_o ( rounded_sign ), + .exact_zero_o ( result_zero ) + ); + + // Classification after rounding + assign uf_after_round = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0; // exponent = 0 + assign of_after_round = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '1; // exponent all ones + + // ----------------- + // Result selection + // ----------------- + logic [WIDTH-1:0] regular_result; + fpnew_pkg::status_t regular_status; + + // Assemble regular result + assign regular_result = {rounded_sign, rounded_abs}; + assign regular_status.NV = 1'b0; // only valid cases are handled in regular path + assign regular_status.DZ = 1'b0; // no divisions + assign regular_status.OF = of_before_round | of_after_round; // rounding can introduce overflow + assign regular_status.UF = uf_after_round & regular_status.NX; // only inexact results raise UF + assign regular_status.NX = (| round_sticky_bits) | of_before_round | of_after_round; + + // Final results for output pipeline + fp_t result_d; + fpnew_pkg::status_t status_d; + + // Select output depending on special case detection + assign result_d = result_is_special_q ? special_result_q : regular_result; + assign status_d = result_is_special_q ? 
special_status_q : regular_status; + + // ---------------- + // Output Pipeline + // ---------------- + // Output pipeline signals, index i holds signal after i register stages + fp_t [0:NUM_OUT_REGS] out_pipe_result_q; + fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; + TagType [0:NUM_OUT_REGS] out_pipe_tag_q; + logic [0:NUM_OUT_REGS] out_pipe_mask_q; + AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; + logic [0:NUM_OUT_REGS] out_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_OUT_REGS] out_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign out_pipe_result_q[0] = result_d; + assign out_pipe_status_q[0] = status_d; + assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS]; + assign out_pipe_mask_q[0] = mid_pipe_mask_q[NUM_MID_REGS]; + assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS]; + assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS]; + // Input stage: Propagate pipeline ready signal to inside pipe + assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) + `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) + `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) + `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + // Output stage: Ready travels backwards from output side, driven by downstream circuitry + assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs + assign result_o = out_pipe_result_q[NUM_OUT_REGS]; + assign status_o = out_pipe_status_q[NUM_OUT_REGS]; + assign extension_bit_o = 1'b1; // always NaN-Box result + assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; + assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; + assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; + assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; + assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); +endmodule diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_fma_multi.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_fma_multi.sv new file mode 100644 index 00000000..cceeae3c --- /dev/null +++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_fma_multi.sv @@ -0,0 +1,839 @@ +// Copyright 2019 ETH Zurich and University of Bologna. 
+// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// SPDX-License-Identifier: SHL-0.51 + +// Author: Stefan Mach + +`include "common_cells/registers.svh" + +module fpnew_fma_multi #( + parameter fpnew_pkg::fmt_logic_t FpFmtConfig = '1, + parameter int unsigned NumPipeRegs = 0, + parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, + parameter type TagType = logic, + parameter type AuxType = logic, + // Do not change + localparam int unsigned WIDTH = fpnew_pkg::max_fp_width(FpFmtConfig), + localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS +) ( + input logic clk_i, + input logic rst_ni, + // Input signals + input logic [2:0][WIDTH-1:0] operands_i, // 3 operands + input logic [NUM_FORMATS-1:0][2:0] is_boxed_i, // 3 operands + input fpnew_pkg::roundmode_e rnd_mode_i, + input fpnew_pkg::operation_e op_i, + input logic op_mod_i, + input fpnew_pkg::fp_format_e src_fmt_i, // format of the multiplicands + input fpnew_pkg::fp_format_e dst_fmt_i, // format of the addend and result + input TagType tag_i, + input logic mask_i, + input AuxType aux_i, + // Input Handshake + input logic in_valid_i, + output logic in_ready_o, + input logic flush_i, + // Output signals + output logic [WIDTH-1:0] result_o, + output fpnew_pkg::status_t status_o, + output logic extension_bit_o, + output TagType tag_o, + output logic mask_o, + output AuxType aux_o, + // Output handshake + output logic 
out_valid_o, + input logic out_ready_i, + // Indication of valid data in flight + output logic busy_o +); + + // ---------- + // Constants + // ---------- + // The super-format that can hold all formats + localparam fpnew_pkg::fp_encoding_t SUPER_FORMAT = fpnew_pkg::super_format(FpFmtConfig); + + localparam int unsigned SUPER_EXP_BITS = SUPER_FORMAT.exp_bits; + localparam int unsigned SUPER_MAN_BITS = SUPER_FORMAT.man_bits; + + // Precision bits 'p' include the implicit bit + localparam int unsigned PRECISION_BITS = SUPER_MAN_BITS + 1; + // The lower 2p+3 bits of the internal FMA result will be needed for leading-zero detection + localparam int unsigned LOWER_SUM_WIDTH = 2 * PRECISION_BITS + 3; + localparam int unsigned LZC_RESULT_WIDTH = $clog2(LOWER_SUM_WIDTH); + // Internal exponent width of FMA must accomodate all meaningful exponent values in order to avoid + // datapath leakage. This is either given by the exponent bits or the width of the LZC result. + // In most reasonable FP formats the internal exponent will be wider than the LZC result. + localparam int unsigned EXP_WIDTH = fpnew_pkg::maximum(SUPER_EXP_BITS + 2, LZC_RESULT_WIDTH); + // Shift amount width: maximum internal mantissa size is 3p+4 bits + localparam int unsigned SHIFT_AMOUNT_WIDTH = $clog2(3 * PRECISION_BITS + 5); + // Pipelines + localparam NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 1) / 3) // Second to get distributed regs + : 0); // no regs here otherwise + localparam NUM_MID_REGS = PipeConfig == fpnew_pkg::INSIDE + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 2) / 3) // First to get distributed regs + : 0); // no regs here otherwise + localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? 
(NumPipeRegs / 3) // Last to get distributed regs + : 0); // no regs here otherwise + + // ---------------- + // Type definition + // ---------------- + typedef struct packed { + logic sign; + logic [SUPER_EXP_BITS-1:0] exponent; + logic [SUPER_MAN_BITS-1:0] mantissa; + } fp_t; + + // --------------- + // Input pipeline + // --------------- + // Selected pipeline output signals as non-arrays + logic [2:0][WIDTH-1:0] operands_q; + fpnew_pkg::fp_format_e src_fmt_q; + fpnew_pkg::fp_format_e dst_fmt_q; + + // Input pipeline signals, index i holds signal after i register stages + logic [0:NUM_INP_REGS][2:0][WIDTH-1:0] inp_pipe_operands_q; + logic [0:NUM_INP_REGS][NUM_FORMATS-1:0][2:0] inp_pipe_is_boxed_q; + fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; + fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; + logic [0:NUM_INP_REGS] inp_pipe_op_mod_q; + fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_src_fmt_q; + fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; + TagType [0:NUM_INP_REGS] inp_pipe_tag_q; + logic [0:NUM_INP_REGS] inp_pipe_mask_q; + AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; + logic [0:NUM_INP_REGS] inp_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_INP_REGS] inp_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign inp_pipe_operands_q[0] = operands_i; + assign inp_pipe_is_boxed_q[0] = is_boxed_i; + assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; + assign inp_pipe_op_q[0] = op_i; + assign inp_pipe_op_mod_q[0] = op_mod_i; + assign inp_pipe_src_fmt_q[0] = src_fmt_i; + assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; + assign inp_pipe_tag_q[0] = tag_i; + assign inp_pipe_mask_q[0] = mask_i; + assign inp_pipe_aux_q[0] = aux_i; + assign inp_pipe_valid_q[0] = in_valid_i; + // Input stage: Propagate pipeline ready signal to updtream circuitry + assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline 
+ // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. if the next stage only holds a bubble (not valid) -> we can pop it + assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) + `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) + `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) + `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0) + `FFL(inp_pipe_src_fmt_q[i+1], inp_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) + `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + // Output stage: assign selected pipe outputs to signals for later use + assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; + assign src_fmt_q = inp_pipe_src_fmt_q[NUM_INP_REGS]; + assign dst_fmt_q = inp_pipe_dst_fmt_q[NUM_INP_REGS]; + + // ----------------- + // Input processing + // ----------------- + logic [NUM_FORMATS-1:0][2:0] fmt_sign; + logic signed [NUM_FORMATS-1:0][2:0][SUPER_EXP_BITS-1:0] fmt_exponent; + logic [NUM_FORMATS-1:0][2:0][SUPER_MAN_BITS-1:0] fmt_mantissa; + + fpnew_pkg::fp_info_t 
[NUM_FORMATS-1:0][2:0] info_q;
+
+  // FP Input initialization
+  for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : fmt_init_inputs
+    // Set up some constants
+    localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
+
+    if (FpFmtConfig[fmt]) begin : active_format
+      logic [2:0][FP_WIDTH-1:0] trimmed_ops;
+
+      // Classify input
+      fpnew_classifier #(
+        .FpFormat ( fpnew_pkg::fp_format_e'(fmt) ),
+        .NumOperands ( 3 )
+      ) i_fpnew_classifier (
+        .operands_i ( trimmed_ops ),
+        .is_boxed_i ( inp_pipe_is_boxed_q[NUM_INP_REGS][fmt] ),
+        .info_o ( info_q[fmt] )
+      );
+      for (genvar op = 0; op < 3; op++) begin : gen_operands
+        assign trimmed_ops[op] = operands_q[op][FP_WIDTH-1:0];
+        assign fmt_sign[fmt][op] = operands_q[op][FP_WIDTH-1];
+        assign fmt_exponent[fmt][op] = signed'({1'b0, operands_q[op][MAN_BITS+:EXP_BITS]});
+        assign fmt_mantissa[fmt][op] = {info_q[fmt][op].is_normal, operands_q[op][MAN_BITS-1:0]} <<
+                                       (SUPER_MAN_BITS - MAN_BITS); // move to left of mantissa
+      end
+    end else begin : inactive_format
+      assign info_q[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+      assign fmt_sign[fmt] = fpnew_pkg::DONT_CARE; // format disabled
+      assign fmt_exponent[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+      assign fmt_mantissa[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled
+    end
+  end
+
+  fp_t operand_a, operand_b, operand_c;
+  fpnew_pkg::fp_info_t info_a, info_b, info_c;
+
+  // Operation selection and operand adjustment
+  // | \c op_q  | \c op_mod_q | Operation Adjustment
+  // |:--------:|:-----------:|---------------------
+  // | FMADD    | \c 0        | FMADD: none
+  // | FMADD    | \c 1        | FMSUB: Invert sign of operand C
+  // | FNMSUB   | \c 0        | FNMSUB: Invert sign of operand A
+  // | FNMSUB   | \c 1        | FNMADD: Invert sign of operands A and C
+  // | ADD      | \c 0        | ADD: Set operand A to +1.0
+  // | ADD      | \c 1        | SUB: Set operand A to +1.0, invert sign of operand C
+  // | MUL      | \c 0        | MUL: Set operand C to +0.0 or -0.0 depending on the rounding mode
+  // | *others* | \c -        | *invalid*
+  // \note \c op_mod_q always inverts the sign of the addend.
+  always_comb begin : op_select
+
+    // Default assignments - packing-order-agnostic
+    operand_a = {fmt_sign[src_fmt_q][0], fmt_exponent[src_fmt_q][0], fmt_mantissa[src_fmt_q][0]};
+    operand_b = {fmt_sign[src_fmt_q][1], fmt_exponent[src_fmt_q][1], fmt_mantissa[src_fmt_q][1]};
+    operand_c = {fmt_sign[dst_fmt_q][2], fmt_exponent[dst_fmt_q][2], fmt_mantissa[dst_fmt_q][2]};
+    info_a = info_q[src_fmt_q][0];
+    info_b = info_q[src_fmt_q][1];
+    info_c = info_q[dst_fmt_q][2];
+
+    // op_mod_q inverts sign of operand C
+    operand_c.sign = operand_c.sign ^ inp_pipe_op_mod_q[NUM_INP_REGS];
+
+    unique case (inp_pipe_op_q[NUM_INP_REGS])
+      fpnew_pkg::FMADD: ; // do nothing
+      fpnew_pkg::FNMSUB: operand_a.sign = ~operand_a.sign; // invert sign of product
+      fpnew_pkg::ADD: begin // Set multiplicand to +1
+        operand_a = '{sign: 1'b0, exponent: fpnew_pkg::bias(src_fmt_q), mantissa: '0};
+        info_a = '{is_normal: 1'b1, is_boxed: 1'b1, default: 1'b0}; // normal, boxed value.
+      end
+      fpnew_pkg::MUL: begin // Set addend to +0 or -0, depending on whether the rounding mode is RDN
+        if (inp_pipe_rnd_mode_q[NUM_INP_REGS] == fpnew_pkg::RDN)
+          operand_c = '{sign: 1'b0, exponent: '0, mantissa: '0};
+        else
+          operand_c = '{sign: 1'b1, exponent: '0, mantissa: '0};
+        info_c = '{is_zero: 1'b1, is_boxed: 1'b1, default: 1'b0}; // zero, boxed value.
+      end
+      default: begin // propagate don't cares
+        operand_a = '{default: fpnew_pkg::DONT_CARE};
+        operand_b = '{default: fpnew_pkg::DONT_CARE};
+        operand_c = '{default: fpnew_pkg::DONT_CARE};
+        info_a = '{default: fpnew_pkg::DONT_CARE};
+        info_b = '{default: fpnew_pkg::DONT_CARE};
+        info_c = '{default: fpnew_pkg::DONT_CARE};
+      end
+    endcase
+  end
+
+  // ---------------------
+  // Input classification
+  // ---------------------
+  logic any_operand_inf;
+  logic any_operand_nan;
+  logic signalling_nan;
+  logic effective_subtraction;
+  logic tentative_sign;
+
+  // Reduction for special case handling
+  assign any_operand_inf = (| {info_a.is_inf, info_b.is_inf, info_c.is_inf});
+  assign any_operand_nan = (| {info_a.is_nan, info_b.is_nan, info_c.is_nan});
+  assign signalling_nan = (| {info_a.is_signalling, info_b.is_signalling, info_c.is_signalling});
+  // Effective subtraction in FMA occurs when product and addend signs differ
+  assign effective_subtraction = operand_a.sign ^ operand_b.sign ^ operand_c.sign;
+  // The tentative sign of the FMA shall be the sign of the product
+  assign tentative_sign = operand_a.sign ^ operand_b.sign;
+
+  // ----------------------
+  // Special case handling
+  // ----------------------
+  logic [WIDTH-1:0] special_result;
+  fpnew_pkg::status_t special_status;
+  logic result_is_special;
+
+  logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_special_result;
+  fpnew_pkg::status_t [NUM_FORMATS-1:0] fmt_special_status;
+  logic [NUM_FORMATS-1:0] fmt_result_is_special;
+
+
+  for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_special_results
+    // Set up some constants
+    localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
+
+    localparam logic [EXP_BITS-1:0] QNAN_EXPONENT = '1;
+    localparam logic [MAN_BITS-1:0] QNAN_MANTISSA = 2**(MAN_BITS-1);
+    localparam logic [MAN_BITS-1:0] ZERO_MANTISSA = '0;
+
+    if (FpFmtConfig[fmt]) begin : active_format
+      always_comb begin : special_results
+        logic [FP_WIDTH-1:0] special_res;
+
+        // Default assignment
+        special_res = {1'b0, QNAN_EXPONENT, QNAN_MANTISSA}; // qNaN
+        fmt_special_status[fmt] = '0;
+        fmt_result_is_special[fmt] = 1'b0;
+
+        // Handle potentially mixed NaN & infinity input => important for the case where infinity and
+        // zero are multiplied and added to a qNaN.
+        // RISC-V mandates raising the NV exception in these cases:
+        // (inf * 0) + c or (0 * inf) + c INVALID, no matter c (even quiet NaNs)
+        if ((info_a.is_inf && info_b.is_zero) || (info_a.is_zero && info_b.is_inf)) begin
+          fmt_result_is_special[fmt] = 1'b1; // bypass FMA, output is the canonical qNaN
+          fmt_special_status[fmt].NV = 1'b1; // invalid operation
+        // NaN Inputs cause canonical quiet NaN at the output and maybe invalid OP
+        end else if (any_operand_nan) begin
+          fmt_result_is_special[fmt] = 1'b1; // bypass FMA, output is the canonical qNaN
+          fmt_special_status[fmt].NV = signalling_nan; // raise the invalid operation flag if signalling
+        // Special cases involving infinity
+        end else if (any_operand_inf) begin
+          fmt_result_is_special[fmt] = 1'b1; // bypass FMA
+          // Effective addition of opposite infinities (±inf - ±inf) is invalid!
+          if ((info_a.is_inf || info_b.is_inf) && info_c.is_inf && effective_subtraction)
+            fmt_special_status[fmt].NV = 1'b1; // invalid operation
+          // Handle cases where output will be inf because of inf product input
+          else if (info_a.is_inf || info_b.is_inf) begin
+            // Result is infinity with the sign of the product
+            special_res = {operand_a.sign ^ operand_b.sign, QNAN_EXPONENT, ZERO_MANTISSA};
+          // Handle cases where the addend is inf
+          end else if (info_c.is_inf) begin
+            // Result is infinity with sign of the addend (= operand_c)
+            special_res = {operand_c.sign, QNAN_EXPONENT, ZERO_MANTISSA};
+          end
+        end
+        // Initialize special result with ones (NaN-box)
+        fmt_special_result[fmt] = '1;
+        fmt_special_result[fmt][FP_WIDTH-1:0] = special_res;
+      end
+    end else begin : inactive_format
+      assign fmt_special_result[fmt] = '{default: fpnew_pkg::DONT_CARE};
+      assign fmt_special_status[fmt] = '0;
+      assign fmt_result_is_special[fmt] = 1'b0;
+    end
+  end
+
+  // Special-case flag, selected by destination format
+  assign result_is_special = fmt_result_is_special[dst_fmt_q]; // they're all the same
+  // Special-case status flags (only NV is ever set above), selected by destination format
+  assign special_status = fmt_special_status[dst_fmt_q];
+  // Assemble result according to destination format
+  assign special_result = fmt_special_result[dst_fmt_q]; // destination format
+
+  // ---------------------------
+  // Initial exponent data path
+  // ---------------------------
+  logic signed [EXP_WIDTH-1:0] exponent_a, exponent_b, exponent_c;
+  logic signed [EXP_WIDTH-1:0] exponent_addend, exponent_product, exponent_difference;
+  logic signed [EXP_WIDTH-1:0] tentative_exponent;
+
+  // Zero-extend exponents into signed container - implicit width extension
+  assign exponent_a = signed'({1'b0, operand_a.exponent});
+  assign exponent_b = signed'({1'b0, operand_b.exponent});
+  assign exponent_c = signed'({1'b0, operand_c.exponent});
+
+  // Calculate internal exponents from encoded
values. Real exponents are (ex = Ex - bias + 1 - nx)
+  // with Ex the encoded exponent and nx the implicit bit. Internal exponents are biased to dst fmt.
+  assign exponent_addend = signed'(exponent_c + $signed({1'b0, ~info_c.is_normal})); // 0 as subnorm
+  // Biased product exponent is the sum of encoded exponents minus the bias.
+  assign exponent_product = (info_a.is_zero || info_b.is_zero) // in case the product is zero, set minimum exp.
+                            ? 2 - signed'(fpnew_pkg::bias(dst_fmt_q))
+                            : signed'(exponent_a + info_a.is_subnormal
+                                      + exponent_b + info_b.is_subnormal
+                                      - 2*signed'(fpnew_pkg::bias(src_fmt_q))
+                                      + signed'(fpnew_pkg::bias(dst_fmt_q))); // rebias for dst fmt
+  // Exponent difference is the addend exponent minus the product exponent
+  assign exponent_difference = exponent_addend - exponent_product;
+  // The tentative exponent will be the larger of the product or addend exponent
+  assign tentative_exponent = (exponent_difference > 0) ? exponent_addend : exponent_product;
+
+  // Shift amount for addend based on exponents (unsigned as only right shifts)
+  logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt;
+
+  always_comb begin : addend_shift_amount
+    // Product-anchored case, saturated shift (addend is only in the sticky bit)
+    if (exponent_difference <= signed'(-2 * PRECISION_BITS - 1))
+      addend_shamt = 3 * PRECISION_BITS + 4;
+    // Addend and product will have mutual bits to add
+    else if (exponent_difference <= signed'(PRECISION_BITS + 2))
+      addend_shamt = unsigned'(signed'(PRECISION_BITS) + 3 - exponent_difference);
+    // Addend-anchored case, saturated shift (product is only in the sticky bit)
+    else
+      addend_shamt = 0;
+  end
+
+  // ------------------
+  // Product data path
+  // ------------------
+  logic [PRECISION_BITS-1:0] mantissa_a, mantissa_b, mantissa_c;
+  logic [2*PRECISION_BITS-1:0] product; // the p*p product is 2p bits wide
+  logic [3*PRECISION_BITS+3:0] product_shifted; // addends are 3p+4 bit wide (including G/R)
+
+  // Add implicit bits to mantissae
+  assign mantissa_a = {info_a.is_normal, operand_a.mantissa};
+  assign mantissa_b = {info_b.is_normal, operand_b.mantissa};
+  assign mantissa_c = {info_c.is_normal, operand_c.mantissa};
+
+  // Mantissa multiplier (a*b)
+  assign product = mantissa_a * mantissa_b;
+
+  // Product is placed into a 3p+4 bit wide vector, padded with 2 bits for round and sticky:
+  // | 000...000 | product | RS |
+  //  <-  p+2  -> <-  2p -> < 2>
+  assign product_shifted = product << 2; // constant shift
+
+  // -----------------
+  // Addend data path
+  // -----------------
+  logic [3*PRECISION_BITS+3:0] addend_after_shift; // upper 3p+4 bits are needed to go on
+  logic [PRECISION_BITS-1:0] addend_sticky_bits; // up to p bits of shifted addend are sticky
+  logic sticky_before_add; // they are compressed into a single sticky bit
+  logic [3*PRECISION_BITS+3:0] addend_shifted; // addends are 3p+4 bit wide (including G/R)
+  logic inject_carry_in; // inject carry for subtractions if needed
+
+  // In parallel, the addend is right-shifted according to the exponent difference. Up to p bits are
+  // shifted out and compressed into a sticky bit.
+  // BEFORE THE SHIFT:
+  // | mantissa_c | 000..000 |
+  //  <-    p   -> <- 3p+4 ->
+  // AFTER THE SHIFT:
+  // | 000..........000 | mantissa_c | 000...............0GR |  sticky bits  |
+  //  <- addend_shamt -> <-    p   -> <- 2p+4-addend_shamt -> <-  up to p  ->
+  assign {addend_after_shift, addend_sticky_bits} =
+      (mantissa_c << (3 * PRECISION_BITS + 4)) >> addend_shamt;
+
+  assign sticky_before_add = (| addend_sticky_bits);
+
+  // In case of a subtraction, the addend is inverted
+  assign addend_shifted = (effective_subtraction) ? ~addend_after_shift : addend_after_shift;
+  assign inject_carry_in = effective_subtraction & ~sticky_before_add;
+
+  // ------
+  // Adder
+  // ------
+  logic [3*PRECISION_BITS+4:0] sum_raw; // added one bit for the carry
+  logic sum_carry; // observe carry bit from sum for sign fixing
+  logic [3*PRECISION_BITS+3:0] sum; // discard carry as sum won't overflow
+  logic final_sign;
+
+  // Mantissa adder (ab+c). In normal addition, it cannot overflow.
+  assign sum_raw = product_shifted + addend_shifted + inject_carry_in;
+  assign sum_carry = sum_raw[3*PRECISION_BITS+4];
+
+  // Complement negative sum (can only happen in subtraction -> overflows for positive results)
+  assign sum = (effective_subtraction && ~sum_carry) ? -sum_raw : sum_raw;
+
+  // In case of a mispredicted subtraction result, do a sign flip
+  assign final_sign = (effective_subtraction && (sum_carry == tentative_sign))
+                      ? 1'b1
+                      : (effective_subtraction ? 1'b0 : tentative_sign);
+
+  // ---------------
+  // Internal pipeline
+  // ---------------
+  // Pipeline output signals as non-arrays
+  logic effective_subtraction_q;
+  logic signed [EXP_WIDTH-1:0] exponent_product_q;
+  logic signed [EXP_WIDTH-1:0] exponent_difference_q;
+  logic signed [EXP_WIDTH-1:0] tentative_exponent_q;
+  logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt_q;
+  logic sticky_before_add_q;
+  logic [3*PRECISION_BITS+3:0] sum_q;
+  logic final_sign_q;
+  fpnew_pkg::fp_format_e dst_fmt_q2;
+  fpnew_pkg::roundmode_e rnd_mode_q;
+  logic result_is_special_q;
+  fp_t special_result_q;
+  fpnew_pkg::status_t special_status_q;
+  // Internal pipeline signals, index i holds signal after i register stages
+  logic [0:NUM_MID_REGS] mid_pipe_eff_sub_q;
+  logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_exp_prod_q;
+  logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_exp_diff_q;
+  logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_tent_exp_q;
+  logic [0:NUM_MID_REGS][SHIFT_AMOUNT_WIDTH-1:0] mid_pipe_add_shamt_q;
+  logic [0:NUM_MID_REGS] mid_pipe_sticky_q;
+  logic [0:NUM_MID_REGS][3*PRECISION_BITS+3:0] mid_pipe_sum_q;
+  logic [0:NUM_MID_REGS] mid_pipe_final_sign_q;
+  fpnew_pkg::roundmode_e [0:NUM_MID_REGS] mid_pipe_rnd_mode_q;
+  fpnew_pkg::fp_format_e [0:NUM_MID_REGS] mid_pipe_dst_fmt_q;
+  logic [0:NUM_MID_REGS] mid_pipe_res_is_spec_q;
+  fp_t [0:NUM_MID_REGS] mid_pipe_spec_res_q;
+  fpnew_pkg::status_t [0:NUM_MID_REGS] mid_pipe_spec_stat_q;
+  TagType [0:NUM_MID_REGS] mid_pipe_tag_q;
+  logic [0:NUM_MID_REGS] mid_pipe_mask_q;
+  AuxType [0:NUM_MID_REGS] mid_pipe_aux_q;
+  logic [0:NUM_MID_REGS] mid_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_MID_REGS] mid_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from upstream logic
+  assign mid_pipe_eff_sub_q[0] = effective_subtraction;
+  assign mid_pipe_exp_prod_q[0] = exponent_product;
+  assign mid_pipe_exp_diff_q[0] = exponent_difference;
+  assign mid_pipe_tent_exp_q[0] = tentative_exponent;
+  assign mid_pipe_add_shamt_q[0] = addend_shamt;
+  assign mid_pipe_sticky_q[0] = sticky_before_add;
+  assign mid_pipe_sum_q[0] = sum;
+  assign mid_pipe_final_sign_q[0] = final_sign;
+  assign mid_pipe_rnd_mode_q[0] = inp_pipe_rnd_mode_q[NUM_INP_REGS];
+  assign mid_pipe_dst_fmt_q[0] = dst_fmt_q;
+  assign mid_pipe_res_is_spec_q[0] = result_is_special;
+  assign mid_pipe_spec_res_q[0] = special_result;
+  assign mid_pipe_spec_stat_q[0] = special_status;
+  assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS];
+  assign mid_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS];
+  assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS];
+  assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS];
+  // Input stage: Propagate pipeline ready signal to input pipe
+  assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0];
+
+  // Generate the register stages
+  for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline
+    // Internal register enable for this stage
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers
+    `FFL(mid_pipe_eff_sub_q[i+1], mid_pipe_eff_sub_q[i], reg_ena, '0)
+    `FFL(mid_pipe_exp_prod_q[i+1], mid_pipe_exp_prod_q[i], reg_ena, '0)
+    `FFL(mid_pipe_exp_diff_q[i+1], mid_pipe_exp_diff_q[i], reg_ena, '0)
+    `FFL(mid_pipe_tent_exp_q[i+1], mid_pipe_tent_exp_q[i], reg_ena, '0)
+    `FFL(mid_pipe_add_shamt_q[i+1], mid_pipe_add_shamt_q[i], reg_ena, '0)
+    `FFL(mid_pipe_sticky_q[i+1], mid_pipe_sticky_q[i], reg_ena, '0)
+    `FFL(mid_pipe_sum_q[i+1], mid_pipe_sum_q[i], reg_ena, '0)
+    `FFL(mid_pipe_final_sign_q[i+1], mid_pipe_final_sign_q[i], reg_ena, '0)
+    `FFL(mid_pipe_rnd_mode_q[i+1], mid_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
+    `FFL(mid_pipe_dst_fmt_q[i+1], mid_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0))
+    `FFL(mid_pipe_res_is_spec_q[i+1], mid_pipe_res_is_spec_q[i], reg_ena, '0)
+    `FFL(mid_pipe_spec_res_q[i+1], mid_pipe_spec_res_q[i], reg_ena, '0)
+    `FFL(mid_pipe_spec_stat_q[i+1], mid_pipe_spec_stat_q[i], reg_ena, '0)
+    `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0))
+    `FFL(mid_pipe_mask_q[i+1], mid_pipe_mask_q[i], reg_ena, '0)
+    `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0))
+  end
+  // Output stage: assign selected pipe outputs to signals for later use
+  assign effective_subtraction_q = mid_pipe_eff_sub_q[NUM_MID_REGS];
+  assign exponent_product_q = mid_pipe_exp_prod_q[NUM_MID_REGS];
+  assign exponent_difference_q = mid_pipe_exp_diff_q[NUM_MID_REGS];
+  assign tentative_exponent_q = mid_pipe_tent_exp_q[NUM_MID_REGS];
+  assign addend_shamt_q = mid_pipe_add_shamt_q[NUM_MID_REGS];
+  assign sticky_before_add_q = mid_pipe_sticky_q[NUM_MID_REGS];
+  assign sum_q = mid_pipe_sum_q[NUM_MID_REGS];
+  assign final_sign_q = mid_pipe_final_sign_q[NUM_MID_REGS];
+  assign rnd_mode_q = mid_pipe_rnd_mode_q[NUM_MID_REGS];
+  assign dst_fmt_q2 = mid_pipe_dst_fmt_q[NUM_MID_REGS];
+  assign result_is_special_q = mid_pipe_res_is_spec_q[NUM_MID_REGS];
+  assign special_result_q = mid_pipe_spec_res_q[NUM_MID_REGS];
+  assign special_status_q = mid_pipe_spec_stat_q[NUM_MID_REGS];
+
+  // --------------
+  // Normalization
+  // --------------
+  logic [LOWER_SUM_WIDTH-1:0] sum_lower; // lower 2p+3 bits of sum are searched
+  logic [LZC_RESULT_WIDTH-1:0] leading_zero_count; // the number of leading zeroes
+  logic signed [LZC_RESULT_WIDTH:0] leading_zero_count_sgn; // signed leading-zero count
+  logic lzc_zeroes; // in case only zeroes found
+
+  logic [SHIFT_AMOUNT_WIDTH-1:0] norm_shamt; // Normalization shift amount
+  logic signed [EXP_WIDTH-1:0] normalized_exponent;
+
+  logic [3*PRECISION_BITS+4:0] sum_shifted; // result after first normalization shift
+  logic [PRECISION_BITS:0] final_mantissa; // final mantissa before rounding with round bit
+  logic [2*PRECISION_BITS+2:0] sum_sticky_bits; // remaining 2p+3 sticky bits after normalization
+  logic sticky_after_norm; // sticky bit after normalization
+
+  logic signed [EXP_WIDTH-1:0] final_exponent;
+
+  assign sum_lower = sum_q[LOWER_SUM_WIDTH-1:0];
+
+  // Leading zero counter for cancellations
+  lzc #(
+    .WIDTH ( LOWER_SUM_WIDTH ),
+    .MODE ( 1 ) // MODE = 1 counts leading zeroes
+  ) i_lzc (
+    .in_i ( sum_lower ),
+    .cnt_o ( leading_zero_count ),
+    .empty_o ( lzc_zeroes )
+  );
+
+  assign leading_zero_count_sgn = signed'({1'b0, leading_zero_count});
+
+  // Normalization shift amount based on exponents and LZC (unsigned as only left shifts)
+
always_comb begin : norm_shift_amount
+    // Product-anchored case or cancellations require LZC
+    if ((exponent_difference_q <= 0) || (effective_subtraction_q && (exponent_difference_q <= 2))) begin
+      // Normal result (biased exponent > 0 and not a zero)
+      if ((exponent_product_q - leading_zero_count_sgn + 1 >= 0) && !lzc_zeroes) begin
+        // Undo initial product shift, remove the counted zeroes
+        norm_shamt = PRECISION_BITS + 2 + leading_zero_count;
+        normalized_exponent = exponent_product_q - leading_zero_count_sgn + 1; // account for shift
+      // Subnormal result
+      end else begin
+        // Cap the shift distance to align mantissa with minimum exponent
+        norm_shamt = unsigned'(signed'(PRECISION_BITS + 2 + exponent_product_q));
+        normalized_exponent = 0; // subnormals encoded as 0
+      end
+    // Addend-anchored case
+    end else begin
+      norm_shamt = addend_shamt_q; // Undo the initial shift
+      normalized_exponent = tentative_exponent_q;
+    end
+  end
+
+  // Do the large normalization shift
+  assign sum_shifted = sum_q << norm_shamt;
+
+  // The addend-anchored case needs a 1-bit normalization since the leading-one can be to the left
+  // or right of the (non-carry) MSB of the sum.
+  always_comb begin : small_norm
+    // Default assignment, discarding carry bit
+    {final_mantissa, sum_sticky_bits} = sum_shifted;
+    final_exponent = normalized_exponent;
+
+    // The normalized sum has overflowed, align right and fix exponent
+    if (sum_shifted[3*PRECISION_BITS+4]) begin // check the carry bit
+      {final_mantissa, sum_sticky_bits} = sum_shifted >> 1;
+      final_exponent = normalized_exponent + 1;
+    // The normalized sum is normal, nothing to do
+    end else if (sum_shifted[3*PRECISION_BITS+3]) begin // check the sum MSB
+      // do nothing
+    // The normalized sum is still denormal, align left - unless the result is already subnormal
+    end else if (normalized_exponent > 1) begin
+      {final_mantissa, sum_sticky_bits} = sum_shifted << 1;
+      final_exponent = normalized_exponent - 1;
+    // Otherwise we're denormal
+    end else begin
+      final_exponent = '0;
+    end
+  end
+
+  // Update the sticky bit with the shifted-out bits
+  assign sticky_after_norm = (| {sum_sticky_bits}) | sticky_before_add_q;
+
+  // ----------------------------
+  // Rounding and classification
+  // ----------------------------
+  logic pre_round_sign;
+  logic [SUPER_EXP_BITS+SUPER_MAN_BITS-1:0] pre_round_abs; // absolute value of result before rounding
+  logic [1:0] round_sticky_bits;
+
+  logic of_before_round, of_after_round; // overflow
+  logic uf_before_round, uf_after_round; // underflow
+
+  logic [NUM_FORMATS-1:0][SUPER_EXP_BITS+SUPER_MAN_BITS-1:0] fmt_pre_round_abs; // per format
+  logic [NUM_FORMATS-1:0][1:0] fmt_round_sticky_bits;
+
+  logic [NUM_FORMATS-1:0] fmt_of_after_round;
+  logic [NUM_FORMATS-1:0] fmt_uf_after_round;
+
+  logic rounded_sign;
+  logic [SUPER_EXP_BITS+SUPER_MAN_BITS-1:0] rounded_abs; // absolute value of result after rounding
+  logic result_zero;
+
+  // Classification before round. RISC-V mandates checking underflow AFTER rounding!
+  assign of_before_round = final_exponent >= 2**(fpnew_pkg::exp_bits(dst_fmt_q2))-1; // infinity exponent is all ones
+  assign uf_before_round = final_exponent == 0; // exponent for subnormals capped to 0
+
+  // Pack exponent and mantissa into proper rounding form
+  for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_res_assemble
+    // Set up some constants
+    localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
+
+    logic [EXP_BITS-1:0] pre_round_exponent;
+    logic [MAN_BITS-1:0] pre_round_mantissa;
+
+    if (FpFmtConfig[fmt]) begin : active_format
+
+      assign pre_round_exponent = (of_before_round) ? 2**EXP_BITS-2 : final_exponent[EXP_BITS-1:0];
+      assign pre_round_mantissa = (of_before_round) ? '1 : final_mantissa[SUPER_MAN_BITS-:MAN_BITS];
+      // Assemble result before rounding. In case of overflow, the largest normal value is set.
+      assign fmt_pre_round_abs[fmt] = {pre_round_exponent, pre_round_mantissa}; // 0-extend
+
+      // Round bit is after mantissa (1 in case of overflow for rounding)
+      assign fmt_round_sticky_bits[fmt][1] = final_mantissa[SUPER_MAN_BITS-MAN_BITS] |
+                                             of_before_round;
+
+      // Remaining bits in mantissa go to sticky (1 in case of overflow for rounding)
+      if (MAN_BITS < SUPER_MAN_BITS) begin : narrow_sticky
+        assign fmt_round_sticky_bits[fmt][0] = (| final_mantissa[SUPER_MAN_BITS-MAN_BITS-1:0]) |
+                                               sticky_after_norm | of_before_round;
+      end else begin : normal_sticky
+        assign fmt_round_sticky_bits[fmt][0] = sticky_after_norm | of_before_round;
+      end
+    end else begin : inactive_format
+      assign fmt_pre_round_abs[fmt] = '{default: fpnew_pkg::DONT_CARE};
+      assign fmt_round_sticky_bits[fmt] = '{default: fpnew_pkg::DONT_CARE};
+    end
+  end
+
+  // Assemble result before rounding. In case of overflow, the largest normal value is set.
+  assign pre_round_sign = final_sign_q;
+  assign pre_round_abs = fmt_pre_round_abs[dst_fmt_q2];
+
+  // In case of overflow, the round and sticky bits are set for proper rounding
+  assign round_sticky_bits = fmt_round_sticky_bits[dst_fmt_q2];
+
+  // Perform the rounding
+  fpnew_rounding #(
+    .AbsWidth ( SUPER_EXP_BITS + SUPER_MAN_BITS )
+  ) i_fpnew_rounding (
+    .abs_value_i ( pre_round_abs ),
+    .sign_i ( pre_round_sign ),
+    .round_sticky_bits_i ( round_sticky_bits ),
+    .rnd_mode_i ( rnd_mode_q ),
+    .effective_subtraction_i ( effective_subtraction_q ),
+    .abs_rounded_o ( rounded_abs ),
+    .sign_o ( rounded_sign ),
+    .exact_zero_o ( result_zero )
+  );
+
+  logic [NUM_FORMATS-1:0][WIDTH-1:0] fmt_result;
+
+  for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_sign_inject
+    // Set up some constants
+    localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt));
+
+    if (FpFmtConfig[fmt]) begin : active_format
+      always_comb begin : post_process
+        // detect of / uf
+        fmt_uf_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0; // denormal
+        fmt_of_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '1; // inf exp.
+
+        // Assemble regular result, NaN-box short ones.
+        fmt_result[fmt] = '1;
+        fmt_result[fmt][FP_WIDTH-1:0] = {rounded_sign, rounded_abs[EXP_BITS+MAN_BITS-1:0]};
+      end
+    end else begin : inactive_format
+      assign fmt_uf_after_round[fmt] = fpnew_pkg::DONT_CARE;
+      assign fmt_of_after_round[fmt] = fpnew_pkg::DONT_CARE;
+      assign fmt_result[fmt] = '{default: fpnew_pkg::DONT_CARE};
+    end
+  end
+
+  // Classification after rounding, selected by destination format
+  assign uf_after_round = fmt_uf_after_round[dst_fmt_q2];
+  assign of_after_round = fmt_of_after_round[dst_fmt_q2];
+
+
+  // -----------------
+  // Result selection
+  // -----------------
+  logic [WIDTH-1:0] regular_result;
+  fpnew_pkg::status_t regular_status;
+
+  // Assemble regular result
+  assign regular_result = fmt_result[dst_fmt_q2];
+  assign regular_status.NV = 1'b0; // only valid cases are handled in regular path
+  assign regular_status.DZ = 1'b0; // no divisions
+  assign regular_status.OF = of_before_round | of_after_round; // rounding can introduce overflow
+  assign regular_status.UF = uf_after_round & regular_status.NX; // only inexact results raise UF
+  assign regular_status.NX = (| round_sticky_bits) | of_before_round | of_after_round;
+
+  // Final results for output pipeline
+  logic [WIDTH-1:0] result_d;
+  fpnew_pkg::status_t status_d;
+
+  // Select output depending on special case detection
+  assign result_d = result_is_special_q ? special_result_q : regular_result;
+  assign status_d = result_is_special_q ? special_status_q : regular_status;
+
+  // ----------------
+  // Output Pipeline
+  // ----------------
+  // Output pipeline signals, index i holds signal after i register stages
+  logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q;
+  fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q;
+  TagType [0:NUM_OUT_REGS] out_pipe_tag_q;
+  logic [0:NUM_OUT_REGS] out_pipe_mask_q;
+  AuxType [0:NUM_OUT_REGS] out_pipe_aux_q;
+  logic [0:NUM_OUT_REGS] out_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_OUT_REGS] out_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from inputs
+  assign out_pipe_result_q[0] = result_d;
+  assign out_pipe_status_q[0] = status_d;
+  assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS];
+  assign out_pipe_mask_q[0] = mid_pipe_mask_q[NUM_MID_REGS];
+  assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS];
+  assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS];
+  // Input stage: Propagate pipeline ready signal to inside pipe
+  assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0];
+  // Generate the register stages
+  for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline
+    // Internal register enable for this stage
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers
+    `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0)
+    `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0)
+    `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0))
+    `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0)
+    `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0))
+  end
+  // Output stage: Ready travels backwards from output side, driven by downstream circuitry
+  assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i;
+  // Output stage: assign module outputs
+  assign result_o = out_pipe_result_q[NUM_OUT_REGS];
+  assign status_o = out_pipe_status_q[NUM_OUT_REGS];
+  assign extension_bit_o = 1'b1; // always NaN-Box result
+  assign tag_o = out_pipe_tag_q[NUM_OUT_REGS];
+  assign mask_o = out_pipe_mask_q[NUM_OUT_REGS];
+  assign aux_o = out_pipe_aux_q[NUM_OUT_REGS];
+  assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS];
+  assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q});
+endmodule
diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_noncomp.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_noncomp.sv
new file mode 100644
index 00000000..8a182617
--- /dev/null
+++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_noncomp.sv
@@ -0,0 +1,415 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// SPDX-License-Identifier: SHL-0.51
+
+// Author: Stefan Mach
+
+`include "common_cells/registers.svh"
+
+module fpnew_noncomp #(
+  parameter fpnew_pkg::fp_format_e FpFormat = fpnew_pkg::fp_format_e'(0),
+  parameter int unsigned NumPipeRegs = 0,
+  parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE,
+  parameter type TagType = logic,
+  parameter type AuxType = logic,
+
+  localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat) // do not change
+) (
+  input logic clk_i,
+  input logic rst_ni,
+  // Input signals
+  input logic [1:0][WIDTH-1:0] operands_i, // 2 operands
+  input logic [1:0] is_boxed_i, // 2 operands
+  input fpnew_pkg::roundmode_e rnd_mode_i,
+  input fpnew_pkg::operation_e op_i,
+  input logic op_mod_i,
+  input TagType tag_i,
+  input logic mask_i,
+  input AuxType aux_i,
+  // Input Handshake
+  input logic in_valid_i,
+  output logic in_ready_o,
+  input logic flush_i,
+  // Output signals
+  output logic [WIDTH-1:0] result_o,
+  output fpnew_pkg::status_t status_o,
+  output logic extension_bit_o,
+  output fpnew_pkg::classmask_e class_mask_o,
+  output logic is_class_o,
+  output TagType tag_o,
+  output logic mask_o,
+  output AuxType aux_o,
+  // Output handshake
+  output logic out_valid_o,
+  input logic out_ready_i,
+  // Indication of valid data in flight
+  output logic busy_o
+);
+
+  // ----------
+  // Constants
+  // ----------
+  localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(FpFormat);
+  localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(FpFormat);
+  // Pipelines
+  localparam NUM_INP_REGS = (PipeConfig == fpnew_pkg::BEFORE || PipeConfig == fpnew_pkg::INSIDE)
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? ((NumPipeRegs + 1) / 2) // First to get distributed regs
+                               : 0); // no regs here otherwise
+  localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? (NumPipeRegs / 2) // Last to get distributed regs
+                               : 0); // no regs here otherwise
+
+  // ----------------
+  // Type definition
+  // ----------------
+  typedef struct packed {
+    logic sign;
+    logic [EXP_BITS-1:0] exponent;
+    logic [MAN_BITS-1:0] mantissa;
+  } fp_t;
+
+  // ---------------
+  // Input pipeline
+  // ---------------
+  // Input pipeline signals, index i holds signal after i register stages
+  logic [0:NUM_INP_REGS][1:0][WIDTH-1:0] inp_pipe_operands_q;
+  logic [0:NUM_INP_REGS][1:0] inp_pipe_is_boxed_q;
+  fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q;
+  fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q;
+  logic [0:NUM_INP_REGS] inp_pipe_op_mod_q;
+  TagType [0:NUM_INP_REGS] inp_pipe_tag_q;
+  logic [0:NUM_INP_REGS] inp_pipe_mask_q;
+  AuxType [0:NUM_INP_REGS] inp_pipe_aux_q;
+  logic [0:NUM_INP_REGS] inp_pipe_valid_q;
+  // Ready signal is combinatorial for all stages
+  logic [0:NUM_INP_REGS] inp_pipe_ready;
+
+  // Input stage: First element of pipeline is taken from inputs
+  assign inp_pipe_operands_q[0] = operands_i;
+  assign inp_pipe_is_boxed_q[0] = is_boxed_i;
+  assign inp_pipe_rnd_mode_q[0] = rnd_mode_i;
+  assign inp_pipe_op_q[0] = op_i;
+  assign inp_pipe_op_mod_q[0] = op_mod_i;
+  assign inp_pipe_tag_q[0] = tag_i;
+  assign inp_pipe_mask_q[0] = mask_i;
+  assign inp_pipe_aux_q[0] = aux_i;
+  assign inp_pipe_valid_q[0] = in_valid_i;
+  // Input stage: Propagate pipeline ready signal to upstream circuitry
+  assign in_ready_o = inp_pipe_ready[0];
+  // Generate the register stages
+  for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline
+    // Internal register enable for this stage
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1];
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i];
+    // Generate the pipeline registers within the stages, use enable-registers
+    `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0)
+    `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0)
+    `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
+    `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD)
+    `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0)
+    `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0))
+    `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0)
+    `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0))
+  end
+
+  // ---------------------
+  // Input classification
+  // ---------------------
+  fpnew_pkg::fp_info_t [1:0] info_q;
+
+  // Classify input
+  fpnew_classifier #(
+    .FpFormat ( FpFormat ),
+    .NumOperands ( 2 )
+  ) i_class_a (
+    .operands_i ( inp_pipe_operands_q[NUM_INP_REGS] ),
+    .is_boxed_i ( inp_pipe_is_boxed_q[NUM_INP_REGS] ),
+    .info_o ( info_q )
+  );
+
+  fp_t operand_a, operand_b;
+  fpnew_pkg::fp_info_t info_a, info_b;
+
+  // Packing-order-agnostic assignments
+  assign operand_a = inp_pipe_operands_q[NUM_INP_REGS][0];
+  assign operand_b = inp_pipe_operands_q[NUM_INP_REGS][1];
+  assign info_a = info_q[0];
+  assign info_b = info_q[1];
+
+  logic any_operand_inf;
+  logic any_operand_nan;
+  logic signalling_nan;
+
+  // Reduction for special case handling
+  assign any_operand_inf = (| {info_a.is_inf, info_b.is_inf});
+  assign any_operand_nan = (| {info_a.is_nan, info_b.is_nan});
+  assign signalling_nan = (| {info_a.is_signalling, info_b.is_signalling});
+
+  logic operands_equal, operand_a_smaller;
+
+  // Equality checks for zeroes too
+  assign operands_equal = (operand_a == operand_b) || (info_a.is_zero && info_b.is_zero);
+  // Invert result if non-zero signs involved (unsigned comparison)
+  assign operand_a_smaller = (operand_a < operand_b) ^ (operand_a.sign || operand_b.sign);
+
+  // ---------------
+  // Sign Injection
+  // ---------------
+  fp_t sgnj_result;
+  fpnew_pkg::status_t sgnj_status;
+  logic sgnj_extension_bit;
+
+  // Sign Injection - operation is encoded in rnd_mode_q:
+  // RNE = SGNJ, RTZ = SGNJN, RDN = SGNJX, RUP = Passthrough (no NaN-box check)
+  always_comb begin : sign_injections
+    logic sign_a, sign_b; // internal signs
+    // Default assignment
+    sgnj_result = operand_a; // result based on operand a
+
+    // NaN-boxing check will treat invalid inputs as canonical NaNs
+    if (!info_a.is_boxed) sgnj_result = '{sign: 1'b0, exponent: '1, mantissa: 2**(MAN_BITS-1)};
+
+    // Internal signs are treated as positive in case of non-NaN-boxed values
+    sign_a = operand_a.sign & info_a.is_boxed;
+    sign_b = operand_b.sign & info_b.is_boxed;
+
+    // Do the sign injection based on rm field
+    unique case (inp_pipe_rnd_mode_q[NUM_INP_REGS])
+      fpnew_pkg::RNE: sgnj_result.sign = sign_b; // SGNJ
+      fpnew_pkg::RTZ: sgnj_result.sign = ~sign_b; // SGNJN
+      fpnew_pkg::RDN: sgnj_result.sign = sign_a ^ sign_b; // SGNJX
+      fpnew_pkg::RUP: sgnj_result = operand_a; // passthrough
+      default: sgnj_result = '{default: fpnew_pkg::DONT_CARE}; // don't care
+    endcase
+  end
+
+  assign sgnj_status = '0; // sign injections never
raise exceptions + + // op_mod_q enables integer sign-extension of result (for storing to integer regfile) + assign sgnj_extension_bit = inp_pipe_op_mod_q[NUM_INP_REGS] ? sgnj_result.sign : 1'b1; + + // ------------------ + // Minimum / Maximum + // ------------------ + fp_t minmax_result; + fpnew_pkg::status_t minmax_status; + logic minmax_extension_bit; + + // Minimum/Maximum - operation is encoded in rnd_mode_q: + // RNE = MIN, RTZ = MAX + always_comb begin : min_max + // Default assignment + minmax_status = '0; + + // Min/Max use quiet comparisons - only sNaN are invalid + minmax_status.NV = signalling_nan; + + // Both NaN inputs cause a NaN output + if (info_a.is_nan && info_b.is_nan) + minmax_result = '{sign: 1'b0, exponent: '1, mantissa: 2**(MAN_BITS-1)}; // canonical qNaN + // If one operand is NaN, the non-NaN operand is returned + else if (info_a.is_nan) minmax_result = operand_b; + else if (info_b.is_nan) minmax_result = operand_a; + // Otherwise decide according to the operation + else begin + unique case (inp_pipe_rnd_mode_q[NUM_INP_REGS]) + fpnew_pkg::RNE: minmax_result = operand_a_smaller ? operand_a : operand_b; // MIN + fpnew_pkg::RTZ: minmax_result = operand_a_smaller ? 
operand_b : operand_a; // MAX + default: minmax_result = '{default: fpnew_pkg::DONT_CARE}; // don't care + endcase + end + end + + assign minmax_extension_bit = 1'b1; // NaN-box as result is always a float value + + // ------------ + // Comparisons + // ------------ + fp_t cmp_result; + fpnew_pkg::status_t cmp_status; + logic cmp_extension_bit; + + // Comparisons - operation is encoded in rnd_mode_q: + // RNE = LE, RTZ = LT, RDN = EQ + // op_mod_q inverts boolean outputs + always_comb begin : comparisons + // Default assignment + cmp_result = '0; // false + cmp_status = '0; // no flags + + // Signalling NaNs always compare as false and are illegal + if (signalling_nan) cmp_status.NV = 1'b1; // invalid operation + // Otherwise do comparisons + else begin + unique case (inp_pipe_rnd_mode_q[NUM_INP_REGS]) + fpnew_pkg::RNE: begin // Less than or equal + if (any_operand_nan) cmp_status.NV = 1'b1; // Signalling comparison: NaNs are invalid + else cmp_result = (operand_a_smaller | operands_equal) ^ inp_pipe_op_mod_q[NUM_INP_REGS]; + end + fpnew_pkg::RTZ: begin // Less than + if (any_operand_nan) cmp_status.NV = 1'b1; // Signalling comparison: NaNs are invalid + else cmp_result = (operand_a_smaller & ~operands_equal) ^ inp_pipe_op_mod_q[NUM_INP_REGS]; + end + fpnew_pkg::RDN: begin // Equal + if (any_operand_nan) cmp_result = inp_pipe_op_mod_q[NUM_INP_REGS]; // NaN always not equal + else cmp_result = operands_equal ^ inp_pipe_op_mod_q[NUM_INP_REGS]; + end + default: cmp_result = '{default: fpnew_pkg::DONT_CARE}; // don't care + endcase + end + end + + assign cmp_extension_bit = 1'b0; // Comparisons always produce booleans in integer registers + + // --------------- + // Classification + // --------------- + fpnew_pkg::status_t class_status; + logic class_extension_bit; + fpnew_pkg::classmask_e class_mask_d; // the result is actually here + + // Classification - always return the classification mask on the dedicated port + always_comb begin : classify + if 
(info_a.is_normal) begin + class_mask_d = operand_a.sign ? fpnew_pkg::NEGNORM : fpnew_pkg::POSNORM; + end else if (info_a.is_subnormal) begin + class_mask_d = operand_a.sign ? fpnew_pkg::NEGSUBNORM : fpnew_pkg::POSSUBNORM; + end else if (info_a.is_zero) begin + class_mask_d = operand_a.sign ? fpnew_pkg::NEGZERO : fpnew_pkg::POSZERO; + end else if (info_a.is_inf) begin + class_mask_d = operand_a.sign ? fpnew_pkg::NEGINF : fpnew_pkg::POSINF; + end else if (info_a.is_nan) begin + class_mask_d = info_a.is_signalling ? fpnew_pkg::SNAN : fpnew_pkg::QNAN; + end else begin + class_mask_d = fpnew_pkg::QNAN; // default value + end + end + + assign class_status = '0; // classification does not set flags + assign class_extension_bit = 1'b0; // classification always produces results in integer registers + + // ----------------- + // Result selection + // ----------------- + fp_t result_d; + fpnew_pkg::status_t status_d; + logic extension_bit_d; + logic is_class_d; + + // Select result + always_comb begin : select_result + unique case (inp_pipe_op_q[NUM_INP_REGS]) + fpnew_pkg::SGNJ: begin + result_d = sgnj_result; + status_d = sgnj_status; + extension_bit_d = sgnj_extension_bit; + end + fpnew_pkg::MINMAX: begin + result_d = minmax_result; + status_d = minmax_status; + extension_bit_d = minmax_extension_bit; + end + fpnew_pkg::CMP: begin + result_d = cmp_result; + status_d = cmp_status; + extension_bit_d = cmp_extension_bit; + end + fpnew_pkg::CLASSIFY: begin + result_d = '{default: fpnew_pkg::DONT_CARE}; // unused + status_d = class_status; + extension_bit_d = class_extension_bit; + end + default: begin + result_d = '{default: fpnew_pkg::DONT_CARE}; // dont care + status_d = '{default: fpnew_pkg::DONT_CARE}; // dont care + extension_bit_d = fpnew_pkg::DONT_CARE; // dont care + end + endcase + end + + assign is_class_d = (inp_pipe_op_q[NUM_INP_REGS] == fpnew_pkg::CLASSIFY); + + // ---------------- + // Output Pipeline + // ---------------- + // Output pipeline signals, index i 
holds signal after i register stages + fp_t [0:NUM_OUT_REGS] out_pipe_result_q; + fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; + logic [0:NUM_OUT_REGS] out_pipe_extension_bit_q; + fpnew_pkg::classmask_e [0:NUM_OUT_REGS] out_pipe_class_mask_q; + logic [0:NUM_OUT_REGS] out_pipe_is_class_q; + TagType [0:NUM_OUT_REGS] out_pipe_tag_q; + logic [0:NUM_OUT_REGS] out_pipe_mask_q; + AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; + logic [0:NUM_OUT_REGS] out_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_OUT_REGS] out_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign out_pipe_result_q[0] = result_d; + assign out_pipe_status_q[0] = status_d; + assign out_pipe_extension_bit_q[0] = extension_bit_d; + assign out_pipe_class_mask_q[0] = class_mask_d; + assign out_pipe_is_class_q[0] = is_class_d; + assign out_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; + assign out_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS]; + assign out_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; + assign out_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; + // Input stage: Propagate pipeline ready signal to inside pipe + assign inp_pipe_ready[NUM_INP_REGS] = out_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) + `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) + `FFL(out_pipe_extension_bit_q[i+1], out_pipe_extension_bit_q[i], reg_ena, '0) + `FFL(out_pipe_class_mask_q[i+1], out_pipe_class_mask_q[i], reg_ena, fpnew_pkg::QNAN) + `FFL(out_pipe_is_class_q[i+1], out_pipe_is_class_q[i], reg_ena, '0) + `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) + `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + // Output stage: Ready travels backwards from output side, driven by downstream circuitry + assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs + assign result_o = out_pipe_result_q[NUM_OUT_REGS]; + assign status_o = out_pipe_status_q[NUM_OUT_REGS]; + assign extension_bit_o = out_pipe_extension_bit_q[NUM_OUT_REGS]; + assign class_mask_o = out_pipe_class_mask_q[NUM_OUT_REGS]; + assign is_class_o = out_pipe_is_class_q[NUM_OUT_REGS]; + assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; + assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; + assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; + assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; + assign busy_o = (| {inp_pipe_valid_q, out_pipe_valid_q}); +endmodule diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_block.sv 
b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_block.sv new file mode 100644 index 00000000..2633406f --- /dev/null +++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_block.sv @@ -0,0 +1,244 @@ +// Copyright 2019 ETH Zurich and University of Bologna. +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// SPDX-License-Identifier: SHL-0.51 + +// Author: Stefan Mach +// Operation group block: one parallel slice per enabled PARALLEL format plus an optional merged multi-format slice; a round-robin arbiter selects the output. +module fpnew_opgroup_block #( + parameter fpnew_pkg::opgroup_e OpGroup = fpnew_pkg::ADDMUL, + // FPU configuration + parameter int unsigned Width = 32, + parameter logic EnableVectors = 1'b1, + parameter fpnew_pkg::fmt_logic_t FpFmtMask = '1, + parameter fpnew_pkg::ifmt_logic_t IntFmtMask = '1, + parameter fpnew_pkg::fmt_unsigned_t FmtPipeRegs = '{default: 0}, + parameter fpnew_pkg::fmt_unit_types_t FmtUnitTypes = '{default: fpnew_pkg::PARALLEL}, + parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, + parameter type TagType = logic, + parameter int unsigned TrueSIMDClass = 0, + // Do not change + localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS, + localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup), + localparam int unsigned NUM_LANES = fpnew_pkg::max_num_lanes(Width, FpFmtMask, EnableVectors), + localparam type MaskType = logic [NUM_LANES-1:0] +) ( + input logic clk_i, + input logic rst_ni, + // Input signals + input logic [NUM_OPERANDS-1:0][Width-1:0] 
operands_i, + input logic [NUM_FORMATS-1:0][NUM_OPERANDS-1:0] is_boxed_i, + input fpnew_pkg::roundmode_e rnd_mode_i, + input fpnew_pkg::operation_e op_i, + input logic op_mod_i, + input fpnew_pkg::fp_format_e src_fmt_i, + input fpnew_pkg::fp_format_e dst_fmt_i, + input fpnew_pkg::int_format_e int_fmt_i, + input logic vectorial_op_i, + input TagType tag_i, + input MaskType simd_mask_i, + // Input Handshake + input logic in_valid_i, + output logic in_ready_o, + input logic flush_i, + // Output signals + output logic [Width-1:0] result_o, + output fpnew_pkg::status_t status_o, + output logic extension_bit_o, + output TagType tag_o, + // Output handshake + output logic out_valid_o, + input logic out_ready_i, + // Indication of valid data in flight + output logic busy_o +); + + // ---------------- + // Type Definition + // ---------------- + typedef struct packed { + logic [Width-1:0] result; + fpnew_pkg::status_t status; + logic ext_bit; + TagType tag; + } output_t; + + // Handshake signals for the slices + logic [NUM_FORMATS-1:0] fmt_in_ready, fmt_out_valid, fmt_out_ready, fmt_busy; + output_t [NUM_FORMATS-1:0] fmt_outputs; + + // ----------- + // Input Side + // ----------- + assign in_ready_o = in_valid_i & fmt_in_ready[dst_fmt_i]; // Ready is given by selected format + + // ------------------------- + // Generate Parallel Slices + // ------------------------- + for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_parallel_slices + // Some constants for this format + localparam logic ANY_MERGED = fpnew_pkg::any_enabled_multi(FmtUnitTypes, FpFmtMask); + localparam logic IS_FIRST_MERGED = + fpnew_pkg::is_first_enabled_multi(fpnew_pkg::fp_format_e'(fmt), FmtUnitTypes, FpFmtMask); + + // Generate slice only if format enabled + if (FpFmtMask[fmt] && (FmtUnitTypes[fmt] == fpnew_pkg::PARALLEL)) begin : active_format + + logic in_valid; + + assign in_valid = in_valid_i & (dst_fmt_i == fmt); // enable selected format + + // Forward masks related to the right SIMD 
lane + localparam int unsigned INTERNAL_LANES = fpnew_pkg::num_lanes(Width, fpnew_pkg::fp_format_e'(fmt), EnableVectors); + logic [INTERNAL_LANES-1:0] mask_slice; + always_comb for (int b = 0; b < INTERNAL_LANES; b++) mask_slice[b] = simd_mask_i[(NUM_LANES/INTERNAL_LANES)*b]; + + fpnew_opgroup_fmt_slice #( + .OpGroup ( OpGroup ), + .FpFormat ( fpnew_pkg::fp_format_e'(fmt) ), + .Width ( Width ), + .EnableVectors ( EnableVectors ), + .NumPipeRegs ( FmtPipeRegs[fmt] ), + .PipeConfig ( PipeConfig ), + .TagType ( TagType ), + .TrueSIMDClass ( TrueSIMDClass ) + ) i_fmt_slice ( + .clk_i, + .rst_ni, + .operands_i ( operands_i ), + .is_boxed_i ( is_boxed_i[fmt] ), + .rnd_mode_i, + .op_i, + .op_mod_i, + .vectorial_op_i, + .tag_i, + .simd_mask_i ( mask_slice ), + .in_valid_i ( in_valid ), + .in_ready_o ( fmt_in_ready[fmt] ), + .flush_i, + .result_o ( fmt_outputs[fmt].result ), + .status_o ( fmt_outputs[fmt].status ), + .extension_bit_o( fmt_outputs[fmt].ext_bit ), + .tag_o ( fmt_outputs[fmt].tag ), + .out_valid_o ( fmt_out_valid[fmt] ), + .out_ready_i ( fmt_out_ready[fmt] ), + .busy_o ( fmt_busy[fmt] ) + ); + // If the format wants to use merged ops, tie off the dangling ones not used here + end else if (FpFmtMask[fmt] && ANY_MERGED && !IS_FIRST_MERGED) begin : merged_unused + + localparam FMT = fpnew_pkg::get_first_enabled_multi(FmtUnitTypes, FpFmtMask); + // Ready is split up into formats + assign fmt_in_ready[fmt] = fmt_in_ready[int'(FMT)]; + + assign fmt_out_valid[fmt] = 1'b0; // don't emit values + assign fmt_busy[fmt] = 1'b0; // never busy + // Outputs are don't care + assign fmt_outputs[fmt].result = '{default: fpnew_pkg::DONT_CARE}; + assign fmt_outputs[fmt].status = '{default: fpnew_pkg::DONT_CARE}; + assign fmt_outputs[fmt].ext_bit = fpnew_pkg::DONT_CARE; + assign fmt_outputs[fmt].tag = TagType'(fpnew_pkg::DONT_CARE); + + // Tie off disabled formats + end else if (!FpFmtMask[fmt] || (FmtUnitTypes[fmt] == fpnew_pkg::DISABLED)) begin : disable_fmt + assign 
fmt_in_ready[fmt] = 1'b0; // don't accept operations + assign fmt_out_valid[fmt] = 1'b0; // don't emit values + assign fmt_busy[fmt] = 1'b0; // never busy + // Outputs are don't care + assign fmt_outputs[fmt].result = '{default: fpnew_pkg::DONT_CARE}; + assign fmt_outputs[fmt].status = '{default: fpnew_pkg::DONT_CARE}; + assign fmt_outputs[fmt].ext_bit = fpnew_pkg::DONT_CARE; + assign fmt_outputs[fmt].tag = TagType'(fpnew_pkg::DONT_CARE); + end + end + + // ---------------------- + // Generate Merged Slice + // ---------------------- + if (fpnew_pkg::any_enabled_multi(FmtUnitTypes, FpFmtMask)) begin : gen_merged_slice + + localparam FMT = fpnew_pkg::get_first_enabled_multi(FmtUnitTypes, FpFmtMask); + localparam REG = fpnew_pkg::get_num_regs_multi(FmtPipeRegs, FmtUnitTypes, FpFmtMask); + + logic in_valid; + + assign in_valid = in_valid_i & (FmtUnitTypes[dst_fmt_i] == fpnew_pkg::MERGED); + + fpnew_opgroup_multifmt_slice #( + .OpGroup ( OpGroup ), + .Width ( Width ), + .FpFmtConfig ( FpFmtMask ), + .IntFmtConfig ( IntFmtMask ), + .EnableVectors ( EnableVectors ), + .NumPipeRegs ( REG ), + .PipeConfig ( PipeConfig ), + .TagType ( TagType ) + ) i_multifmt_slice ( + .clk_i, + .rst_ni, + .operands_i, + .is_boxed_i, + .rnd_mode_i, + .op_i, + .op_mod_i, + .src_fmt_i, + .dst_fmt_i, + .int_fmt_i, + .vectorial_op_i, + .tag_i, + .simd_mask_i ( simd_mask_i ), + .in_valid_i ( in_valid ), + .in_ready_o ( fmt_in_ready[FMT] ), + .flush_i, + .result_o ( fmt_outputs[FMT].result ), + .status_o ( fmt_outputs[FMT].status ), + .extension_bit_o ( fmt_outputs[FMT].ext_bit ), + .tag_o ( fmt_outputs[FMT].tag ), + .out_valid_o ( fmt_out_valid[FMT] ), + .out_ready_i ( fmt_out_ready[FMT] ), + .busy_o ( fmt_busy[FMT] ) + ); + + end + + // ------------------ + // Arbitrate Outputs + // ------------------ + output_t arbiter_output; + + // Round-Robin arbiter to decide which result to use + rr_arb_tree #( + .NumIn ( NUM_FORMATS ), + .DataType ( output_t ), + .AxiVldRdy ( 1'b1 ) + ) i_arbiter ( + 
.clk_i, + .rst_ni, + .flush_i, + .rr_i ( '0 ), + .req_i ( fmt_out_valid ), + .gnt_o ( fmt_out_ready ), + .data_i ( fmt_outputs ), + .gnt_i ( out_ready_i ), + .req_o ( out_valid_o ), + .data_o ( arbiter_output ), + .idx_o ( /* unused */ ) + ); + + // Unpack output + assign result_o = arbiter_output.result; + assign status_o = arbiter_output.status; + assign extension_bit_o = arbiter_output.ext_bit; + assign tag_o = arbiter_output.tag; + + assign busy_o = (| fmt_busy); + +endmodule diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_fmt_slice.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_fmt_slice.sv new file mode 100644 index 00000000..35fbe484 --- /dev/null +++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_fmt_slice.sv @@ -0,0 +1,292 @@ +// Copyright 2019 ETH Zurich and University of Bologna. +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// SPDX-License-Identifier: SHL-0.51 + +// Author: Stefan Mach +// Single-format slice: generates one unit per lane for the selected OpGroup and assembles the lane results into result_o (ADDMUL and NONCOMP are instantiated here; the DIVSQRT instance is commented out). +module fpnew_opgroup_fmt_slice #( + parameter fpnew_pkg::opgroup_e OpGroup = fpnew_pkg::ADDMUL, + parameter fpnew_pkg::fp_format_e FpFormat = fpnew_pkg::fp_format_e'(0), + // FPU configuration + parameter int unsigned Width = 32, + parameter logic EnableVectors = 1'b1, + parameter int unsigned NumPipeRegs = 0, + parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, + parameter type TagType = logic, + parameter int unsigned TrueSIMDClass = 0, + // Do not change + localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup), + localparam int unsigned NUM_LANES = fpnew_pkg::num_lanes(Width, FpFormat, EnableVectors), + localparam type MaskType = logic [NUM_LANES-1:0] +) ( + input logic clk_i, + input logic rst_ni, + // Input signals + input logic [NUM_OPERANDS-1:0][Width-1:0] operands_i, + input logic [NUM_OPERANDS-1:0] is_boxed_i, + input fpnew_pkg::roundmode_e rnd_mode_i, + input fpnew_pkg::operation_e op_i, + input logic op_mod_i, + input logic vectorial_op_i, + input TagType tag_i, + input MaskType simd_mask_i, + // Input Handshake + input logic in_valid_i, + output logic in_ready_o, + input logic flush_i, + // Output signals + output logic [Width-1:0] result_o, + output fpnew_pkg::status_t status_o, + output logic extension_bit_o, + output TagType tag_o, + // Output handshake + output logic out_valid_o, + input logic out_ready_i, + // Indication of valid data in flight + output logic busy_o +); + + localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(FpFormat); + localparam int unsigned SIMD_WIDTH = unsigned'(Width/NUM_LANES); + + + logic [NUM_LANES-1:0] lane_in_ready, lane_out_valid; // Handshake signals for the lanes + logic vectorial_op; + + logic [NUM_LANES*FP_WIDTH-1:0] slice_result; + logic [Width-1:0] slice_regular_result, slice_class_result, slice_vec_class_result; + + fpnew_pkg::status_t [NUM_LANES-1:0] lane_status; + logic [NUM_LANES-1:0] lane_ext_bit; // 
only the first one is actually used + fpnew_pkg::classmask_e [NUM_LANES-1:0] lane_class_mask; + TagType [NUM_LANES-1:0] lane_tags; // only the first one is actually used + logic [NUM_LANES-1:0] lane_masks; + logic [NUM_LANES-1:0] lane_vectorial, lane_busy, lane_is_class; // ditto + + logic result_is_vector, result_is_class; + + // ----------- + // Input Side + // ----------- + assign in_ready_o = lane_in_ready[0]; // Upstream ready is given by first lane + assign vectorial_op = vectorial_op_i & EnableVectors; // only do vectorial stuff if enabled + + // --------------- + // Generate Lanes + // --------------- + for (genvar lane = 0; lane < int'(NUM_LANES); lane++) begin : gen_num_lanes + logic [FP_WIDTH-1:0] local_result; // lane-local results + logic local_sign; + + // Generate instances only if needed, lane 0 always generated + if ((lane == 0) || EnableVectors) begin : active_lane + logic in_valid, out_valid, out_ready; // lane-local handshake + + logic [NUM_OPERANDS-1:0][FP_WIDTH-1:0] local_operands; // lane-local operands + logic [FP_WIDTH-1:0] op_result; // lane-local results + fpnew_pkg::status_t op_status; + + assign in_valid = in_valid_i & ((lane == 0) | vectorial_op); // upper lanes only for vectors + // Slice out the operands for this lane + always_comb begin : prepare_input + for (int i = 0; i < int'(NUM_OPERANDS); i++) begin + local_operands[i] = operands_i[i][(unsigned'(lane)+1)*FP_WIDTH-1:unsigned'(lane)*FP_WIDTH]; + end + end + + // Instantiate the operation from the selected opgroup + if (OpGroup == fpnew_pkg::ADDMUL) begin : lane_instance + fpnew_fma #( + .FpFormat ( FpFormat ), + .NumPipeRegs ( NumPipeRegs ), + .PipeConfig ( PipeConfig ), + .TagType ( TagType ), + .AuxType ( logic ) + ) i_fma ( + .clk_i, + .rst_ni, + .operands_i ( local_operands ), + .is_boxed_i ( is_boxed_i[NUM_OPERANDS-1:0] ), + .rnd_mode_i, + .op_i, + .op_mod_i, + .tag_i, + .mask_i ( simd_mask_i[lane] ), + .aux_i ( vectorial_op ), // Remember whether operation was vectorial + 
.in_valid_i ( in_valid ), + .in_ready_o ( lane_in_ready[lane] ), + .flush_i, + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .tag_o ( lane_tags[lane] ), + .mask_o ( lane_masks[lane] ), + .aux_o ( lane_vectorial[lane] ), + .out_valid_o ( out_valid ), + .out_ready_i ( out_ready ), + .busy_o ( lane_busy[lane] ) + ); + assign lane_is_class[lane] = 1'b0; + assign lane_class_mask[lane] = fpnew_pkg::NEGINF; + end else if (OpGroup == fpnew_pkg::DIVSQRT) begin : lane_instance + // fpnew_divsqrt #( + // .FpFormat (FpFormat), + // .NumPipeRegs(NumPipeRegs), + // .PipeConfig (PipeConfig), + // .TagType (TagType), + // .AuxType (logic) + // ) i_divsqrt ( + // .clk_i, + // .rst_ni, + // .operands_i ( local_operands ), + // .is_boxed_i ( is_boxed_i[NUM_OPERANDS-1:0] ), + // .rnd_mode_i, + // .op_i, + // .op_mod_i, + // .tag_i, + // .aux_i ( vectorial_op ), // Remember whether operation was vectorial + // .in_valid_i ( in_valid ), + // .in_ready_o ( lane_in_ready[lane] ), + // .flush_i, + // .result_o ( op_result ), + // .status_o ( op_status ), + // .extension_bit_o ( lane_ext_bit[lane] ), + // .tag_o ( lane_tags[lane] ), + // .aux_o ( lane_vectorial[lane] ), + // .out_valid_o ( out_valid ), + // .out_ready_i ( out_ready ), + // .busy_o ( lane_busy[lane] ) + // ); + // assign lane_is_class[lane] = 1'b0; + end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance + fpnew_noncomp #( + .FpFormat (FpFormat), + .NumPipeRegs(NumPipeRegs), + .PipeConfig (PipeConfig), + .TagType (TagType), + .AuxType (logic) + ) i_noncomp ( + .clk_i, + .rst_ni, + .operands_i ( local_operands ), + .is_boxed_i ( is_boxed_i[NUM_OPERANDS-1:0] ), + .rnd_mode_i, + .op_i, + .op_mod_i, + .tag_i, + .mask_i ( simd_mask_i[lane] ), + .aux_i ( vectorial_op ), // Remember whether operation was vectorial + .in_valid_i ( in_valid ), + .in_ready_o ( lane_in_ready[lane] ), + .flush_i, + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( 
lane_ext_bit[lane] ), + .class_mask_o ( lane_class_mask[lane] ), + .is_class_o ( lane_is_class[lane] ), + .tag_o ( lane_tags[lane] ), + .mask_o ( lane_masks[lane] ), + .aux_o ( lane_vectorial[lane] ), + .out_valid_o ( out_valid ), + .out_ready_i ( out_ready ), + .busy_o ( lane_busy[lane] ) + ); + end // ADD OTHER OPTIONS HERE + + // Handshakes are only done if the lane is actually used + assign out_ready = out_ready_i & ((lane == 0) | result_is_vector); + assign lane_out_valid[lane] = out_valid & ((lane == 0) | result_is_vector); + + // Properly NaN-box or sign-extend the slice result if not in use + assign local_result = lane_out_valid[lane] ? op_result : '{default: lane_ext_bit[0]}; + assign lane_status[lane] = lane_out_valid[lane] ? op_status : '0; + + // Otherwise generate constant sign-extension + end else begin + assign lane_out_valid[lane] = 1'b0; // unused lane + assign lane_in_ready[lane] = 1'b0; // unused lane + assign local_result = '{default: lane_ext_bit[0]}; // sign-extend/nan box + assign lane_status[lane] = '0; + assign lane_busy[lane] = 1'b0; + assign lane_is_class[lane] = 1'b0; + end + + // Insert lane result into slice result + assign slice_result[(unsigned'(lane)+1)*FP_WIDTH-1:unsigned'(lane)*FP_WIDTH] = local_result; + + // Create Classification results + if (TrueSIMDClass && SIMD_WIDTH >= 10) begin : vectorial_true_class // true vectorial class blocks are 10bits in size + assign slice_vec_class_result[lane*SIMD_WIDTH +: 10] = lane_class_mask[lane]; + assign slice_vec_class_result[(lane+1)*SIMD_WIDTH-1 -: SIMD_WIDTH-10] = '0; + end else if ((lane+1)*8 <= Width) begin : vectorial_class // vectorial class blocks are 8bits in size + assign local_sign = (lane_class_mask[lane] == fpnew_pkg::NEGINF || + lane_class_mask[lane] == fpnew_pkg::NEGNORM || + lane_class_mask[lane] == fpnew_pkg::NEGSUBNORM || + lane_class_mask[lane] == fpnew_pkg::NEGZERO); + // Write the current block segment + assign slice_vec_class_result[(lane+1)*8-1:lane*8] = { + 
local_sign, // BIT 7 + ~local_sign, // BIT 6 + lane_class_mask[lane] == fpnew_pkg::QNAN, // BIT 5 + lane_class_mask[lane] == fpnew_pkg::SNAN, // BIT 4 + lane_class_mask[lane] == fpnew_pkg::POSZERO + || lane_class_mask[lane] == fpnew_pkg::NEGZERO, // BIT 3 + lane_class_mask[lane] == fpnew_pkg::POSSUBNORM + || lane_class_mask[lane] == fpnew_pkg::NEGSUBNORM, // BIT 2 + lane_class_mask[lane] == fpnew_pkg::POSNORM + || lane_class_mask[lane] == fpnew_pkg::NEGNORM, // BIT 1 + lane_class_mask[lane] == fpnew_pkg::POSINF + || lane_class_mask[lane] == fpnew_pkg::NEGINF // BIT 0 + }; + end + end + + // ------------ + // Output Side + // ------------ + assign result_is_vector = lane_vectorial[0]; + assign result_is_class = lane_is_class[0]; + + assign slice_regular_result = $signed({extension_bit_o, slice_result}); + + localparam int unsigned CLASS_VEC_BITS = (NUM_LANES*8 > Width) ? 8 * (Width / 8) : NUM_LANES*8; + + // Pad out unused vec_class bits if each classify result is on 8 bits + if (!(TrueSIMDClass && SIMD_WIDTH >= 10)) begin + if (CLASS_VEC_BITS < Width) begin : pad_vectorial_class + assign slice_vec_class_result[Width-1:CLASS_VEC_BITS] = '0; + end + end + + // localparam logic [Width-1:0] CLASS_VEC_MASK = 2**CLASS_VEC_BITS - 1; + + assign slice_class_result = result_is_vector ? slice_vec_class_result : lane_class_mask[0]; + + // Select the proper result + assign result_o = result_is_class ? 
slice_class_result : slice_regular_result; + + assign extension_bit_o = lane_ext_bit[0]; // upper lanes unused + assign tag_o = lane_tags[0]; // upper lanes unused + assign busy_o = (| lane_busy); + assign out_valid_o = lane_out_valid[0]; // upper lanes unused + + + // Collapse the lane status + always_comb begin : output_processing + // Collapse the status + automatic fpnew_pkg::status_t temp_status; + temp_status = '0; + for (int i = 0; i < int'(NUM_LANES); i++) + temp_status |= lane_status[i] & {5{lane_masks[i]}}; + status_o = temp_status; + end +endmodule diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_multifmt_slice.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_multifmt_slice.sv new file mode 100644 index 00000000..08facb83 --- /dev/null +++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_multifmt_slice.sv @@ -0,0 +1,449 @@ +// Copyright 2019 ETH Zurich and University of Bologna. +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// SPDX-License-Identifier: SHL-0.51 + +// Author: Stefan Mach + +`include "common_cells/registers.svh" + +module fpnew_opgroup_multifmt_slice #( // Multi-format slice: instantiates the selected opgroup unit per SIMD lane and packs the lane results into result_o + parameter fpnew_pkg::opgroup_e OpGroup = fpnew_pkg::CONV, + parameter int unsigned Width = 64, + // FPU configuration + parameter fpnew_pkg::fmt_logic_t FpFmtConfig = '1, + parameter fpnew_pkg::ifmt_logic_t IntFmtConfig = '1, + parameter logic EnableVectors = 1'b1, + parameter int unsigned NumPipeRegs = 0, + parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, + parameter type TagType = logic, + // Do not change + localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup), + localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS, + localparam int unsigned NUM_SIMD_LANES = fpnew_pkg::max_num_lanes(Width, FpFmtConfig, EnableVectors), + localparam type MaskType = logic [NUM_SIMD_LANES-1:0] +) ( + input logic clk_i, + input logic rst_ni, + // Input signals + input logic [NUM_OPERANDS-1:0][Width-1:0] operands_i, + input logic [NUM_FORMATS-1:0][NUM_OPERANDS-1:0] is_boxed_i, + input fpnew_pkg::roundmode_e rnd_mode_i, + input fpnew_pkg::operation_e op_i, + input logic op_mod_i, + input fpnew_pkg::fp_format_e src_fmt_i, + input fpnew_pkg::fp_format_e dst_fmt_i, + input fpnew_pkg::int_format_e int_fmt_i, + input logic vectorial_op_i, + input TagType tag_i, + input MaskType simd_mask_i, + // Input Handshake + input logic in_valid_i, + output logic in_ready_o, + input logic flush_i, + // Output signals + output logic [Width-1:0] result_o, + output fpnew_pkg::status_t status_o, + output logic extension_bit_o, + output TagType tag_o, + // Output handshake + output logic out_valid_o, + input logic out_ready_i, + // Indication of valid data in flight + output logic busy_o +); + + localparam int unsigned MAX_FP_WIDTH = fpnew_pkg::max_fp_width(FpFmtConfig); + localparam int unsigned MAX_INT_WIDTH = fpnew_pkg::max_int_width(IntFmtConfig); + localparam int unsigned NUM_LANES = 
fpnew_pkg::max_num_lanes(Width, FpFmtConfig, 1'b1); + localparam int unsigned NUM_INT_FORMATS = fpnew_pkg::NUM_INT_FORMATS; + // We will send the format information along with the data + localparam int unsigned FMT_BITS = + fpnew_pkg::maximum($clog2(NUM_FORMATS), $clog2(NUM_INT_FORMATS)); + localparam int unsigned AUX_BITS = FMT_BITS + 2; // also add vectorial and integer flags + + logic [NUM_LANES-1:0] lane_in_ready, lane_out_valid, divsqrt_done, divsqrt_ready; // Handshake signals for the lanes + logic vectorial_op; + logic [FMT_BITS-1:0] dst_fmt; // destination format to pass along with operation + logic [AUX_BITS-1:0] aux_data; + + // additional flags for CONV + logic dst_fmt_is_int, dst_is_cpk; + logic [1:0] dst_vec_op; // info for vectorial results (for packing) + logic [2:0] target_aux_d, target_aux_q; + logic is_up_cast, is_down_cast; + + logic [NUM_FORMATS-1:0][Width-1:0] fmt_slice_result; + logic [NUM_INT_FORMATS-1:0][Width-1:0] ifmt_slice_result; + logic [Width-1:0] conv_slice_result; + + + logic [Width-1:0] conv_target_d, conv_target_q; // vectorial conversions update a register + + fpnew_pkg::status_t [NUM_LANES-1:0] lane_status; + logic [NUM_LANES-1:0] lane_ext_bit; // only the first one is actually used + TagType [NUM_LANES-1:0] lane_tags; // only the first one is actually used + logic [NUM_LANES-1:0] lane_masks; // per-lane mask outputs, gate the status collapse + logic [NUM_LANES-1:0][AUX_BITS-1:0] lane_aux; // only the first one is actually used + logic [NUM_LANES-1:0] lane_busy; // all lanes are OR-reduced into busy_o + + logic result_is_vector; + logic [FMT_BITS-1:0] result_fmt; + logic result_fmt_is_int, result_is_cpk; + logic [1:0] result_vec_op; // info for vectorial results (for packing) + + logic simd_synch_rdy, simd_synch_done; + + // ----------- + // Input Side + // ----------- + assign in_ready_o = lane_in_ready[0]; // Upstream ready is given by first lane + assign vectorial_op = vectorial_op_i & EnableVectors; // only do vectorial stuff if enabled + + // Cast-and-Pack ops are encoded in operation and modifier + assign 
dst_fmt_is_int = (OpGroup == fpnew_pkg::CONV) & (op_i == fpnew_pkg::F2I); + assign dst_is_cpk = (OpGroup == fpnew_pkg::CONV) & (op_i == fpnew_pkg::CPKAB || + op_i == fpnew_pkg::CPKCD); + assign dst_vec_op = (OpGroup == fpnew_pkg::CONV) & {(op_i == fpnew_pkg::CPKCD), op_mod_i}; + + assign is_up_cast = (fpnew_pkg::fp_width(dst_fmt_i) > fpnew_pkg::fp_width(src_fmt_i)); + assign is_down_cast = (fpnew_pkg::fp_width(dst_fmt_i) < fpnew_pkg::fp_width(src_fmt_i)); + + // The destination format is the int format for F2I casts + assign dst_fmt = dst_fmt_is_int ? int_fmt_i : dst_fmt_i; + + // The data sent along consists of the vectorial flag and format bits + assign aux_data = {dst_fmt_is_int, vectorial_op, dst_fmt}; + assign target_aux_d = {dst_vec_op, dst_is_cpk}; + + // CONV passes one operand for assembly after the unit: opC for cpk, opB for others + if (OpGroup == fpnew_pkg::CONV) begin : conv_target + assign conv_target_d = dst_is_cpk ? operands_i[2] : operands_i[1]; + end + + // For 2-operand units, prepare boxing info + logic [NUM_FORMATS-1:0] is_boxed_1op; + logic [NUM_FORMATS-1:0][1:0] is_boxed_2op; + + always_comb begin : boxed_2op + for (int fmt = 0; fmt < NUM_FORMATS; fmt++) begin + is_boxed_1op[fmt] = is_boxed_i[fmt][0]; + is_boxed_2op[fmt] = is_boxed_i[fmt][1:0]; + end + end + + // --------------- + // Generate Lanes + // --------------- + for (genvar lane = 0; lane < int'(NUM_LANES); lane++) begin : gen_num_lanes + localparam int unsigned LANE = unsigned'(lane); // unsigned to please the linter + // Get a mask of active formats for this lane + localparam fpnew_pkg::fmt_logic_t ACTIVE_FORMATS = + fpnew_pkg::get_lane_formats(Width, FpFmtConfig, LANE); + localparam fpnew_pkg::ifmt_logic_t ACTIVE_INT_FORMATS = + fpnew_pkg::get_lane_int_formats(Width, FpFmtConfig, IntFmtConfig, LANE); + localparam int unsigned MAX_WIDTH = fpnew_pkg::max_fp_width(ACTIVE_FORMATS); + + // Cast-specific parameters + localparam fpnew_pkg::fmt_logic_t CONV_FORMATS = 
fpnew_pkg::get_conv_lane_formats(Width, FpFmtConfig, LANE); + localparam fpnew_pkg::ifmt_logic_t CONV_INT_FORMATS = + fpnew_pkg::get_conv_lane_int_formats(Width, FpFmtConfig, IntFmtConfig, LANE); + localparam int unsigned CONV_WIDTH = fpnew_pkg::max_fp_width(CONV_FORMATS); + + // Lane parameters from Opgroup + localparam fpnew_pkg::fmt_logic_t LANE_FORMATS = (OpGroup == fpnew_pkg::CONV) + ? CONV_FORMATS : ACTIVE_FORMATS; + localparam int unsigned LANE_WIDTH = (OpGroup == fpnew_pkg::CONV) ? CONV_WIDTH : MAX_WIDTH; + + logic [LANE_WIDTH-1:0] local_result; // lane-local results + + // Generate instances only if needed, lane 0 always generated + if ((lane == 0) || EnableVectors) begin : active_lane + logic in_valid, out_valid, out_ready; // lane-local handshake + + logic [NUM_OPERANDS-1:0][LANE_WIDTH-1:0] local_operands; // lane-local operands + logic [LANE_WIDTH-1:0] op_result; // lane-local results + fpnew_pkg::status_t op_status; + + assign in_valid = in_valid_i & ((lane == 0) | vectorial_op); // upper lanes only for vectors + + // Slice out the operands for this lane, upper bits are ignored in the unit + always_comb begin : prepare_input + for (int unsigned i = 0; i < NUM_OPERANDS; i++) begin + local_operands[i] = operands_i[i] >> LANE*fpnew_pkg::fp_width(src_fmt_i); + end + + // override operand 0 for some conversions + if (OpGroup == fpnew_pkg::CONV) begin + // Source is an integer + if (op_i == fpnew_pkg::I2F) begin + local_operands[0] = operands_i[0] >> LANE*fpnew_pkg::int_width(int_fmt_i); + // vectorial F2F up casts + end else if (op_i == fpnew_pkg::F2F) begin + if (vectorial_op && op_mod_i && is_up_cast) begin // up cast with upper half + local_operands[0] = operands_i[0] >> LANE*fpnew_pkg::fp_width(src_fmt_i) + + MAX_FP_WIDTH/2; + end + // CPK + end else if (dst_is_cpk) begin + if (lane == 1) begin + local_operands[0] = operands_i[1][LANE_WIDTH-1:0]; // using opB as second argument + end + end + end + end + + // Instantiate the operation from the selected 
opgroup + if (OpGroup == fpnew_pkg::ADDMUL) begin : lane_instance + fpnew_fma_multi #( + .FpFmtConfig ( LANE_FORMATS ), + .NumPipeRegs ( NumPipeRegs ), + .PipeConfig ( PipeConfig ), + .TagType ( TagType ), + .AuxType ( logic [AUX_BITS-1:0] ) + ) i_fpnew_fma_multi ( + .clk_i, + .rst_ni, + .operands_i ( local_operands ), + .is_boxed_i, + .rnd_mode_i, + .op_i, + .op_mod_i, + .src_fmt_i, + .dst_fmt_i, + .tag_i, + .mask_i ( simd_mask_i[lane] ), + .aux_i ( aux_data ), + .in_valid_i ( in_valid ), + .in_ready_o ( lane_in_ready[lane] ), + .flush_i, + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .tag_o ( lane_tags[lane] ), + .mask_o ( lane_masks[lane] ), + .aux_o ( lane_aux[lane] ), + .out_valid_o ( out_valid ), + .out_ready_i ( out_ready ), + .busy_o ( lane_busy[lane] ) + ); + + end else if (OpGroup == fpnew_pkg::DIVSQRT) begin : lane_instance + fpnew_divsqrt_multi #( + .FpFmtConfig ( LANE_FORMATS ), + .NumPipeRegs ( NumPipeRegs ), + .PipeConfig ( PipeConfig ), + .TagType ( TagType ), + .AuxType ( logic [AUX_BITS-1:0] ) + ) i_fpnew_divsqrt_multi ( + .clk_i, + .rst_ni, + .operands_i ( local_operands[1:0] ), // 2 operands + .is_boxed_i ( is_boxed_2op ), // 2 operands + .rnd_mode_i, + .op_i, + .dst_fmt_i, + .tag_i, + .mask_i ( simd_mask_i[lane] ), + .aux_i ( aux_data ), + .in_valid_i ( in_valid ), + .in_ready_o ( lane_in_ready[lane] ), + .divsqrt_done_o ( divsqrt_done[lane] ), + .simd_synch_done_i( simd_synch_done ), + .divsqrt_ready_o ( divsqrt_ready[lane]), + .simd_synch_rdy_i( simd_synch_rdy ), + .flush_i, + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .tag_o ( lane_tags[lane] ), + .mask_o ( lane_masks[lane] ), + .aux_o ( lane_aux[lane] ), + .out_valid_o ( out_valid ), + .out_ready_i ( out_ready ), + .busy_o ( lane_busy[lane] ) + ); + end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance + // NONCOMP: this branch is empty, no unit is instantiated here + end else if (OpGroup == fpnew_pkg::CONV) begin : lane_instance + 
fpnew_cast_multi #( + .FpFmtConfig ( LANE_FORMATS ), + .IntFmtConfig ( CONV_INT_FORMATS ), + .NumPipeRegs ( NumPipeRegs ), + .PipeConfig ( PipeConfig ), + .TagType ( TagType ), + .AuxType ( logic [AUX_BITS-1:0] ) + ) i_fpnew_cast_multi ( + .clk_i, + .rst_ni, + .operands_i ( local_operands[0] ), + .is_boxed_i ( is_boxed_1op ), + .rnd_mode_i, + .op_i, + .op_mod_i, + .src_fmt_i, + .dst_fmt_i, + .int_fmt_i, + .tag_i, + .mask_i ( simd_mask_i[lane] ), + .aux_i ( aux_data ), + .in_valid_i ( in_valid ), + .in_ready_o ( lane_in_ready[lane] ), + .flush_i, + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .tag_o ( lane_tags[lane] ), + .mask_o ( lane_masks[lane] ), + .aux_o ( lane_aux[lane] ), + .out_valid_o ( out_valid ), + .out_ready_i ( out_ready ), + .busy_o ( lane_busy[lane] ) + ); + end // ADD OTHER OPTIONS HERE + + // Handshakes are only done if the lane is actually used + assign out_ready = out_ready_i & ((lane == 0) | result_is_vector); + assign lane_out_valid[lane] = out_valid & ((lane == 0) | result_is_vector); + + // Properly NaN-box or sign-extend the slice result if not in use + assign local_result = lane_out_valid[lane] ? op_result : '{default: lane_ext_bit[0]}; + assign lane_status[lane] = lane_out_valid[lane] ? 
op_status : '0; + + // Otherwise generate constant sign-extension + end else begin : inactive_lane + assign lane_out_valid[lane] = 1'b0; // unused lane + assign lane_in_ready[lane] = 1'b0; // unused lane + assign local_result = '{default: lane_ext_bit[0]}; // sign-extend/nan box + assign lane_status[lane] = '0; + assign lane_busy[lane] = 1'b0; + end + + // Generate result packing depending on float format + for (genvar fmt = 0; fmt < NUM_FORMATS; fmt++) begin : pack_fp_result + // Set up some constants + localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt)); + // only for active formats within the lane + if (ACTIVE_FORMATS[fmt]) begin + assign fmt_slice_result[fmt][(LANE+1)*FP_WIDTH-1:LANE*FP_WIDTH] = + local_result[FP_WIDTH-1:0]; + end else if ((LANE+1)*FP_WIDTH <= Width) begin + assign fmt_slice_result[fmt][(LANE+1)*FP_WIDTH-1:LANE*FP_WIDTH] = + '{default: lane_ext_bit[LANE]}; + end else if (LANE*FP_WIDTH < Width) begin + assign fmt_slice_result[fmt][Width-1:LANE*FP_WIDTH] = + '{default: lane_ext_bit[LANE]}; + end + end + + // Generate result packing depending on integer format + if (OpGroup == fpnew_pkg::CONV) begin : int_results_enabled + for (genvar ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++) begin : pack_int_result + // Set up some constants + localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt)); + if (ACTIVE_INT_FORMATS[ifmt]) begin + assign ifmt_slice_result[ifmt][(LANE+1)*INT_WIDTH-1:LANE*INT_WIDTH] = + local_result[INT_WIDTH-1:0]; + end else if ((LANE+1)*INT_WIDTH <= Width) begin + assign ifmt_slice_result[ifmt][(LANE+1)*INT_WIDTH-1:LANE*INT_WIDTH] = '0; + end else if (LANE*INT_WIDTH < Width) begin + assign ifmt_slice_result[ifmt][Width-1:LANE*INT_WIDTH] = '0; + end + end + end + end + + // Extend slice result if needed + for (genvar fmt = 0; fmt < NUM_FORMATS; fmt++) begin : extend_fp_result + // Set up some constants + localparam int unsigned FP_WIDTH = 
fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt)); + if (NUM_LANES*FP_WIDTH < Width) + assign fmt_slice_result[fmt][Width-1:NUM_LANES*FP_WIDTH] = '{default: lane_ext_bit[0]}; + end + + // Mute int results if unused + for (genvar ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++) begin : int_results_disabled + if (OpGroup != fpnew_pkg::CONV) begin : mute_int_result + assign ifmt_slice_result[ifmt] = '0; + end + end + + // Bypass lanes with target operand for vectorial casts + if (OpGroup == fpnew_pkg::CONV) begin : target_regs + // Bypass pipeline signals, index i holds signal after i register stages + logic [0:NumPipeRegs][Width-1:0] byp_pipe_target_q; + logic [0:NumPipeRegs][2:0] byp_pipe_aux_q; + logic [0:NumPipeRegs] byp_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NumPipeRegs] byp_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign byp_pipe_target_q[0] = conv_target_d; + assign byp_pipe_aux_q[0] = target_aux_d; + assign byp_pipe_valid_q[0] = in_valid_i & vectorial_op; + // Generate the register stages + for (genvar i = 0; i < NumPipeRegs; i++) begin : gen_bypass_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign byp_pipe_ready[i] = byp_pipe_ready[i+1] | ~byp_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(byp_pipe_valid_q[i+1], byp_pipe_valid_q[i], byp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipeline ready and a valid data item is present + assign reg_ena = byp_pipe_ready[i] & byp_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(byp_pipe_target_q[i+1], byp_pipe_target_q[i], reg_ena, '0) + `FFL(byp_pipe_aux_q[i+1], byp_pipe_aux_q[i], reg_ena, '0) + end + // Output stage: Ready travels backwards from output side, driven by downstream circuitry + assign byp_pipe_ready[NumPipeRegs] = out_ready_i & result_is_vector; + // Output stage: assign module outputs + assign conv_target_q = byp_pipe_target_q[NumPipeRegs]; + + // decode the aux data + assign {result_vec_op, result_is_cpk} = byp_pipe_aux_q[NumPipeRegs]; + end else begin : no_conv + assign {result_vec_op, result_is_cpk} = '0; + end + + // Synch lanes if there is more than one + assign simd_synch_rdy = EnableVectors ? &divsqrt_ready : divsqrt_ready[0]; + assign simd_synch_done = EnableVectors ? &divsqrt_done : divsqrt_done[0]; + + // ------------ + // Output Side + // ------------ + assign {result_fmt_is_int, result_is_vector, result_fmt} = lane_aux[0]; + + assign result_o = result_fmt_is_int + ? 
ifmt_slice_result[result_fmt] + : fmt_slice_result[result_fmt]; + + assign extension_bit_o = lane_ext_bit[0]; // don't care about upper ones + assign tag_o = lane_tags[0]; // don't care about upper ones + assign busy_o = (| lane_busy); + + assign out_valid_o = lane_out_valid[0]; // don't care about upper ones + + // Collapse the status + always_comb begin : output_processing + // OR together the status flags of all lanes, each gated by its lane mask + automatic fpnew_pkg::status_t temp_status; + temp_status = '0; + for (int i = 0; i < int'(NUM_LANES); i++) + temp_status |= lane_status[i] & {5{lane_masks[i]}}; + status_o = temp_status; + end + +endmodule diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_pkg.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_pkg.sv new file mode 100644 index 00000000..7addc3e9 --- /dev/null +++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_pkg.sv @@ -0,0 +1,495 @@ +// Copyright 2019 ETH Zurich and University of Bologna. +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License.
+// +// SPDX-License-Identifier: SHL-0.51 + +// Author: Stefan Mach + +package fpnew_pkg; // Shared types, configuration structs and helper functions for the FPU + + // --------- + // FP TYPES + // --------- + // | Enumerator | Format | Width | EXP_BITS | MAN_BITS + // |:----------:|------------------|-------:|:--------:|:--------: + // | FP32 | IEEE binary32 | 32 bit | 8 | 23 + // | FP64 | IEEE binary64 | 64 bit | 11 | 52 + // | FP16 | IEEE binary16 | 16 bit | 5 | 10 + // | FP8 | binary8 | 8 bit | 5 | 2 + // | FP16ALT | binary16alt | 16 bit | 8 | 7 + // *NOTE:* Add new formats only at the end of the enumeration for backwards compatibility! + + // Encoding for a format + typedef struct packed { + int unsigned exp_bits; + int unsigned man_bits; + } fp_encoding_t; + + localparam int unsigned NUM_FP_FORMATS = 5; // change me to add formats + localparam int unsigned FP_FORMAT_BITS = $clog2(NUM_FP_FORMATS); + + // FP formats + typedef enum logic [FP_FORMAT_BITS-1:0] { + FP32 = 'd0, + FP64 = 'd1, + FP16 = 'd2, + FP8 = 'd3, + FP16ALT = 'd4 + // add new formats here + } fp_format_e; + + // Encodings for supported FP formats + localparam fp_encoding_t [0:NUM_FP_FORMATS-1] FP_ENCODINGS = '{ + '{8, 23}, // IEEE binary32 (single) + '{11, 52}, // IEEE binary64 (double) + '{5, 10}, // IEEE binary16 (half) + '{5, 2}, // custom binary8 + '{8, 7} // custom binary16alt + // add new formats here + }; + + typedef logic [0:NUM_FP_FORMATS-1] fmt_logic_t; // Logic indexed by FP format (for masks) + typedef logic [0:NUM_FP_FORMATS-1][31:0] fmt_unsigned_t; // Unsigned indexed by FP format + + localparam fmt_logic_t CPK_FORMATS = 5'b11000; // only FP32 and FP64 can provide CPK + + // --------- + // INT TYPES + // --------- + // | Enumerator | Width | + // |:----------:|-------:| + // | INT8 | 8 bit | + // | INT16 | 16 bit | + // | INT32 | 32 bit | + // | INT64 | 64 bit | + // *NOTE:* Add new formats only at the end of the enumeration for backwards compatibility!
+ + localparam int unsigned NUM_INT_FORMATS = 4; // change me to add formats + localparam int unsigned INT_FORMAT_BITS = $clog2(NUM_INT_FORMATS); + + // Int formats + typedef enum logic [INT_FORMAT_BITS-1:0] { + INT8, + INT16, + INT32, + INT64 + // add new formats here + } int_format_e; + + // Returns the width of an INT format by index + function automatic int unsigned int_width(int_format_e ifmt); + unique case (ifmt) + INT8: return 8; + INT16: return 16; + INT32: return 32; + INT64: return 64; + default: begin + // pragma translate_off + $fatal(1, "Invalid INT format supplied"); + // pragma translate_on + // just return any integer to avoid any latches + // hopefully this error is caught by simulation + return INT8; + end + endcase + endfunction + + typedef logic [0:NUM_INT_FORMATS-1] ifmt_logic_t; // Logic indexed by INT format (for masks) + + // -------------- + // FP OPERATIONS + // -------------- + localparam int unsigned NUM_OPGROUPS = 4; + + // Each FP operation belongs to an operation group + typedef enum logic [1:0] { + ADDMUL, DIVSQRT, NONCOMP, CONV + } opgroup_e; + + localparam int unsigned OP_BITS = 4; + + typedef enum logic [OP_BITS-1:0] { + FMADD, FNMSUB, ADD, MUL, // ADDMUL operation group + DIV, SQRT, // DIVSQRT operation group + SGNJ, MINMAX, CMP, CLASSIFY, // NONCOMP operation group + F2F, F2I, I2F, CPKAB, CPKCD // CONV operation group + } operation_e; + + // ------------------- + // RISC-V FP-SPECIFIC + // ------------------- + // Rounding modes + typedef enum logic [2:0] { + RNE = 3'b000, + RTZ = 3'b001, + RDN = 3'b010, + RUP = 3'b011, + RMM = 3'b100, + ROD = 3'b101, // This mode is not defined in RISC-V FP-SPEC + DYN = 3'b111 + } roundmode_e; + + // Status flags + typedef struct packed { + logic NV; // Invalid + logic DZ; // Divide by zero + logic OF; // Overflow + logic UF; // Underflow + logic NX; // Inexact + } status_t; + + // Information about a floating point value + typedef struct packed { + logic is_normal; // is the value normal + 
logic is_subnormal; // is the value subnormal + logic is_zero; // is the value zero + logic is_inf; // is the value infinity + logic is_nan; // is the value NaN + logic is_signalling; // is the value a signalling NaN + logic is_quiet; // is the value a quiet NaN + logic is_boxed; // is the value properly NaN-boxed (RISC-V specific) + } fp_info_t; + + // Classification mask + typedef enum logic [9:0] { + NEGINF = 10'b00_0000_0001, + NEGNORM = 10'b00_0000_0010, + NEGSUBNORM = 10'b00_0000_0100, + NEGZERO = 10'b00_0000_1000, + POSZERO = 10'b00_0001_0000, + POSSUBNORM = 10'b00_0010_0000, + POSNORM = 10'b00_0100_0000, + POSINF = 10'b00_1000_0000, + SNAN = 10'b01_0000_0000, + QNAN = 10'b10_0000_0000 + } classmask_e; + + // ------------------ + // FPU configuration + // ------------------ + // Pipelining registers can be inserted (at elaboration time) into operational units + typedef enum logic [1:0] { + BEFORE, // registers are inserted at the inputs of the unit + AFTER, // registers are inserted at the outputs of the unit + INSIDE, // registers are inserted at predetermined (suboptimal) locations in the unit + DISTRIBUTED // registers are evenly distributed, INSIDE >= AFTER >= BEFORE + } pipe_config_t; + + // Arithmetic units can be arranged in parallel (per format), merged (multi-format) or not at all. 
+ typedef enum logic [1:0] { + DISABLED, // arithmetic units are not generated + PARALLEL, // arithmetic units are generated in parallel slices, one for each format + MERGED // arithmetic units are contained within a merged unit holding multiple formats + } unit_type_t; + + // Array of unit types indexed by format + typedef unit_type_t [0:NUM_FP_FORMATS-1] fmt_unit_types_t; + + // Array of format-specific unit types by opgroup + typedef fmt_unit_types_t [0:NUM_OPGROUPS-1] opgrp_fmt_unit_types_t; + // same with unsigned + typedef fmt_unsigned_t [0:NUM_OPGROUPS-1] opgrp_fmt_unsigned_t; + + // FPU configuration: features + typedef struct packed { + int unsigned Width; + logic EnableVectors; + logic EnableNanBox; + fmt_logic_t FpFmtMask; + ifmt_logic_t IntFmtMask; + } fpu_features_t; + + localparam fpu_features_t RV64D = '{ + Width: 64, + EnableVectors: 1'b0, + EnableNanBox: 1'b1, + FpFmtMask: 5'b11000, + IntFmtMask: 4'b0011 + }; + + localparam fpu_features_t RV32D = '{ + Width: 64, + EnableVectors: 1'b1, + EnableNanBox: 1'b1, + FpFmtMask: 5'b11000, + IntFmtMask: 4'b0010 + }; + + localparam fpu_features_t RV32F = '{ + Width: 32, + EnableVectors: 1'b0, + EnableNanBox: 1'b1, + FpFmtMask: 5'b10000, + IntFmtMask: 4'b0010 + }; + + localparam fpu_features_t RV64D_Xsflt = '{ + Width: 64, + EnableVectors: 1'b1, + EnableNanBox: 1'b1, + FpFmtMask: 5'b11111, + IntFmtMask: 4'b1111 + }; + + localparam fpu_features_t RV32F_Xsflt = '{ + Width: 32, + EnableVectors: 1'b1, + EnableNanBox: 1'b1, + FpFmtMask: 5'b10111, + IntFmtMask: 4'b1110 + }; + + localparam fpu_features_t RV32F_Xf16alt_Xfvec = '{ + Width: 32, + EnableVectors: 1'b1, + EnableNanBox: 1'b1, + FpFmtMask: 5'b10001, + IntFmtMask: 4'b0110 + }; + + + // FPU configuration: implementation + typedef struct packed { + opgrp_fmt_unsigned_t PipeRegs; + opgrp_fmt_unit_types_t UnitTypes; + pipe_config_t PipeConfig; + } fpu_implementation_t; + + localparam fpu_implementation_t DEFAULT_NOREGS = '{ + PipeRegs: '{default: 0}, + UnitTypes: 
'{'{default: PARALLEL}, // ADDMUL + '{default: MERGED}, // DIVSQRT + '{default: PARALLEL}, // NONCOMP + '{default: MERGED}}, // CONV + PipeConfig: BEFORE + }; + + localparam fpu_implementation_t DEFAULT_SNITCH = '{ + PipeRegs: '{default: 1}, + UnitTypes: '{'{default: PARALLEL}, // ADDMUL + '{default: DISABLED}, // DIVSQRT + '{default: PARALLEL}, // NONCOMP + '{default: MERGED}}, // CONV + PipeConfig: BEFORE + }; + + // ----------------------- + // Synthesis optimization + // ----------------------- + localparam logic DONT_CARE = 1'b1; // the value to assign as don't care + + // ------------------------- + // General helper functions + // ------------------------- + function automatic int minimum(int a, int b); // smaller of a and b + return (a < b) ? a : b; + endfunction + + function automatic int maximum(int a, int b); // larger of a and b + return (a > b) ? a : b; + endfunction + + // ------------------------------------------- + // Helper functions for FP formats and values + // ------------------------------------------- + // Returns the width of a FP format + function automatic int unsigned fp_width(fp_format_e fmt); + return FP_ENCODINGS[fmt].exp_bits + FP_ENCODINGS[fmt].man_bits + 1; + endfunction + + // Returns the widest FP format present + function automatic int unsigned max_fp_width(fmt_logic_t cfg); + automatic int unsigned res = 0; + for (int unsigned i = 0; i < NUM_FP_FORMATS; i++) + if (cfg[i]) + res = unsigned'(maximum(res, fp_width(fp_format_e'(i)))); + return res; + endfunction + + // Returns the narrowest FP format present + function automatic int unsigned min_fp_width(fmt_logic_t cfg); + automatic int unsigned res = max_fp_width(cfg); + for (int unsigned i = 0; i < NUM_FP_FORMATS; i++) + if (cfg[i]) + res = unsigned'(minimum(res, fp_width(fp_format_e'(i)))); + return res; + endfunction + + // Returns the number of exponent bits for a format + function automatic int unsigned exp_bits(fp_format_e fmt); + return FP_ENCODINGS[fmt].exp_bits; + endfunction + + // Returns the number of mantissa 
bits for a format + function automatic int unsigned man_bits(fp_format_e fmt); + return FP_ENCODINGS[fmt].man_bits; + endfunction + + // Returns the bias value for a given format (as per IEEE 754-2008) + function automatic int unsigned bias(fp_format_e fmt); + return unsigned'(2**(FP_ENCODINGS[fmt].exp_bits-1)-1); // symmetrical bias + endfunction + + function automatic fp_encoding_t super_format(fmt_logic_t cfg); // Returns the largest exp_bits and the largest man_bits found among the active formats + automatic fp_encoding_t res; + res = '0; + for (int unsigned fmt = 0; fmt < NUM_FP_FORMATS; fmt++) + if (cfg[fmt]) begin // only active format + res.exp_bits = unsigned'(maximum(res.exp_bits, exp_bits(fp_format_e'(fmt)))); + res.man_bits = unsigned'(maximum(res.man_bits, man_bits(fp_format_e'(fmt)))); + end + return res; + endfunction + + // ------------------------------------------- + // Helper functions for INT formats and values + // ------------------------------------------- + // Returns the widest INT format present + function automatic int unsigned max_int_width(ifmt_logic_t cfg); + automatic int unsigned res = 0; + for (int ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++) begin + if (cfg[ifmt]) res = maximum(res, int_width(int_format_e'(ifmt))); + end + return res; + endfunction + + // -------------------------------------------------- + // Helper functions for operations and FPU structure + // -------------------------------------------------- + // Returns the operation group of the given operation + function automatic opgroup_e get_opgroup(operation_e op); + unique case (op) + FMADD, FNMSUB, ADD, MUL: return ADDMUL; + DIV, SQRT: return DIVSQRT; + SGNJ, MINMAX, CMP, CLASSIFY: return NONCOMP; + F2F, F2I, I2F, CPKAB, CPKCD: return CONV; + default: return NONCOMP; + endcase + endfunction + + // Returns the number of operands by operation group + function automatic int unsigned num_operands(opgroup_e grp); + unique case (grp) + ADDMUL: return 3; + DIVSQRT: return 2; + NONCOMP: return 2; + CONV: return 3; // vectorial casts use 3 operands + default: 
return 0; + endcase + endfunction + + // Returns the number of lanes according to width, format and vectors + function automatic int unsigned num_lanes(int unsigned width, fp_format_e fmt, logic vec); + return vec ? width / fp_width(fmt) : 1; // if no vectors, only one lane + endfunction + + // Returns the maximum number of lanes in the FPU according to width, format config and vectors + function automatic int unsigned max_num_lanes(int unsigned width, fmt_logic_t cfg, logic vec); + return vec ? width / min_fp_width(cfg) : 1; // if no vectors, only one lane + endfunction + + // Returns a mask of active FP formats that are present in lane lane_no of a multiformat slice + function automatic fmt_logic_t get_lane_formats(int unsigned width, + fmt_logic_t cfg, + int unsigned lane_no); + automatic fmt_logic_t res; + for (int unsigned fmt = 0; fmt < NUM_FP_FORMATS; fmt++) + // Mask active formats with the number of lanes for that format + res[fmt] = cfg[fmt] & (width / fp_width(fp_format_e'(fmt)) > lane_no); + return res; + endfunction + + // Returns a mask of active INT formats that are present in lane lane_no of a multiformat slice + function automatic ifmt_logic_t get_lane_int_formats(int unsigned width, + fmt_logic_t cfg, + ifmt_logic_t icfg, + int unsigned lane_no); + automatic ifmt_logic_t res; + automatic fmt_logic_t lanefmts; + res = '0; + lanefmts = get_lane_formats(width, cfg, lane_no); + + for (int unsigned ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++) + for (int unsigned fmt = 0; fmt < NUM_FP_FORMATS; fmt++) + // Mask active int formats with the width of the float formats + if ((fp_width(fp_format_e'(fmt)) == int_width(int_format_e'(ifmt)))) + res[ifmt] |= icfg[ifmt] && lanefmts[fmt]; + return res; + endfunction + + // Returns a mask of active FP formats that are present in lane lane_no of a CONV slice + function automatic fmt_logic_t get_conv_lane_formats(int unsigned width, + fmt_logic_t cfg, + int unsigned lane_no); + automatic fmt_logic_t res; + for (int 
unsigned fmt = 0; fmt < NUM_FP_FORMATS; fmt++) + // Mask active formats with the number of lanes for that format, CPK at least twice + res[fmt] = cfg[fmt] && ((width / fp_width(fp_format_e'(fmt)) > lane_no) || + (CPK_FORMATS[fmt] && (lane_no < 2))); + return res; + endfunction + + // Returns a mask of active INT formats that are present in lane lane_no of a CONV slice + function automatic ifmt_logic_t get_conv_lane_int_formats(int unsigned width, + fmt_logic_t cfg, + ifmt_logic_t icfg, + int unsigned lane_no); + automatic ifmt_logic_t res; + automatic fmt_logic_t lanefmts; + res = '0; + lanefmts = get_conv_lane_formats(width, cfg, lane_no); + + for (int unsigned ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++) + for (int unsigned fmt = 0; fmt < NUM_FP_FORMATS; fmt++) + // Mask active int formats with the width of the float formats + res[ifmt] |= icfg[ifmt] && lanefmts[fmt] && + (fp_width(fp_format_e'(fmt)) == int_width(int_format_e'(ifmt))); + return res; + endfunction + + // Return whether any active format is set as MERGED + function automatic logic any_enabled_multi(fmt_unit_types_t types, fmt_logic_t cfg); + for (int unsigned i = 0; i < NUM_FP_FORMATS; i++) + if (cfg[i] && types[i] == MERGED) + return 1'b1; + return 1'b0; + endfunction + + // Return whether the given format is the first active one set as MERGED + function automatic logic is_first_enabled_multi(fp_format_e fmt, + fmt_unit_types_t types, + fmt_logic_t cfg); + for (int unsigned i = 0; i < NUM_FP_FORMATS; i++) begin + if (cfg[i] && types[i] == MERGED) return (fp_format_e'(i) == fmt); + end + return 1'b0; + endfunction + + // Returns the first format that is active and is set as MERGED + function automatic fp_format_e get_first_enabled_multi(fmt_unit_types_t types, fmt_logic_t cfg); + for (int unsigned i = 0; i < NUM_FP_FORMATS; i++) + if (cfg[i] && types[i] == MERGED) + return fp_format_e'(i); + return fp_format_e'(0); + endfunction + + // Returns the largest number of regs that is active and is set as 
MERGED + function automatic int unsigned get_num_regs_multi(fmt_unsigned_t regs, + fmt_unit_types_t types, + fmt_logic_t cfg); + automatic int unsigned res = 0; + for (int unsigned i = 0; i < NUM_FP_FORMATS; i++) begin + if (cfg[i] && types[i] == MERGED) res = maximum(res, regs[i]); + end + return res; + endfunction + +endpackage diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_rounding.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_rounding.sv new file mode 100644 index 00000000..4e677209 --- /dev/null +++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_rounding.sv @@ -0,0 +1,76 @@ +// Copyright 2019 ETH Zurich and University of Bologna. +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License.
+// +// SPDX-License-Identifier: SHL-0.51 + +// Author: Stefan Mach + +module fpnew_rounding #( + parameter int unsigned AbsWidth=2 // Width of the abolute value, without sign bit +) ( + // Input value + input logic [AbsWidth-1:0] abs_value_i, // absolute value without sign + input logic sign_i, + // Rounding information + input logic [1:0] round_sticky_bits_i, // round and sticky bits {RS} + input fpnew_pkg::roundmode_e rnd_mode_i, + input logic effective_subtraction_i, // sign of inputs affects rounding of zeroes + // Output value + output logic [AbsWidth-1:0] abs_rounded_o, // absolute value without sign + output logic sign_o, + // Output classification + output logic exact_zero_o // output is an exact zero +); + + logic round_up; // Rounding decision + + // Take the rounding decision according to RISC-V spec + // RoundMode | Mnemonic | Meaning + // :--------:|:--------:|:------- + // 000 | RNE | Round to Nearest, ties to Even + // 001 | RTZ | Round towards Zero + // 010 | RDN | Round Down (towards -\infty) + // 011 | RUP | Round Up (towards \infty) + // 100 | RMM | Round to Nearest, ties to Max Magnitude + // 101 | ROD | Round towards odd (this mode is not define in RISC-V FP-SPEC) + // others | | *invalid* + always_comb begin : rounding_decision + unique case (rnd_mode_i) + fpnew_pkg::RNE: // Decide accoring to round/sticky bits + unique case (round_sticky_bits_i) + 2'b00, + 2'b01: round_up = 1'b0; // < ulp/2 away, round down + 2'b10: round_up = abs_value_i[0]; // = ulp/2 away, round towards even result + 2'b11: round_up = 1'b1; // > ulp/2 away, round up + default: round_up = fpnew_pkg::DONT_CARE; + endcase + fpnew_pkg::RTZ: round_up = 1'b0; // always round down + fpnew_pkg::RDN: round_up = (| round_sticky_bits_i) ? sign_i : 1'b0; // to 0 if +, away if - + fpnew_pkg::RUP: round_up = (| round_sticky_bits_i) ? 
~sign_i : 1'b0; // to 0 if -, away if + + fpnew_pkg::RMM: round_up = round_sticky_bits_i[1]; // round down if < ulp/2 away, else up + fpnew_pkg::ROD: round_up = ~abs_value_i[0] & (| round_sticky_bits_i); + default: round_up = fpnew_pkg::DONT_CARE; // propagate x + endcase + end + + // Perform the rounding, exponent change and overflow to inf happens automagically + assign abs_rounded_o = abs_value_i + round_up; + + // True zero result is a zero result without dirty round/sticky bits + assign exact_zero_o = (abs_value_i == '0) && (round_sticky_bits_i == '0); + + // In case of effective subtraction (thus signs of addition operands must have differed) and a + // true zero result, the result sign is '-' in case of RDN and '+' for other modes. + assign sign_o = (exact_zero_o && effective_subtraction_i) + ? (rnd_mode_i == fpnew_pkg::RDN) + : sign_i; + +endmodule diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_top.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_top.sv new file mode 100644 index 00000000..f6116a5d --- /dev/null +++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpnew_top.sv @@ -0,0 +1,185 @@ +// Copyright 2019 ETH Zurich and University of Bologna. +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// SPDX-License-Identifier: SHL-0.51 + +// Author: Stefan Mach + +module fpnew_top #( + // FPU configuration + parameter fpnew_pkg::fpu_features_t Features = fpnew_pkg::RV64D_Xsflt, + parameter fpnew_pkg::fpu_implementation_t Implementation = fpnew_pkg::DEFAULT_NOREGS, + parameter type TagType = logic, + parameter int unsigned TrueSIMDClass = 0, + parameter int unsigned EnableSIMDMask = 0, + // Do not change + localparam int unsigned NumLanes = fpnew_pkg::max_num_lanes(Features.Width, Features.FpFmtMask, Features.EnableVectors), + localparam type MaskType = logic [NumLanes-1:0], + localparam int unsigned WIDTH = Features.Width, + localparam int unsigned NUM_OPERANDS = 3 +) ( + input logic clk_i, + input logic rst_ni, + // Input signals + input logic [NUM_OPERANDS-1:0][WIDTH-1:0] operands_i, + input fpnew_pkg::roundmode_e rnd_mode_i, + input fpnew_pkg::operation_e op_i, + input logic op_mod_i, + input fpnew_pkg::fp_format_e src_fmt_i, + input fpnew_pkg::fp_format_e dst_fmt_i, + input fpnew_pkg::int_format_e int_fmt_i, + input logic vectorial_op_i, + input TagType tag_i, + input MaskType simd_mask_i, + // Input Handshake + input logic in_valid_i, + output logic in_ready_o, + input logic flush_i, + // Output signals + output logic [WIDTH-1:0] result_o, + output fpnew_pkg::status_t status_o, + output TagType tag_o, + // Output handshake + output logic out_valid_o, + input logic out_ready_i, + // Indication of valid data in flight + output logic busy_o +); + + localparam int unsigned NUM_OPGROUPS = fpnew_pkg::NUM_OPGROUPS; + localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS; + + // ---------------- + // Type Definition + // ---------------- + typedef struct packed { + logic [WIDTH-1:0] result; + fpnew_pkg::status_t status; + TagType tag; + } output_t; + + // Handshake signals for the blocks + logic [NUM_OPGROUPS-1:0] opgrp_in_ready, opgrp_out_valid, opgrp_out_ready, opgrp_ext, opgrp_busy; + output_t [NUM_OPGROUPS-1:0] opgrp_outputs; + + logic 
[NUM_FORMATS-1:0][NUM_OPERANDS-1:0] is_boxed; + + // ----------- + // Input Side + // ----------- + assign in_ready_o = in_valid_i & opgrp_in_ready[fpnew_pkg::get_opgroup(op_i)]; + + // NaN-boxing check + for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_nanbox_check + localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt)); + // NaN boxing is only generated if it's enabled and needed + if (Features.EnableNanBox && (FP_WIDTH < WIDTH)) begin : check + for (genvar op = 0; op < int'(NUM_OPERANDS); op++) begin : operands + assign is_boxed[fmt][op] = (!vectorial_op_i) + ? operands_i[op][WIDTH-1:FP_WIDTH] == '1 + : 1'b1; + end + end else begin : no_check + assign is_boxed[fmt] = '1; + end + end + + // Filter out the mask if not used + MaskType simd_mask; + assign simd_mask = simd_mask_i | ~{NumLanes{logic'(EnableSIMDMask)}}; + + // ------------------------- + // Generate Operation Blocks + // ------------------------- + for (genvar opgrp = 0; opgrp < int'(NUM_OPGROUPS); opgrp++) begin : gen_operation_groups + localparam int unsigned NUM_OPS = fpnew_pkg::num_operands(fpnew_pkg::opgroup_e'(opgrp)); + + logic in_valid; + logic [NUM_FORMATS-1:0][NUM_OPS-1:0] input_boxed; + + assign in_valid = in_valid_i & (fpnew_pkg::get_opgroup(op_i) == fpnew_pkg::opgroup_e'(opgrp)); + + // slice out input boxing + always_comb begin : slice_inputs + for (int unsigned fmt = 0; fmt < NUM_FORMATS; fmt++) + input_boxed[fmt] = is_boxed[fmt][NUM_OPS-1:0]; + end + + fpnew_opgroup_block #( + .OpGroup ( fpnew_pkg::opgroup_e'(opgrp) ), + .Width ( WIDTH ), + .EnableVectors ( Features.EnableVectors ), + .FpFmtMask ( Features.FpFmtMask ), + .IntFmtMask ( Features.IntFmtMask ), + .FmtPipeRegs ( Implementation.PipeRegs[opgrp] ), + .FmtUnitTypes ( Implementation.UnitTypes[opgrp] ), + .PipeConfig ( Implementation.PipeConfig ), + .TagType ( TagType ), + .TrueSIMDClass ( TrueSIMDClass ) + ) i_opgroup_block ( + .clk_i, + .rst_ni, + .operands_i ( 
operands_i[NUM_OPS-1:0] ), + .is_boxed_i ( input_boxed ), + .rnd_mode_i, + .op_i, + .op_mod_i, + .src_fmt_i, + .dst_fmt_i, + .int_fmt_i, + .vectorial_op_i, + .tag_i, + .simd_mask_i ( simd_mask ), + .in_valid_i ( in_valid ), + .in_ready_o ( opgrp_in_ready[opgrp] ), + .flush_i, + .result_o ( opgrp_outputs[opgrp].result ), + .status_o ( opgrp_outputs[opgrp].status ), + .extension_bit_o ( opgrp_ext[opgrp] ), + .tag_o ( opgrp_outputs[opgrp].tag ), + .out_valid_o ( opgrp_out_valid[opgrp] ), + .out_ready_i ( opgrp_out_ready[opgrp] ), + .busy_o ( opgrp_busy[opgrp] ) + ); + end + + // ------------------ + // Arbitrate Outputs + // ------------------ + output_t arbiter_output; + + // Round-Robin arbiter to decide which result to use + rr_arb_tree #( + .NumIn ( NUM_OPGROUPS ), + .DataType ( output_t ), + .AxiVldRdy ( 1'b1 ) + ) i_arbiter ( + .clk_i, + .rst_ni, + .flush_i, + .rr_i ( '0 ), + .req_i ( opgrp_out_valid ), + .gnt_o ( opgrp_out_ready ), + .data_i ( opgrp_outputs ), + .gnt_i ( out_ready_i ), + .req_o ( out_valid_o ), + .data_o ( arbiter_output ), + .idx_o ( /* unused */ ) + ); + + // Unpack output + assign result_o = arbiter_output.result; + assign status_o = arbiter_output.status; + assign tag_o = arbiter_output.tag; + + assign busy_o = (| opgrp_busy); + +endmodule diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv new file mode 100644 index 00000000..bda9c01f --- /dev/null +++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv @@ -0,0 +1,3413 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the “License”); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +//////////////////////////////////////////////////////////////////////////////// +// Company: IIS @ ETHZ - Federal Institute of Technology // +// // +// Engineers: Lei Li lile@iis.ee.ethz.ch // +// // +// Additional contributions by: // +// // +// // +// // +// Create Date: 04/03/2018 // +// Design Name: FPU // +// Module Name: control_mvp.sv // +// Project Name: Private FPU // +// Language: SystemVerilog // +// // +// Description: the control logic of div and sqrt // +// // +// Revision Date: 12/04/2018 // +// Lei Li // +// To address some requirements by Stefan and add low power // +// control for special cases // +// Revision Date: 13/04/2018 // +// Lei Li // +// To fix some bug found in Control FSM // +// when Iteration_unit_num_S = 2'b10 // +// // +// // +// // +//////////////////////////////////////////////////////////////////////////////// + +import defs_div_sqrt_mvp::*; + +module control_mvp + + (//Input + input logic Clk_CI, + input logic Rst_RBI, + input logic Div_start_SI , + input logic Sqrt_start_SI, + input logic Start_SI, + input logic Kill_SI, + input logic Special_case_SBI, + input logic Special_case_dly_SBI, + input logic [C_PC-1:0] Precision_ctl_SI, + input logic [1:0] Format_sel_SI, + input logic [C_MANT_FP64:0] Numerator_DI, + input logic [C_EXP_FP64:0] Exp_num_DI, + input logic [C_MANT_FP64:0] Denominator_DI, + input logic [C_EXP_FP64:0] Exp_den_DI, + + + output logic Div_start_dly_SO , + output logic Sqrt_start_dly_SO, + output logic Div_enable_SO, + output logic Sqrt_enable_SO, + + + //To next stage + output logic Full_precision_SO, + output logic FP32_SO, + output logic FP64_SO, + output logic 
FP16_SO, + output logic FP16ALT_SO, + + output logic Ready_SO, + output logic Done_SO, + + output logic [C_MANT_FP64+4:0] Mant_result_prenorm_DO, + // output logic [3:0] Round_bit_DO, + output logic [C_EXP_FP64+1:0] Exp_result_prenorm_DO + ); + + logic [C_MANT_FP64+1+4:0] Partial_remainder_DN,Partial_remainder_DP; //58bits,r=q+2 + logic [C_MANT_FP64+4:0] Quotient_DP; //57bits + ///////////////////////////////////////////////////////////////////////////// + // Assign Inputs // + ///////////////////////////////////////////////////////////////////////////// + logic [C_MANT_FP64+1:0] Numerator_se_D; //sign extension and hidden bit + logic [C_MANT_FP64+1:0] Denominator_se_D; //signa extension and hidden bit + logic [C_MANT_FP64+1:0] Denominator_se_DB; //1's complement + + assign Numerator_se_D={1'b0,Numerator_DI}; + + assign Denominator_se_D={1'b0,Denominator_DI}; + + always_comb + begin + if(FP32_SO) + begin + Denominator_se_DB={~Denominator_se_D[C_MANT_FP64+1:C_MANT_FP64-C_MANT_FP32], {(C_MANT_FP64-C_MANT_FP32){1'b0}} }; + end + else if(FP64_SO) begin + Denominator_se_DB=~Denominator_se_D; + end + else if(FP16_SO) begin + Denominator_se_DB={~Denominator_se_D[C_MANT_FP64+1:C_MANT_FP64-C_MANT_FP16], {(C_MANT_FP64-C_MANT_FP16){1'b0}} }; + end + else begin + Denominator_se_DB={~Denominator_se_D[C_MANT_FP64+1:C_MANT_FP64-C_MANT_FP16ALT], {(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; + end + end + + + logic [C_MANT_FP64+1:0] Mant_D_sqrt_Norm; + + assign Mant_D_sqrt_Norm=Exp_num_DI[0]?{1'b0,Numerator_DI}:{Numerator_DI,1'b0}; //for sqrt + + ///////////////////////////////////////////////////////////////////////////// + // Format Selection // + ///////////////////////////////////////////////////////////////////////////// + logic [1:0] Format_sel_S; + + always_ff @(posedge Clk_CI, negedge Rst_RBI) + begin + if(~Rst_RBI) + begin + Format_sel_S<='b0; + end + else if(Start_SI&&Ready_SO) + begin + Format_sel_S<=Format_sel_SI; + end + else + begin + Format_sel_S<=Format_sel_S; + end + end 
+ + assign FP32_SO = (Format_sel_S==2'b00); + assign FP64_SO = (Format_sel_S==2'b01); + assign FP16_SO = (Format_sel_S==2'b10); + assign FP16ALT_SO = (Format_sel_S==2'b11); + + + + ///////////////////////////////////////////////////////////////////////////// + // Precision Control // + ///////////////////////////////////////////////////////////////////////////// + + logic [C_PC-1:0] Precision_ctl_S; + always_ff @(posedge Clk_CI, negedge Rst_RBI) + begin + if(~Rst_RBI) + begin + Precision_ctl_S<='b0; + end + else if(Start_SI&&Ready_SO) + begin + Precision_ctl_S<=Precision_ctl_SI; + end + else + begin + Precision_ctl_S<=Precision_ctl_S; + end + end + assign Full_precision_SO = (Precision_ctl_S==6'h00); + + + + logic [5:0] State_ctl_S; + logic [5:0] State_Two_iteration_unit_S; + logic [5:0] State_Four_iteration_unit_S; + + assign State_Two_iteration_unit_S = Precision_ctl_S[C_PC-1:1]; //Two iteration units + assign State_Four_iteration_unit_S = Precision_ctl_S[C_PC-1:2]; //Four iteration units + always_comb + begin + case(Iteration_unit_num_S) +//////////////////////one iteration unit, start/////////////////////////////////////// + 2'b00: //one iteration unit + begin + case(Format_sel_S) + 2'b00: //FP32 + begin + if(Full_precision_SO) + begin + State_ctl_S = 6'h1b; //24+4 more iterations for rounding bits + end + else + begin + State_ctl_S = Precision_ctl_S; + end + end + 2'b01: //FP64 + begin + if(Full_precision_SO) + begin + State_ctl_S = 6'h38; //53+4 more iterations for rounding bits + end + else + begin + State_ctl_S = Precision_ctl_S; + end + end + 2'b10: //FP16 + begin + if(Full_precision_SO) + begin + State_ctl_S = 6'h0e; //11+4 more iterations for rounding bits + end + else + begin + State_ctl_S = Precision_ctl_S; + end + end + 2'b11: //FP16ALT + begin + if(Full_precision_SO) + begin + State_ctl_S = 6'h0b; //8+4 more iterations for rounding bits + end + else + begin + State_ctl_S = Precision_ctl_S; + end + end + endcase + end +//////////////////////one 
iteration unit, end/////////////////////////////////////// + +//////////////////////two iteration units, start/////////////////////////////////////// + 2'b01: //two iteration units + begin + case(Format_sel_S) + 2'b00: //FP32 + begin + if(Full_precision_SO) + begin + State_ctl_S = 6'h0d; //24+4 more iterations for rounding bits + end + else + begin + State_ctl_S = State_Two_iteration_unit_S; + end + end + 2'b01: //FP64 + begin + if(Full_precision_SO) + begin + State_ctl_S = 6'h1b; //53+3 more iterations for rounding bits + end + else + begin + State_ctl_S = State_Two_iteration_unit_S; + end + end + 2'b10: //FP16 + begin + if(Full_precision_SO) + begin + State_ctl_S = 6'h06; //11+3 more iterations for rounding bits + end + else + begin + State_ctl_S = State_Two_iteration_unit_S; + end + end + 2'b11: //FP16ALT + begin + if(Full_precision_SO) + begin + State_ctl_S = 6'h05; //8+4 more iterations for rounding bits + end + else + begin + State_ctl_S = State_Two_iteration_unit_S; + end + end + endcase + end +//////////////////////two iteration units, end/////////////////////////////////////// + +//////////////////////three iteration units, start/////////////////////////////////////// + 2'b10: //three iteration units + begin + case(Format_sel_S) + 2'b00: //FP32 + begin + case(Precision_ctl_S) + 6'h00: + begin + State_ctl_S = 6'h08; //24+3 more iterations for rounding bits + end + 6'h06,6'h07,6'h08: + begin + State_ctl_S = 6'h02; + end + 6'h09,6'h0a,6'h0b: + begin + State_ctl_S = 6'h03; + end + 6'h0c,6'h0d,6'h0e: + begin + State_ctl_S = 6'h04; + end + 6'h0f,6'h10,6'h11: + begin + State_ctl_S = 6'h05; + end + 6'h12,6'h13,6'h14: + begin + State_ctl_S = 6'h06; + end + 6'h15,6'h16,6'h17: + begin + State_ctl_S = 6'h07; + end + default: + begin + State_ctl_S = 6'h08; //24+3 more iterations for rounding bits + end + endcase + end + 2'b01: //FP64 + begin + case(Precision_ctl_S) + 6'h00: + begin + State_ctl_S = 6'h12; //53+4 more iterations for rounding bits + end + 
6'h06,6'h07,6'h08: + begin + State_ctl_S = 6'h02; + end + 6'h09,6'h0a,6'h0b: + begin + State_ctl_S = 6'h03; + end + 6'h0c,6'h0d,6'h0e: + begin + State_ctl_S = 6'h04; + end + 6'h0f,6'h10,6'h11: + begin + State_ctl_S = 6'h05; + end + 6'h12,6'h13,6'h14: + begin + State_ctl_S = 6'h06; + end + 6'h15,6'h16,6'h17: + begin + State_ctl_S = 6'h07; + end + 6'h18,6'h19,6'h1a: + begin + State_ctl_S = 6'h08; + end + 6'h1b,6'h1c,6'h1d: + begin + State_ctl_S = 6'h09; + end + 6'h1e,6'h1f,6'h20: + begin + State_ctl_S = 6'h0a; + end + 6'h21,6'h22,6'h23: + begin + State_ctl_S = 6'h0b; + end + 6'h24,6'h25,6'h26: + begin + State_ctl_S = 6'h0c; + end + 6'h27,6'h28,6'h29: + begin + State_ctl_S = 6'h0d; + end + 6'h2a,6'h2b,6'h2c: + begin + State_ctl_S = 6'h0e; + end + 6'h2d,6'h2e,6'h2f: + begin + State_ctl_S = 6'h0f; + end + 6'h30,6'h31,6'h32: + begin + State_ctl_S = 6'h10; + end + 6'h33,6'h34,6'h35: + begin + State_ctl_S = 6'h11; + end + default: + begin + State_ctl_S = 6'h12; //53+4 more iterations for rounding bits + end + endcase + end + 2'b10: //FP16 + begin + case(Precision_ctl_S) + 6'h00: + begin + State_ctl_S = 6'h04; //12+3 more iterations for rounding bits + end + 6'h06,6'h07,6'h08: + begin + State_ctl_S = 6'h02; + end + 6'h09,6'h0a,6'h0b: + begin + State_ctl_S = 6'h03; + end + default: + begin + State_ctl_S = 6'h04; //12+3 more iterations for rounding bits + end + endcase + end + 2'b11: //FP16ALT + begin + case(Precision_ctl_S) + 6'h00: + begin + State_ctl_S = 6'h03; //8+4 more iterations for rounding bits + end + 6'h06,6'h07,6'h08: + begin + State_ctl_S = 6'h02; + end + default: + begin + State_ctl_S = 6'h03; //8+4 more iterations for rounding bits + end + endcase + end + endcase + end +//////////////////////three iteration units, end/////////////////////////////////////// + +//////////////////////four iteration units, start/////////////////////////////////////// + 2'b11: //four iteration units + begin + case(Format_sel_S) + 2'b00: //FP32 + begin + if(Full_precision_SO) + begin 
+ State_ctl_S = 6'h06; //24+4 more iterations for rounding bits + end + else + begin + State_ctl_S = State_Four_iteration_unit_S; + end + end + 2'b01: //FP64 + begin + if(Full_precision_SO) + begin + State_ctl_S = 6'h0d; //53+3 more iterations for rounding bits + end + else + begin + State_ctl_S = State_Four_iteration_unit_S; + end + end + 2'b10: //FP16 + begin + if(Full_precision_SO) + begin + State_ctl_S = 6'h03; //11+4 more iterations for rounding bits + end + else + begin + State_ctl_S = State_Four_iteration_unit_S; + end + end + 2'b11: //FP16ALT + begin + if(Full_precision_SO) + begin + State_ctl_S = 6'h02; //8+4 more iterations for rounding bits + end + else + begin + State_ctl_S = State_Four_iteration_unit_S; + end + end + endcase + end +//////////////////////four iteration units, end/////////////////////////////////////// + + endcase + end + + + ///////////////////////////////////////////////////////////////////////////// + // control logic // + ///////////////////////////////////////////////////////////////////////////// + + logic Div_start_dly_S; + + always_ff @(posedge Clk_CI, negedge Rst_RBI) // generate Div_start_dly_S signal + begin + if(~Rst_RBI) + begin + Div_start_dly_S<=1'b0; + end + else if(Div_start_SI&&Ready_SO) + begin + Div_start_dly_S<=1'b1; + end + else + begin + Div_start_dly_S<=1'b0; + end + end + + assign Div_start_dly_SO=Div_start_dly_S; + + always_ff @(posedge Clk_CI, negedge Rst_RBI) begin // generate Div_enable_SO signal + if(~Rst_RBI) + Div_enable_SO<=1'b0; + // Synchronous reset with Flush + else if (Kill_SI) + Div_enable_SO <= 1'b0; + else if(Div_start_SI&&Ready_SO) + Div_enable_SO<=1'b1; + else if(Done_SO) + Div_enable_SO<=1'b0; + else + Div_enable_SO<=Div_enable_SO; + end + + logic Sqrt_start_dly_S; + + always_ff @(posedge Clk_CI, negedge Rst_RBI) // generate Sqrt_start_dly_SI signal + begin + if(~Rst_RBI) + begin + Sqrt_start_dly_S<=1'b0; + end + else if(Sqrt_start_SI&&Ready_SO) + begin + Sqrt_start_dly_S<=1'b1; + end + else + 
begin + Sqrt_start_dly_S<=1'b0; + end + end + assign Sqrt_start_dly_SO=Sqrt_start_dly_S; + + always_ff @(posedge Clk_CI, negedge Rst_RBI) begin // generate Sqrt_enable_SO signal + if(~Rst_RBI) + Sqrt_enable_SO<=1'b0; + else if (Kill_SI) + Sqrt_enable_SO <= 1'b0; + else if(Sqrt_start_SI&&Ready_SO) + Sqrt_enable_SO<=1'b1; + else if(Done_SO) + Sqrt_enable_SO<=1'b0; + else + Sqrt_enable_SO<=Sqrt_enable_SO; + end + + logic [5:0] Crtl_cnt_S; + logic Start_dly_S; + + assign Start_dly_S=Div_start_dly_S |Sqrt_start_dly_S; + + logic Fsm_enable_S; + assign Fsm_enable_S=( (Start_dly_S | (| Crtl_cnt_S)) && (~Kill_SI) && Special_case_dly_SBI); + + logic Final_state_S; + assign Final_state_S= (Crtl_cnt_S==State_ctl_S); + + + always_ff @(posedge Clk_CI, negedge Rst_RBI) //control_FSM + begin + if (~Rst_RBI) + begin + Crtl_cnt_S <= '0; + end + else if (Final_state_S | Kill_SI) + begin + Crtl_cnt_S <= '0; + end + else if(Fsm_enable_S) // one cycle Start_SI + begin + Crtl_cnt_S <= Crtl_cnt_S+1; + end + else + begin + Crtl_cnt_S <= '0; + end + end // always_ff + + + + always_ff @(posedge Clk_CI, negedge Rst_RBI) //Generate Done_SO, they can share this Done_SO. 
+ begin + if(~Rst_RBI) + begin + Done_SO<=1'b0; + end + else if(Start_SI&&Ready_SO) + begin + if(~Special_case_SBI) + begin + Done_SO<=1'b1; + end + else + begin + Done_SO<=1'b0; + end + end + else if(Final_state_S) + begin + Done_SO<=1'b1; + end + else + begin + Done_SO<=1'b0; + end + end + + + + + always_ff @(posedge Clk_CI, negedge Rst_RBI) //Generate Ready_SO + begin + if(~Rst_RBI) + begin + Ready_SO<=1'b1; + end + + else if(Start_SI&&Ready_SO) + begin + if(~Special_case_SBI) + begin + Ready_SO<=1'b1; + end + else + begin + Ready_SO<=1'b0; + end + end + else if(Final_state_S | Kill_SI) + begin + Ready_SO<=1'b1; + end + else + begin + Ready_SO<=Ready_SO; + end + end + + + ///////////////////////////////////////////////////////////////////////////// + // Declarations for square root when Iteration_unit_num_S = 2'b00, start // + //////////////////////////////////////////////////////////////////////////// + + logic Qcnt_one_0; + logic Qcnt_one_1; + logic [1:0] Qcnt_one_2; + logic [2:0] Qcnt_one_3; + logic [3:0] Qcnt_one_4; + logic [4:0] Qcnt_one_5; + logic [5:0] Qcnt_one_6; + logic [6:0] Qcnt_one_7; + logic [7:0] Qcnt_one_8; + logic [8:0] Qcnt_one_9; + logic [9:0] Qcnt_one_10; + logic [10:0] Qcnt_one_11; + logic [11:0] Qcnt_one_12; + logic [12:0] Qcnt_one_13; + logic [13:0] Qcnt_one_14; + logic [14:0] Qcnt_one_15; + logic [15:0] Qcnt_one_16; + logic [16:0] Qcnt_one_17; + logic [17:0] Qcnt_one_18; + logic [18:0] Qcnt_one_19; + logic [19:0] Qcnt_one_20; + logic [20:0] Qcnt_one_21; + logic [21:0] Qcnt_one_22; + logic [22:0] Qcnt_one_23; + logic [23:0] Qcnt_one_24; + logic [24:0] Qcnt_one_25; + logic [25:0] Qcnt_one_26; + logic [26:0] Qcnt_one_27; + logic [27:0] Qcnt_one_28; + logic [28:0] Qcnt_one_29; + logic [29:0] Qcnt_one_30; + logic [30:0] Qcnt_one_31; + logic [31:0] Qcnt_one_32; + logic [32:0] Qcnt_one_33; + logic [33:0] Qcnt_one_34; + logic [34:0] Qcnt_one_35; + logic [35:0] Qcnt_one_36; + logic [36:0] Qcnt_one_37; + logic [37:0] Qcnt_one_38; + logic [38:0] 
Qcnt_one_39; + logic [39:0] Qcnt_one_40; + logic [40:0] Qcnt_one_41; + logic [41:0] Qcnt_one_42; + logic [42:0] Qcnt_one_43; + logic [43:0] Qcnt_one_44; + logic [44:0] Qcnt_one_45; + logic [45:0] Qcnt_one_46; + logic [46:0] Qcnt_one_47; + logic [47:0] Qcnt_one_48; + logic [48:0] Qcnt_one_49; + logic [49:0] Qcnt_one_50; + logic [50:0] Qcnt_one_51; + logic [51:0] Qcnt_one_52; + logic [52:0] Qcnt_one_53; + logic [53:0] Qcnt_one_54; + logic [54:0] Qcnt_one_55; + logic [55:0] Qcnt_one_56; + logic [56:0] Qcnt_one_57; + logic [57:0] Qcnt_one_58; + logic [58:0] Qcnt_one_59; + logic [59:0] Qcnt_one_60; + + ///////////////////////////////////////////////////////////////////////////// + // Declarations for square root when Iteration_unit_num_S = 2'b00, end // + //////////////////////////////////////////////////////////////////////////// + + + + ///////////////////////////////////////////////////////////////////////////// + // Declarations for square root when Iteration_unit_num_S = 2'b01, start // + //////////////////////////////////////////////////////////////////////////// + logic [1:0] Qcnt_two_0; + logic [2:0] Qcnt_two_1; + logic [4:0] Qcnt_two_2; + logic [6:0] Qcnt_two_3; + logic [8:0] Qcnt_two_4; + logic [10:0] Qcnt_two_5; + logic [12:0] Qcnt_two_6; + logic [14:0] Qcnt_two_7; + logic [16:0] Qcnt_two_8; + logic [18:0] Qcnt_two_9; + logic [20:0] Qcnt_two_10; + logic [22:0] Qcnt_two_11; + logic [24:0] Qcnt_two_12; + logic [26:0] Qcnt_two_13; + logic [28:0] Qcnt_two_14; + logic [30:0] Qcnt_two_15; + logic [32:0] Qcnt_two_16; + logic [34:0] Qcnt_two_17; + logic [36:0] Qcnt_two_18; + logic [38:0] Qcnt_two_19; + logic [40:0] Qcnt_two_20; + logic [42:0] Qcnt_two_21; + logic [44:0] Qcnt_two_22; + logic [46:0] Qcnt_two_23; + logic [48:0] Qcnt_two_24; + logic [50:0] Qcnt_two_25; + logic [52:0] Qcnt_two_26; + logic [54:0] Qcnt_two_27; + logic [56:0] Qcnt_two_28; + ///////////////////////////////////////////////////////////////////////////// + // Declarations for square root when 
Iteration_unit_num_S = 2'b01, end // + //////////////////////////////////////////////////////////////////////////// + + + ///////////////////////////////////////////////////////////////////////////// + // Declarations for square root when Iteration_unit_num_S = 2'b10, start // + //////////////////////////////////////////////////////////////////////////// + logic [2:0] Qcnt_three_0; + logic [4:0] Qcnt_three_1; + logic [7:0] Qcnt_three_2; + logic [10:0] Qcnt_three_3; + logic [13:0] Qcnt_three_4; + logic [16:0] Qcnt_three_5; + logic [19:0] Qcnt_three_6; + logic [22:0] Qcnt_three_7; + logic [25:0] Qcnt_three_8; + logic [28:0] Qcnt_three_9; + logic [31:0] Qcnt_three_10; + logic [34:0] Qcnt_three_11; + logic [37:0] Qcnt_three_12; + logic [40:0] Qcnt_three_13; + logic [43:0] Qcnt_three_14; + logic [46:0] Qcnt_three_15; + logic [49:0] Qcnt_three_16; + logic [52:0] Qcnt_three_17; + logic [55:0] Qcnt_three_18; + logic [58:0] Qcnt_three_19; + logic [61:0] Qcnt_three_20; + ///////////////////////////////////////////////////////////////////////////// + // Declarations for square root when Iteration_unit_num_S = 2'b10, end // + //////////////////////////////////////////////////////////////////////////// + + + ///////////////////////////////////////////////////////////////////////////// + // Declarations for square root when Iteration_unit_num_S = 2'b11, start // + //////////////////////////////////////////////////////////////////////////// + logic [3:0] Qcnt_four_0; + logic [6:0] Qcnt_four_1; + logic [10:0] Qcnt_four_2; + logic [14:0] Qcnt_four_3; + logic [18:0] Qcnt_four_4; + logic [22:0] Qcnt_four_5; + logic [26:0] Qcnt_four_6; + logic [30:0] Qcnt_four_7; + logic [34:0] Qcnt_four_8; + logic [38:0] Qcnt_four_9; + logic [42:0] Qcnt_four_10; + logic [46:0] Qcnt_four_11; + logic [50:0] Qcnt_four_12; + logic [54:0] Qcnt_four_13; + logic [58:0] Qcnt_four_14; + + ///////////////////////////////////////////////////////////////////////////// + // Declarations for square root when 
Iteration_unit_num_S = 2'b11, end // + //////////////////////////////////////////////////////////////////////////// + + + + logic [C_MANT_FP64+1+4:0] Sqrt_R0,Sqrt_Q0,Q_sqrt0,Q_sqrt_com_0; + logic [C_MANT_FP64+1+4:0] Sqrt_R1,Sqrt_Q1,Q_sqrt1,Q_sqrt_com_1; + logic [C_MANT_FP64+1+4:0] Sqrt_R2,Sqrt_Q2,Q_sqrt2,Q_sqrt_com_2; + logic [C_MANT_FP64+1+4:0] Sqrt_R3,Sqrt_Q3,Q_sqrt3,Q_sqrt_com_3,Sqrt_R4; //Sqrt_Q4; + + + logic [1:0] Sqrt_DI [3:0]; + logic [1:0] Sqrt_DO [3:0]; + logic Sqrt_carry_DO; + + + logic [C_MANT_FP64+1+4:0] Iteration_cell_a_D [3:0]; + logic [C_MANT_FP64+1+4:0] Iteration_cell_b_D [3:0]; + logic [C_MANT_FP64+1+4:0] Iteration_cell_a_BMASK_D [3:0]; + logic [C_MANT_FP64+1+4:0] Iteration_cell_b_BMASK_D [3:0]; + logic Iteration_cell_carry_D [3:0]; + logic [C_MANT_FP64+1+4:0] Iteration_cell_sum_D [3:0]; + logic [C_MANT_FP64+1+4:0] Iteration_cell_sum_AMASK_D [3:0]; + + + logic [3:0] Sqrt_quotinent_S; + + + always_comb + begin // + case (Format_sel_S) + 2'b00: + begin + Sqrt_quotinent_S = {(~Iteration_cell_sum_AMASK_D[0][C_MANT_FP32+5]),(~Iteration_cell_sum_AMASK_D[1][C_MANT_FP32+5]),(~Iteration_cell_sum_AMASK_D[2][C_MANT_FP32+5]),(~Iteration_cell_sum_AMASK_D[3][C_MANT_FP32+5])}; + Q_sqrt_com_0 ={ {(C_MANT_FP64-C_MANT_FP32){1'b0}},~Q_sqrt0[C_MANT_FP32+5:0] }; + Q_sqrt_com_1 ={ {(C_MANT_FP64-C_MANT_FP32){1'b0}},~Q_sqrt1[C_MANT_FP32+5:0] }; + Q_sqrt_com_2 ={ {(C_MANT_FP64-C_MANT_FP32){1'b0}},~Q_sqrt2[C_MANT_FP32+5:0] }; + Q_sqrt_com_3 ={ {(C_MANT_FP64-C_MANT_FP32){1'b0}},~Q_sqrt3[C_MANT_FP32+5:0] }; + end + 2'b01: + begin + Sqrt_quotinent_S = {Iteration_cell_carry_D[0],Iteration_cell_carry_D[1],Iteration_cell_carry_D[2],Iteration_cell_carry_D[3]}; + Q_sqrt_com_0=~Q_sqrt0; + Q_sqrt_com_1=~Q_sqrt1; + Q_sqrt_com_2=~Q_sqrt2; + Q_sqrt_com_3=~Q_sqrt3; + end + 2'b10: + begin + Sqrt_quotinent_S = 
{(~Iteration_cell_sum_AMASK_D[0][C_MANT_FP16+5]),(~Iteration_cell_sum_AMASK_D[1][C_MANT_FP16+5]),(~Iteration_cell_sum_AMASK_D[2][C_MANT_FP16+5]),(~Iteration_cell_sum_AMASK_D[3][C_MANT_FP16+5])}; + Q_sqrt_com_0 ={ {(C_MANT_FP64-C_MANT_FP16){1'b0}},~Q_sqrt0[C_MANT_FP16+5:0] }; + Q_sqrt_com_1 ={ {(C_MANT_FP64-C_MANT_FP16){1'b0}},~Q_sqrt1[C_MANT_FP16+5:0] }; + Q_sqrt_com_2 ={ {(C_MANT_FP64-C_MANT_FP16){1'b0}},~Q_sqrt2[C_MANT_FP16+5:0] }; + Q_sqrt_com_3 ={ {(C_MANT_FP64-C_MANT_FP16){1'b0}},~Q_sqrt3[C_MANT_FP16+5:0] }; + end + 2'b11: + begin + Sqrt_quotinent_S = {(~Iteration_cell_sum_AMASK_D[0][C_MANT_FP16ALT+5]),(~Iteration_cell_sum_AMASK_D[1][C_MANT_FP16ALT+5]),(~Iteration_cell_sum_AMASK_D[2][C_MANT_FP16ALT+5]),(~Iteration_cell_sum_AMASK_D[3][C_MANT_FP16ALT+5])}; + Q_sqrt_com_0 ={ {(C_MANT_FP64-C_MANT_FP16ALT){1'b0}},~Q_sqrt0[C_MANT_FP16ALT+5:0] }; + Q_sqrt_com_1 ={ {(C_MANT_FP64-C_MANT_FP16ALT){1'b0}},~Q_sqrt1[C_MANT_FP16ALT+5:0] }; + Q_sqrt_com_2 ={ {(C_MANT_FP64-C_MANT_FP16ALT){1'b0}},~Q_sqrt2[C_MANT_FP16ALT+5:0] }; + Q_sqrt_com_3 ={ {(C_MANT_FP64-C_MANT_FP16ALT){1'b0}},~Q_sqrt3[C_MANT_FP16ALT+5:0] }; + end + endcase + end + + + + assign Qcnt_one_0= {1'b0}; //qk for each feedback + assign Qcnt_one_1= {Quotient_DP[0]}; + assign Qcnt_one_2= {Quotient_DP[1:0]}; + assign Qcnt_one_3= {Quotient_DP[2:0]}; + assign Qcnt_one_4= {Quotient_DP[3:0]}; + assign Qcnt_one_5= {Quotient_DP[4:0]}; + assign Qcnt_one_6= {Quotient_DP[5:0]}; + assign Qcnt_one_7= {Quotient_DP[6:0]}; + assign Qcnt_one_8= {Quotient_DP[7:0]}; + assign Qcnt_one_9= {Quotient_DP[8:0]}; + assign Qcnt_one_10= {Quotient_DP[9:0]}; + assign Qcnt_one_11= {Quotient_DP[10:0]}; + assign Qcnt_one_12= {Quotient_DP[11:0]}; + assign Qcnt_one_13= {Quotient_DP[12:0]}; + assign Qcnt_one_14= {Quotient_DP[13:0]}; + assign Qcnt_one_15= {Quotient_DP[14:0]}; + assign Qcnt_one_16= {Quotient_DP[15:0]}; + assign Qcnt_one_17= {Quotient_DP[16:0]}; + assign Qcnt_one_18= {Quotient_DP[17:0]}; + assign Qcnt_one_19= {Quotient_DP[18:0]}; + 
assign Qcnt_one_20= {Quotient_DP[19:0]}; + assign Qcnt_one_21= {Quotient_DP[20:0]}; + assign Qcnt_one_22= {Quotient_DP[21:0]}; + assign Qcnt_one_23= {Quotient_DP[22:0]}; + assign Qcnt_one_24= {Quotient_DP[23:0]}; + assign Qcnt_one_25= {Quotient_DP[24:0]}; + assign Qcnt_one_26= {Quotient_DP[25:0]}; + assign Qcnt_one_27= {Quotient_DP[26:0]}; + assign Qcnt_one_28= {Quotient_DP[27:0]}; + assign Qcnt_one_29= {Quotient_DP[28:0]}; + assign Qcnt_one_30= {Quotient_DP[29:0]}; + assign Qcnt_one_31= {Quotient_DP[30:0]}; + assign Qcnt_one_32= {Quotient_DP[31:0]}; + assign Qcnt_one_33= {Quotient_DP[32:0]}; + assign Qcnt_one_34= {Quotient_DP[33:0]}; + assign Qcnt_one_35= {Quotient_DP[34:0]}; + assign Qcnt_one_36= {Quotient_DP[35:0]}; + assign Qcnt_one_37= {Quotient_DP[36:0]}; + assign Qcnt_one_38= {Quotient_DP[37:0]}; + assign Qcnt_one_39= {Quotient_DP[38:0]}; + assign Qcnt_one_40= {Quotient_DP[39:0]}; + assign Qcnt_one_41= {Quotient_DP[40:0]}; + assign Qcnt_one_42= {Quotient_DP[41:0]}; + assign Qcnt_one_43= {Quotient_DP[42:0]}; + assign Qcnt_one_44= {Quotient_DP[43:0]}; + assign Qcnt_one_45= {Quotient_DP[44:0]}; + assign Qcnt_one_46= {Quotient_DP[45:0]}; + assign Qcnt_one_47= {Quotient_DP[46:0]}; + assign Qcnt_one_48= {Quotient_DP[47:0]}; + assign Qcnt_one_49= {Quotient_DP[48:0]}; + assign Qcnt_one_50= {Quotient_DP[49:0]}; + assign Qcnt_one_51= {Quotient_DP[50:0]}; + assign Qcnt_one_52= {Quotient_DP[51:0]}; + assign Qcnt_one_53= {Quotient_DP[52:0]}; + assign Qcnt_one_54= {Quotient_DP[53:0]}; + assign Qcnt_one_55= {Quotient_DP[54:0]}; + assign Qcnt_one_56= {Quotient_DP[55:0]}; + assign Qcnt_one_57= {Quotient_DP[56:0]}; + + + assign Qcnt_two_0 = {1'b0, Sqrt_quotinent_S[3]}; //qk for each feedback + assign Qcnt_two_1 = {Quotient_DP[1:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_2 = {Quotient_DP[3:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_3 = {Quotient_DP[5:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_4 = {Quotient_DP[7:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_5 = 
{Quotient_DP[9:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_6 = {Quotient_DP[11:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_7 = {Quotient_DP[13:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_8 = {Quotient_DP[15:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_9 = {Quotient_DP[17:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_10 = {Quotient_DP[19:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_11 = {Quotient_DP[21:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_12 = {Quotient_DP[23:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_13 = {Quotient_DP[25:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_14 = {Quotient_DP[27:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_15 = {Quotient_DP[29:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_16 = {Quotient_DP[31:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_17 = {Quotient_DP[33:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_18 = {Quotient_DP[35:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_19 = {Quotient_DP[37:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_20 = {Quotient_DP[39:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_21 = {Quotient_DP[41:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_22 = {Quotient_DP[43:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_23 = {Quotient_DP[45:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_24 = {Quotient_DP[47:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_25 = {Quotient_DP[49:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_26 = {Quotient_DP[51:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_27 = {Quotient_DP[53:0],Sqrt_quotinent_S[3]}; + assign Qcnt_two_28 = {Quotient_DP[55:0],Sqrt_quotinent_S[3]}; + + + assign Qcnt_three_0 = {1'b0, Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; //qk for each feedback + assign Qcnt_three_1 = {Quotient_DP[2:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_2 = {Quotient_DP[5:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_3 = {Quotient_DP[8:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_4 = {Quotient_DP[11:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_5 = 
{Quotient_DP[14:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_6 = {Quotient_DP[17:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_7 = {Quotient_DP[20:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_8 = {Quotient_DP[23:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_9 = {Quotient_DP[26:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_10 = {Quotient_DP[29:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_11 = {Quotient_DP[32:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_12 = {Quotient_DP[35:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_13 = {Quotient_DP[38:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_14 = {Quotient_DP[41:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_15 = {Quotient_DP[44:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_16 = {Quotient_DP[47:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_17 = {Quotient_DP[50:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_18 = {Quotient_DP[53:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + assign Qcnt_three_19 = {Quotient_DP[56:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2]}; + + + assign Qcnt_four_0 = {1'b0, Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_1 = {Quotient_DP[3:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_2 = {Quotient_DP[7:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_3 = {Quotient_DP[11:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_4 = {Quotient_DP[15:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_5 = {Quotient_DP[19:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_6 = {Quotient_DP[23:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_7 
= {Quotient_DP[27:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_8 = {Quotient_DP[31:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_9 = {Quotient_DP[35:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_10 = {Quotient_DP[39:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_11 = {Quotient_DP[43:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_12 = {Quotient_DP[47:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_13 = {Quotient_DP[51:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + assign Qcnt_four_14 = {Quotient_DP[55:0],Sqrt_quotinent_S[3],Sqrt_quotinent_S[2],Sqrt_quotinent_S[1]}; + + + + + always_comb begin // the intermediate operands for sqrt + + case(Iteration_unit_num_S) + 2'b00: + begin + + ///////////////////////////////////////////////////////////////////////////// + // Operands for square root when Iteration_unit_num_S = 2'b00, start // + ///////////////////////////////////////////////////////////////////////////// + + + + + case(Crtl_cnt_S) + + 6'b000000: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64]; + Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_one_0}; + Sqrt_Q0=Q_sqrt_com_0; + end + 6'b000001: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2]; + Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_one_1}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b000010: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4]; + Q_sqrt0={{(C_MANT_FP64+4){1'b0}},Qcnt_one_2}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b000011: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-5:C_MANT_FP64-6]; + Q_sqrt0={{(C_MANT_FP64+3){1'b0}},Qcnt_one_3}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b000100: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-7:C_MANT_FP64-8]; + 
Q_sqrt0={{(C_MANT_FP64+2){1'b0}},Qcnt_one_4}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b000101: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-9:C_MANT_FP64-10]; + Q_sqrt0={{(C_MANT_FP64+1){1'b0}},Qcnt_one_5}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b000110: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-11:C_MANT_FP64-12]; + Q_sqrt0={{(C_MANT_FP64){1'b0}},Qcnt_one_6}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b000111: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-13:C_MANT_FP64-14]; + Q_sqrt0={{(C_MANT_FP64-1){1'b0}},Qcnt_one_7}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b001000: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-15:C_MANT_FP64-16]; + Q_sqrt0={{(C_MANT_FP64-2){1'b0}},Qcnt_one_8}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b001001: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-17:C_MANT_FP64-18]; + Q_sqrt0={{(C_MANT_FP64-3){1'b0}},Qcnt_one_9}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b001010: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-19:C_MANT_FP64-20]; + Q_sqrt0={{(C_MANT_FP64-4){1'b0}},Qcnt_one_10}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b001011: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-21:C_MANT_FP64-22]; + Q_sqrt0={{(C_MANT_FP64-5){1'b0}},Qcnt_one_11}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b001100: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-23:C_MANT_FP64-24]; + Q_sqrt0={{(C_MANT_FP64-6){1'b0}},Qcnt_one_12}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b001101: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-25:C_MANT_FP64-26]; + Q_sqrt0={{(C_MANT_FP64-7){1'b0}},Qcnt_one_13}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b001110: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-27:C_MANT_FP64-28]; + Q_sqrt0={{(C_MANT_FP64-8){1'b0}},Qcnt_one_14}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b001111: + begin + 
Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-29:C_MANT_FP64-30]; + Q_sqrt0={{(C_MANT_FP64-9){1'b0}},Qcnt_one_15}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b010000: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-31:C_MANT_FP64-32]; + Q_sqrt0={{(C_MANT_FP64-10){1'b0}},Qcnt_one_16}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b010001: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-33:C_MANT_FP64-34]; + Q_sqrt0={{(C_MANT_FP64-11){1'b0}},Qcnt_one_17}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b010010: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-35:C_MANT_FP64-36]; + Q_sqrt0={{(C_MANT_FP64-12){1'b0}},Qcnt_one_18}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b010011: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-37:C_MANT_FP64-38]; + Q_sqrt0={{(C_MANT_FP64-13){1'b0}},Qcnt_one_19}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b010100: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-39:C_MANT_FP64-40]; + Q_sqrt0={{(C_MANT_FP64-14){1'b0}},Qcnt_one_20}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b010101: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-41:C_MANT_FP64-42]; + Q_sqrt0={{(C_MANT_FP64-15){1'b0}},Qcnt_one_21}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b010110: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-43:C_MANT_FP64-44]; + Q_sqrt0={{(C_MANT_FP64-16){1'b0}},Qcnt_one_22}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b010111: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-45:C_MANT_FP64-46]; + Q_sqrt0={{(C_MANT_FP64-17){1'b0}},Qcnt_one_23}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b011000: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-47:C_MANT_FP64-48]; + Q_sqrt0={{(C_MANT_FP64-18){1'b0}},Qcnt_one_24}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b011001: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-49:C_MANT_FP64-50]; + Q_sqrt0={{(C_MANT_FP64-19){1'b0}},Qcnt_one_25}; + 
Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b011010: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-51:C_MANT_FP64-52]; + Q_sqrt0={{(C_MANT_FP64-20){1'b0}},Qcnt_one_26}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b011011: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-21){1'b0}},Qcnt_one_27}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b011100: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-22){1'b0}},Qcnt_one_28}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b011101: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-23){1'b0}},Qcnt_one_29}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b011110: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-24){1'b0}},Qcnt_one_30}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b011111: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-25){1'b0}},Qcnt_one_31}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b100000: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-26){1'b0}},Qcnt_one_32}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b100001: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-27){1'b0}},Qcnt_one_33}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b100010: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-28){1'b0}},Qcnt_one_34}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b100011: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-29){1'b0}},Qcnt_one_35}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b100100: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-30){1'b0}},Qcnt_one_36}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b100101: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-31){1'b0}},Qcnt_one_37}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b100110: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-32){1'b0}},Qcnt_one_38}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b100111: + begin + 
Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-33){1'b0}},Qcnt_one_39}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b101000: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-34){1'b0}},Qcnt_one_40}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b101001: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-35){1'b0}},Qcnt_one_41}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b101010: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-36){1'b0}},Qcnt_one_42}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b101011: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-37){1'b0}},Qcnt_one_43}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b101100: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-38){1'b0}},Qcnt_one_44}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b101101: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-39){1'b0}},Qcnt_one_45}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b101110: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-40){1'b0}},Qcnt_one_46}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b101111: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-41){1'b0}},Qcnt_one_47}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b110000: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-42){1'b0}},Qcnt_one_48}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b110001: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-43){1'b0}},Qcnt_one_49}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b110010: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-44){1'b0}},Qcnt_one_50}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b110011: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-45){1'b0}},Qcnt_one_51}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b110100: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-46){1'b0}},Qcnt_one_52}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 
6'b110101: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-47){1'b0}},Qcnt_one_53}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b110110: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-48){1'b0}},Qcnt_one_54}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b110111: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-49){1'b0}},Qcnt_one_55}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + 6'b111000: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-50){1'b0}},Qcnt_one_56}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + end + + default: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0='0; + Sqrt_Q0='0; + end + endcase + end + + + ///////////////////////////////////////////////////////////////////////////// + // Operands for square root when Iteration_unit_num_S = 2'b00, end // + ///////////////////////////////////////////////////////////////////////////// + + + 2'b01: + begin + ///////////////////////////////////////////////////////////////////////////// + // Operands for square root when Iteration_unit_num_S = 2'b01, start // + ///////////////////////////////////////////////////////////////////////////// + case(Crtl_cnt_S) + + 6'b000000: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64]; + Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_two_0[1]}; + Sqrt_Q0=Q_sqrt_com_0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2]; + Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_two_0[1:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b000001: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4]; + Q_sqrt0={{(C_MANT_FP64+4){1'b0}},Qcnt_two_1[2:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-5:C_MANT_FP64-6]; + Q_sqrt1={{(C_MANT_FP64+3){1'b0}},Qcnt_two_1[2:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b000010: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-7:C_MANT_FP64-8]; + Q_sqrt0={{(C_MANT_FP64+2){1'b0}},Qcnt_two_2[4:1]}; + 
Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-9:C_MANT_FP64-10]; + Q_sqrt1={{(C_MANT_FP64+1){1'b0}},Qcnt_two_2[4:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b000011: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-11:C_MANT_FP64-12]; + Q_sqrt0={{(C_MANT_FP64){1'b0}},Qcnt_two_3[6:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-13:C_MANT_FP64-14]; + Q_sqrt1={{(C_MANT_FP64-1){1'b0}},Qcnt_two_3[6:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b000100: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-15:C_MANT_FP64-16]; + Q_sqrt0={{(C_MANT_FP64-2){1'b0}},Qcnt_two_4[8:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-17:C_MANT_FP64-18]; + Q_sqrt1={{(C_MANT_FP64-3){1'b0}},Qcnt_two_4[8:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b000101: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-19:C_MANT_FP64-20]; + Q_sqrt0={{(C_MANT_FP64-4){1'b0}},Qcnt_two_5[10:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-21:C_MANT_FP64-22]; + Q_sqrt1={{(C_MANT_FP64-5){1'b0}},Qcnt_two_5[10:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b000110: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-23:C_MANT_FP64-24]; + Q_sqrt0={{(C_MANT_FP64-6){1'b0}},Qcnt_two_6[12:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-25:C_MANT_FP64-26]; + Q_sqrt1={{(C_MANT_FP64-7){1'b0}},Qcnt_two_6[12:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b000111: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-27:C_MANT_FP64-28]; + Q_sqrt0={{(C_MANT_FP64-8){1'b0}},Qcnt_two_7[14:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-29:C_MANT_FP64-30]; + Q_sqrt1={{(C_MANT_FP64-9){1'b0}},Qcnt_two_7[14:0]}; + 
Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b001000: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-31:C_MANT_FP64-32]; + Q_sqrt0={{(C_MANT_FP64-10){1'b0}},Qcnt_two_8[16:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-33:C_MANT_FP64-34]; + Q_sqrt1={{(C_MANT_FP64-11){1'b0}},Qcnt_two_8[16:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b001001: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-35:C_MANT_FP64-36]; + Q_sqrt0={{(C_MANT_FP64-12){1'b0}},Qcnt_two_9[18:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-37:C_MANT_FP64-38]; + Q_sqrt1={{(C_MANT_FP64-13){1'b0}},Qcnt_two_9[18:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b001010: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-39:C_MANT_FP64-40]; + Q_sqrt0={{(C_MANT_FP64-14){1'b0}},Qcnt_two_10[20:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-41:C_MANT_FP64-42]; + Q_sqrt1={{(C_MANT_FP64-15){1'b0}},Qcnt_two_10[20:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b001011: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-43:C_MANT_FP64-44]; + Q_sqrt0={{(C_MANT_FP64-16){1'b0}},Qcnt_two_11[22:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-45:C_MANT_FP64-46]; + Q_sqrt1={{(C_MANT_FP64-17){1'b0}},Qcnt_two_11[22:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b001100: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-47:C_MANT_FP64-48]; + Q_sqrt0={{(C_MANT_FP64-18){1'b0}},Qcnt_two_12[24:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-49:C_MANT_FP64-50]; + Q_sqrt1={{(C_MANT_FP64-19){1'b0}},Qcnt_two_12[24:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b001101: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-51:C_MANT_FP64-52]; + 
Q_sqrt0={{(C_MANT_FP64-20){1'b0}},Qcnt_two_13[26:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-21){1'b0}},Qcnt_two_13[26:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b001110: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-22){1'b0}},Qcnt_two_14[28:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-23){1'b0}},Qcnt_two_14[28:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b001111: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-24){1'b0}},Qcnt_two_15[30:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-25){1'b0}},Qcnt_two_15[30:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b010000: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-26){1'b0}},Qcnt_two_16[32:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-27){1'b0}},Qcnt_two_16[32:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b010001: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-28){1'b0}},Qcnt_two_17[34:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-29){1'b0}},Qcnt_two_17[34:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b010010: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-30){1'b0}},Qcnt_two_18[36:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-31){1'b0}},Qcnt_two_18[36:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b010011: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-32){1'b0}},Qcnt_two_19[38:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-33){1'b0}},Qcnt_two_19[38:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b010100: + begin + Sqrt_DI[0]=2'b00; + 
Q_sqrt0={{(C_MANT_FP64-34){1'b0}},Qcnt_two_20[40:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-35){1'b0}},Qcnt_two_20[40:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b010101: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-36){1'b0}},Qcnt_two_21[42:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-37){1'b0}},Qcnt_two_21[42:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b010110: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-38){1'b0}},Qcnt_two_22[44:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-39){1'b0}},Qcnt_two_22[44:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b010111: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-40){1'b0}},Qcnt_two_23[46:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-41){1'b0}},Qcnt_two_23[46:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b011000: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-42){1'b0}},Qcnt_two_24[48:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-43){1'b0}},Qcnt_two_24[48:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b011001: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-44){1'b0}},Qcnt_two_25[50:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-45){1'b0}},Qcnt_two_25[50:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b011010: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-46){1'b0}},Qcnt_two_26[52:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-47){1'b0}},Qcnt_two_26[52:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b011011: + begin + Sqrt_DI[0]=2'b00; + 
Q_sqrt0={{(C_MANT_FP64-48){1'b0}},Qcnt_two_27[54:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-49){1'b0}},Qcnt_two_27[54:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + 6'b011100: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-50){1'b0}},Qcnt_two_28[56:1]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-51){1'b0}},Qcnt_two_28[56:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + default: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64]; + Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_two_0[1]}; + Sqrt_Q0=Q_sqrt_com_0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2]; + Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_two_0[1:0]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + end + + endcase + end + + ///////////////////////////////////////////////////////////////////////////// + // Operands for square root when Iteration_unit_num_S = 2'b01, end // + ///////////////////////////////////////////////////////////////////////////// + + + 2'b10: + begin + ///////////////////////////////////////////////////////////////////////////// + // Operands for square root when Iteration_unit_num_S = 2'b10, start // + ///////////////////////////////////////////////////////////////////////////// + + case(Crtl_cnt_S) + 6'b000000: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64]; + Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_three_0[2]}; + Sqrt_Q0=Q_sqrt_com_0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2]; + Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_three_0[2:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4]; + Q_sqrt2={{(C_MANT_FP64+3){1'b0}},Qcnt_three_0[2:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b000001: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-5:C_MANT_FP64-6]; + 
Q_sqrt0={{(C_MANT_FP64+2){1'b0}},Qcnt_three_1[4:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-7:C_MANT_FP64-8]; + Q_sqrt1={{(C_MANT_FP64+1){1'b0}},Qcnt_three_1[4:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-9:C_MANT_FP64-10]; + Q_sqrt2={{(C_MANT_FP64){1'b0}},Qcnt_three_1[4:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b000010: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-11:C_MANT_FP64-12]; + Q_sqrt0={{(C_MANT_FP64-1){1'b0}},Qcnt_three_2[7:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-13:C_MANT_FP64-14]; + Q_sqrt1={{(C_MANT_FP64-2){1'b0}},Qcnt_three_2[7:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-15:C_MANT_FP64-16]; + Q_sqrt2={{(C_MANT_FP64-3){1'b0}},Qcnt_three_2[7:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b000011: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-17:C_MANT_FP64-18]; + Q_sqrt0={{(C_MANT_FP64-4){1'b0}},Qcnt_three_3[10:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-19:C_MANT_FP64-20]; + Q_sqrt1={{(C_MANT_FP64-5){1'b0}},Qcnt_three_3[10:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-21:C_MANT_FP64-22]; + Q_sqrt2={{(C_MANT_FP64-6){1'b0}},Qcnt_three_3[10:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b000100: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-23:C_MANT_FP64-24]; + Q_sqrt0={{(C_MANT_FP64-7){1'b0}},Qcnt_three_4[13:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-25:C_MANT_FP64-26]; + Q_sqrt1={{(C_MANT_FP64-8){1'b0}},Qcnt_three_4[13:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-27:C_MANT_FP64-28]; + Q_sqrt2={{(C_MANT_FP64-9){1'b0}},Qcnt_three_4[13:0]}; + 
Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b000101: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-29:C_MANT_FP64-30]; + Q_sqrt0={{(C_MANT_FP64-10){1'b0}},Qcnt_three_5[16:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-31:C_MANT_FP64-32]; + Q_sqrt1={{(C_MANT_FP64-11){1'b0}},Qcnt_three_5[16:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-33:C_MANT_FP64-34]; + Q_sqrt2={{(C_MANT_FP64-12){1'b0}},Qcnt_three_5[16:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b000110: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-35:C_MANT_FP64-36]; + Q_sqrt0={{(C_MANT_FP64-13){1'b0}},Qcnt_three_6[19:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-37:C_MANT_FP64-38]; + Q_sqrt1={{(C_MANT_FP64-14){1'b0}},Qcnt_three_6[19:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-39:C_MANT_FP64-40]; + Q_sqrt2={{(C_MANT_FP64-15){1'b0}},Qcnt_three_6[19:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b000111: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-41:C_MANT_FP64-42]; + Q_sqrt0={{(C_MANT_FP64-16){1'b0}},Qcnt_three_7[22:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-43:C_MANT_FP64-44]; + Q_sqrt1={{(C_MANT_FP64-17){1'b0}},Qcnt_three_7[22:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-45:C_MANT_FP64-46]; + Q_sqrt2={{(C_MANT_FP64-18){1'b0}},Qcnt_three_7[22:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b001000: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-47:C_MANT_FP64-48]; + Q_sqrt0={{(C_MANT_FP64-19){1'b0}},Qcnt_three_8[25:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-49:C_MANT_FP64-50]; + Q_sqrt1={{(C_MANT_FP64-20){1'b0}},Qcnt_three_8[25:1]}; + 
Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-51:C_MANT_FP64-52]; + Q_sqrt2={{(C_MANT_FP64-21){1'b0}},Qcnt_three_8[25:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b001001: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-22){1'b0}},Qcnt_three_9[28:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-23){1'b0}},Qcnt_three_9[28:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-24){1'b0}},Qcnt_three_9[28:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b001010: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-25){1'b0}},Qcnt_three_10[31:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-26){1'b0}},Qcnt_three_10[31:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-27){1'b0}},Qcnt_three_10[31:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b001011: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-28){1'b0}},Qcnt_three_11[34:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-29){1'b0}},Qcnt_three_11[34:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-30){1'b0}},Qcnt_three_11[34:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b001100: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-31){1'b0}},Qcnt_three_12[37:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-32){1'b0}},Qcnt_three_12[37:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-33){1'b0}},Qcnt_three_12[37:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b001101: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-34){1'b0}},Qcnt_three_13[40:2]}; + 
Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-35){1'b0}},Qcnt_three_13[40:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-36){1'b0}},Qcnt_three_13[40:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b001110: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-37){1'b0}},Qcnt_three_14[43:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-38){1'b0}},Qcnt_three_14[43:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-39){1'b0}},Qcnt_three_14[43:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b001111: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-40){1'b0}},Qcnt_three_15[46:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-41){1'b0}},Qcnt_three_15[46:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-42){1'b0}},Qcnt_three_15[46:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b010000: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-43){1'b0}},Qcnt_three_16[49:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-44){1'b0}},Qcnt_three_16[49:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-45){1'b0}},Qcnt_three_16[49:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b010001: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-46){1'b0}},Qcnt_three_17[52:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-47){1'b0}},Qcnt_three_17[52:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-48){1'b0}},Qcnt_three_17[52:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + 6'b010010: + begin + 
Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-49){1'b0}},Qcnt_three_18[55:2]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-50){1'b0}},Qcnt_three_18[55:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-51){1'b0}},Qcnt_three_18[55:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + + default : + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64]; + Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_three_0[2]}; + Sqrt_Q0=Q_sqrt_com_0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2]; + Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_three_0[2:1]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4]; + Q_sqrt2={{(C_MANT_FP64+3){1'b0}},Qcnt_three_0[2:0]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + end + endcase + + end + ///////////////////////////////////////////////////////////////////////////// + // Operands for square root when Iteration_unit_num_S = 2'b10, end // + ///////////////////////////////////////////////////////////////////////////// + + + 2'b11: + begin + ///////////////////////////////////////////////////////////////////////////// + // Operands for square root when Iteration_unit_num_S = 2'b11, start // + ///////////////////////////////////////////////////////////////////////////// + + case(Crtl_cnt_S) + + 6'b000000: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64]; + Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_four_0[3]}; + Sqrt_Q0=Q_sqrt_com_0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2]; + Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_four_0[3:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4]; + Q_sqrt2={{(C_MANT_FP64+3){1'b0}},Qcnt_four_0[3:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-5:C_MANT_FP64-6]; + 
Q_sqrt3={{(C_MANT_FP64+2){1'b0}},Qcnt_four_0[3:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b000001: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-7:C_MANT_FP64-8]; + Q_sqrt0={{(C_MANT_FP64+1){1'b0}},Qcnt_four_1[6:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-9:C_MANT_FP64-10]; + Q_sqrt1={{(C_MANT_FP64){1'b0}},Qcnt_four_1[6:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-11:C_MANT_FP64-12]; + Q_sqrt2={{(C_MANT_FP64-1){1'b0}},Qcnt_four_1[6:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-13:C_MANT_FP64-14]; + Q_sqrt3={{(C_MANT_FP64-2){1'b0}},Qcnt_four_1[6:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b000010: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-15:C_MANT_FP64-16]; + Q_sqrt0={{(C_MANT_FP64-3){1'b0}},Qcnt_four_2[10:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-17:C_MANT_FP64-18]; + Q_sqrt1={{(C_MANT_FP64-4){1'b0}},Qcnt_four_2[10:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-19:C_MANT_FP64-20]; + Q_sqrt2={{(C_MANT_FP64-5){1'b0}},Qcnt_four_2[10:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-21:C_MANT_FP64-22]; + Q_sqrt3={{(C_MANT_FP64-6){1'b0}},Qcnt_four_2[10:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b000011: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-23:C_MANT_FP64-24]; + Q_sqrt0={{(C_MANT_FP64-7){1'b0}},Qcnt_four_3[14:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-25:C_MANT_FP64-26]; + Q_sqrt1={{(C_MANT_FP64-8){1'b0}},Qcnt_four_3[14:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-27:C_MANT_FP64-28]; + Q_sqrt2={{(C_MANT_FP64-9){1'b0}},Qcnt_four_3[14:1]}; + 
Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-29:C_MANT_FP64-30]; + Q_sqrt3={{(C_MANT_FP64-10){1'b0}},Qcnt_four_3[14:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b000100: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-31:C_MANT_FP64-32]; + Q_sqrt0={{(C_MANT_FP64-11){1'b0}},Qcnt_four_4[18:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-33:C_MANT_FP64-34]; + Q_sqrt1={{(C_MANT_FP64-12){1'b0}},Qcnt_four_4[18:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-35:C_MANT_FP64-36]; + Q_sqrt2={{(C_MANT_FP64-13){1'b0}},Qcnt_four_4[18:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-37:C_MANT_FP64-38]; + Q_sqrt3={{(C_MANT_FP64-14){1'b0}},Qcnt_four_4[18:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b000101: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-39:C_MANT_FP64-40]; + Q_sqrt0={{(C_MANT_FP64-15){1'b0}},Qcnt_four_5[22:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-41:C_MANT_FP64-42]; + Q_sqrt1={{(C_MANT_FP64-16){1'b0}},Qcnt_four_5[22:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-43:C_MANT_FP64-44]; + Q_sqrt2={{(C_MANT_FP64-17){1'b0}},Qcnt_four_5[22:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-45:C_MANT_FP64-46]; + Q_sqrt3={{(C_MANT_FP64-18){1'b0}},Qcnt_four_5[22:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b000110: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64-47:C_MANT_FP64-48]; + Q_sqrt0={{(C_MANT_FP64-19){1'b0}},Qcnt_four_6[26:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-49:C_MANT_FP64-50]; + Q_sqrt1={{(C_MANT_FP64-20){1'b0}},Qcnt_four_6[26:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + 
Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-51:C_MANT_FP64-52]; + Q_sqrt2={{(C_MANT_FP64-21){1'b0}},Qcnt_four_6[26:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=2'b00; + Q_sqrt3={{(C_MANT_FP64-22){1'b0}},Qcnt_four_6[26:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b000111: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-23){1'b0}},Qcnt_four_7[30:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-24){1'b0}},Qcnt_four_7[30:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-25){1'b0}},Qcnt_four_7[30:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=2'b00; + Q_sqrt3={{(C_MANT_FP64-26){1'b0}},Qcnt_four_7[30:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b001000: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-27){1'b0}},Qcnt_four_8[34:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-28){1'b0}},Qcnt_four_8[34:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-29){1'b0}},Qcnt_four_8[34:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=2'b00; + Q_sqrt3={{(C_MANT_FP64-30){1'b0}},Qcnt_four_8[34:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b001001: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-31){1'b0}},Qcnt_four_9[38:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-32){1'b0}},Qcnt_four_9[38:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-33){1'b0}},Qcnt_four_9[38:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=2'b00; + Q_sqrt3={{(C_MANT_FP64-34){1'b0}},Qcnt_four_9[38:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b001010: + begin + Sqrt_DI[0]=2'b00; + 
Q_sqrt0={{(C_MANT_FP64-35){1'b0}},Qcnt_four_10[42:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-36){1'b0}},Qcnt_four_10[42:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-37){1'b0}},Qcnt_four_10[42:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=2'b00; + Q_sqrt3={{(C_MANT_FP64-38){1'b0}},Qcnt_four_10[42:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b001011: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-39){1'b0}},Qcnt_four_11[46:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-40){1'b0}},Qcnt_four_11[46:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-41){1'b0}},Qcnt_four_11[46:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=2'b00; + Q_sqrt3={{(C_MANT_FP64-42){1'b0}},Qcnt_four_11[46:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b001100: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-43){1'b0}},Qcnt_four_12[50:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-44){1'b0}},Qcnt_four_12[50:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-45){1'b0}},Qcnt_four_12[50:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=2'b00; + Q_sqrt3={{(C_MANT_FP64-46){1'b0}},Qcnt_four_12[50:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + 6'b001101: + begin + Sqrt_DI[0]=2'b00; + Q_sqrt0={{(C_MANT_FP64-47){1'b0}},Qcnt_four_13[54:3]}; + Sqrt_Q0=Quotient_DP[0]?Q_sqrt_com_0:Q_sqrt0; + Sqrt_DI[1]=2'b00; + Q_sqrt1={{(C_MANT_FP64-48){1'b0}},Qcnt_four_13[54:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=2'b00; + Q_sqrt2={{(C_MANT_FP64-49){1'b0}},Qcnt_four_13[54:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=2'b00; 
+ Q_sqrt3={{(C_MANT_FP64-50){1'b0}},Qcnt_four_13[54:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + + default: + begin + Sqrt_DI[0]=Mant_D_sqrt_Norm[C_MANT_FP64+1:C_MANT_FP64]; + Q_sqrt0={{(C_MANT_FP64+5){1'b0}},Qcnt_four_0[3]}; + Sqrt_Q0=Q_sqrt_com_0; + Sqrt_DI[1]=Mant_D_sqrt_Norm[C_MANT_FP64-1:C_MANT_FP64-2]; + Q_sqrt1={{(C_MANT_FP64+4){1'b0}},Qcnt_four_0[3:2]}; + Sqrt_Q1=Sqrt_quotinent_S[3]?Q_sqrt_com_1:Q_sqrt1; + Sqrt_DI[2]=Mant_D_sqrt_Norm[C_MANT_FP64-3:C_MANT_FP64-4]; + Q_sqrt2={{(C_MANT_FP64+3){1'b0}},Qcnt_four_0[3:1]}; + Sqrt_Q2=Sqrt_quotinent_S[2]?Q_sqrt_com_2:Q_sqrt2; + Sqrt_DI[3]=Mant_D_sqrt_Norm[C_MANT_FP64-5:C_MANT_FP64-6]; + Q_sqrt3={{(C_MANT_FP64+2){1'b0}},Qcnt_four_0[3:0]}; + Sqrt_Q3=Sqrt_quotinent_S[1]?Q_sqrt_com_3:Q_sqrt3; + end + endcase + end + endcase + ///////////////////////////////////////////////////////////////////////////// + // Operands for square root when Iteration_unit_num_S = 2'b11, end // + ///////////////////////////////////////////////////////////////////////////// + end + + /* Square-root remainder operands. Sqrt_R0 is zero while Sqrt_start_dly_S is set and the registered partial remainder otherwise. Sqrt_R1..Sqrt_R4 are each assembled from the masked sum of cell 0..3: its bit C_MANT_FP64+5, its bits C_MANT_FP64+2 down to 0, then that cell's Sqrt_DO. */ + + assign Sqrt_R0= ((Sqrt_start_dly_S)?'0:{Partial_remainder_DP[C_MANT_FP64+5:0]}); + assign Sqrt_R1= {Iteration_cell_sum_AMASK_D[0][C_MANT_FP64+5],Iteration_cell_sum_AMASK_D[0][C_MANT_FP64+2:0],Sqrt_DO[0]} ; + assign Sqrt_R2= {Iteration_cell_sum_AMASK_D[1][C_MANT_FP64+5],Iteration_cell_sum_AMASK_D[1][C_MANT_FP64+2:0],Sqrt_DO[1]}; + assign Sqrt_R3= {Iteration_cell_sum_AMASK_D[2][C_MANT_FP64+5],Iteration_cell_sum_AMASK_D[2][C_MANT_FP64+2:0],Sqrt_DO[2]}; + assign Sqrt_R4= {Iteration_cell_sum_AMASK_D[3][C_MANT_FP64+5],Iteration_cell_sum_AMASK_D[3][C_MANT_FP64+2:0],Sqrt_DO[3]}; + /* Denominator_se_format_DB re-packs Denominator_se_DB for the division path: the bits at C_MANT_FP64-C_MANT_FP16ALT-1, C_MANT_FP64-C_MANT_FP16-1 and C_MANT_FP64-C_MANT_FP32-1 are forced to 1 when FP16ALT_SO, FP16_SO or FP32_SO respectively is set and passed through otherwise; FP64_SO and three zero bits fill the low end. */ + logic [C_MANT_FP64+5:0] Denominator_se_format_DB; // + + assign Denominator_se_format_DB={Denominator_se_DB[C_MANT_FP64+1:C_MANT_FP64-C_MANT_FP16ALT],{FP16ALT_SO?FP16ALT_SO:Denominator_se_DB[C_MANT_FP64-C_MANT_FP16ALT-1]}, + Denominator_se_DB[C_MANT_FP64-C_MANT_FP16ALT-2:C_MANT_FP64-C_MANT_FP16],{FP16_SO?FP16_SO:Denominator_se_DB[C_MANT_FP64-C_MANT_FP16-1]}, + 
Denominator_se_DB[C_MANT_FP64-C_MANT_FP16-2:C_MANT_FP64-C_MANT_FP32],{FP32_SO?FP32_SO:Denominator_se_DB[C_MANT_FP64-C_MANT_FP32-1]}, + Denominator_se_DB[C_MANT_FP64-C_MANT_FP32-2:C_MANT_FP64-C_MANT_FP64],FP64_SO,3'b0} ; + // for iteration cell_U0 + logic [C_MANT_FP64+5:0] First_iteration_cell_div_a_D,First_iteration_cell_div_b_D; + logic Sel_b_for_first_S; + /* Cell 0 division operands. Operand a is the numerator Numerator_se_D, packed the same way as the denominator, while Div_start_dly_S is set; afterwards it is Partial_remainder_DP bits C_MANT_FP64+4 down to 3 with Quotient_DP[0] substituted at the position selected by the active format flag, followed by three zero bits. Operand b is Denominator_se_format_DB when Sel_b_for_first_S is set, else Denominator_se_D with four zero bits appended. Sqrt_enable_SO switches both cell inputs to Sqrt_R0 and Sqrt_Q0. */ + + assign First_iteration_cell_div_a_D=(Div_start_dly_S)?{Numerator_se_D[C_MANT_FP64+1:C_MANT_FP64-C_MANT_FP16ALT],{FP16ALT_SO?FP16ALT_SO:Numerator_se_D[C_MANT_FP64-C_MANT_FP16ALT-1]}, + Numerator_se_D[C_MANT_FP64-C_MANT_FP16ALT-2:C_MANT_FP64-C_MANT_FP16],{FP16_SO?FP16_SO:Numerator_se_D[C_MANT_FP64-C_MANT_FP16-1]}, + Numerator_se_D[C_MANT_FP64-C_MANT_FP16-2:C_MANT_FP64-C_MANT_FP32],{FP32_SO?FP32_SO:Numerator_se_D[C_MANT_FP64-C_MANT_FP32-1]}, + Numerator_se_D[C_MANT_FP64-C_MANT_FP32-2:C_MANT_FP64-C_MANT_FP64],FP64_SO,3'b0} + :{Partial_remainder_DP[C_MANT_FP64+4:C_MANT_FP64-C_MANT_FP16ALT+3],{FP16ALT_SO?Quotient_DP[0]:Partial_remainder_DP[C_MANT_FP64-C_MANT_FP16ALT+2]}, + Partial_remainder_DP[C_MANT_FP64-C_MANT_FP16ALT+1:C_MANT_FP64-C_MANT_FP16+3],{FP16_SO?Quotient_DP[0]:Partial_remainder_DP[C_MANT_FP64-C_MANT_FP16+2]}, + Partial_remainder_DP[C_MANT_FP64-C_MANT_FP16+1:C_MANT_FP64-C_MANT_FP32+3],{FP32_SO?Quotient_DP[0]:Partial_remainder_DP[C_MANT_FP64-C_MANT_FP32+2]}, + Partial_remainder_DP[C_MANT_FP64-C_MANT_FP32+1:C_MANT_FP64-C_MANT_FP64+3],FP64_SO&&Quotient_DP[0],3'b0}; + assign Sel_b_for_first_S=(Div_start_dly_S)?1:Quotient_DP[0]; + assign First_iteration_cell_div_b_D=Sel_b_for_first_S?Denominator_se_format_DB:{Denominator_se_D,4'b0}; + assign Iteration_cell_a_BMASK_D[0]=Sqrt_enable_SO?Sqrt_R0:{First_iteration_cell_div_a_D}; + assign Iteration_cell_b_BMASK_D[0]=Sqrt_enable_SO?Sqrt_Q0:{First_iteration_cell_div_b_D}; + + /* Cells 1 to 3 repeat the cell 0 division operand scheme, using the masked sum of the preceding cell as remainder and the inverse of that sum's bit C_MANT_FP64+5 as select. Cell 1 is generated when Iteration_unit_num_S is non-zero, cell 2 when it is 2'b10 or 2'b11, cell 3 only when it is 2'b11. */ + + // for iteration cell_U1 + logic [C_MANT_FP64+5:0] Sec_iteration_cell_div_a_D,Sec_iteration_cell_div_b_D; + logic Sel_b_for_sec_S; + generate + if(|Iteration_unit_num_S) + begin + assign 
Sel_b_for_sec_S=~Iteration_cell_sum_AMASK_D[0][C_MANT_FP64+5]; + assign Sec_iteration_cell_div_a_D={Iteration_cell_sum_AMASK_D[0][C_MANT_FP64+4:C_MANT_FP64-C_MANT_FP16ALT+3],{FP16ALT_SO?Sel_b_for_sec_S:Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP16ALT+2]}, + Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP16ALT+1:C_MANT_FP64-C_MANT_FP16+3],{FP16_SO?Sel_b_for_sec_S:Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP16+2]}, + Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP16+1:C_MANT_FP64-C_MANT_FP32+3],{FP32_SO?Sel_b_for_sec_S:Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP32+2]}, + Iteration_cell_sum_AMASK_D[0][C_MANT_FP64-C_MANT_FP32+1:C_MANT_FP64-C_MANT_FP64+3],FP64_SO&&Sel_b_for_sec_S,3'b0}; + assign Sec_iteration_cell_div_b_D=Sel_b_for_sec_S?Denominator_se_format_DB:{Denominator_se_D,4'b0}; + assign Iteration_cell_a_BMASK_D[1]=Sqrt_enable_SO?Sqrt_R1:{Sec_iteration_cell_div_a_D}; + assign Iteration_cell_b_BMASK_D[1]=Sqrt_enable_SO?Sqrt_Q1:{Sec_iteration_cell_div_b_D}; + end + endgenerate + + // for iteration cell_U2 + logic [C_MANT_FP64+5:0] Thi_iteration_cell_div_a_D,Thi_iteration_cell_div_b_D; + logic Sel_b_for_thi_S; + generate + if((Iteration_unit_num_S==2'b10) | (Iteration_unit_num_S==2'b11)) + begin + assign Sel_b_for_thi_S=~Iteration_cell_sum_AMASK_D[1][C_MANT_FP64+5]; + assign Thi_iteration_cell_div_a_D={Iteration_cell_sum_AMASK_D[1][C_MANT_FP64+4:C_MANT_FP64-C_MANT_FP16ALT+3],{FP16ALT_SO?Sel_b_for_thi_S:Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP16ALT+2]}, + Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP16ALT+1:C_MANT_FP64-C_MANT_FP16+3],{FP16_SO?Sel_b_for_thi_S:Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP16+2]}, + Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP16+1:C_MANT_FP64-C_MANT_FP32+3],{FP32_SO?Sel_b_for_thi_S:Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP32+2]}, + Iteration_cell_sum_AMASK_D[1][C_MANT_FP64-C_MANT_FP32+1:C_MANT_FP64-C_MANT_FP64+3],FP64_SO&&Sel_b_for_thi_S,3'b0}; + assign 
Thi_iteration_cell_div_b_D=Sel_b_for_thi_S?Denominator_se_format_DB:{Denominator_se_D,4'b0}; + assign Iteration_cell_a_BMASK_D[2]=Sqrt_enable_SO?Sqrt_R2:{Thi_iteration_cell_div_a_D}; + assign Iteration_cell_b_BMASK_D[2]=Sqrt_enable_SO?Sqrt_Q2:{Thi_iteration_cell_div_b_D}; + end + endgenerate + + // for iteration cell_U3 + logic [C_MANT_FP64+5:0] Fou_iteration_cell_div_a_D,Fou_iteration_cell_div_b_D; + logic Sel_b_for_fou_S; + + generate + if(Iteration_unit_num_S==2'b11) + begin + assign Sel_b_for_fou_S=~Iteration_cell_sum_AMASK_D[2][C_MANT_FP64+5]; + assign Fou_iteration_cell_div_a_D={Iteration_cell_sum_AMASK_D[2][C_MANT_FP64+4:C_MANT_FP64-C_MANT_FP16ALT+3],{FP16ALT_SO?Sel_b_for_fou_S:Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP16ALT+2]}, + Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP16ALT+1:C_MANT_FP64-C_MANT_FP16+3],{FP16_SO?Sel_b_for_fou_S:Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP16+2]}, + Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP16+1:C_MANT_FP64-C_MANT_FP32+3],{FP32_SO?Sel_b_for_fou_S:Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP32+2]}, + Iteration_cell_sum_AMASK_D[2][C_MANT_FP64-C_MANT_FP32+1:C_MANT_FP64-C_MANT_FP64+3],FP64_SO&&Sel_b_for_fou_S,3'b0}; + assign Fou_iteration_cell_div_b_D=Sel_b_for_fou_S?Denominator_se_format_DB:{Denominator_se_D,4'b0}; + assign Iteration_cell_a_BMASK_D[3]=Sqrt_enable_SO?Sqrt_R3:{Fou_iteration_cell_div_a_D}; + assign Iteration_cell_b_BMASK_D[3]=Sqrt_enable_SO?Sqrt_Q3:{Fou_iteration_cell_div_b_D}; + end + endgenerate + + ///////////////////////////////////////////////////////////////////////////// + // Masking Control // + ///////////////////////////////////////////////////////////////////////////// + /* Mask_bits_ctl_S is driven with a constant 58-bit all-ones value, so the per-bit AND with it in the instance loop does not clear any of the low 58 bits. NOTE(review): the vector is C_MANT_FP64+6 bits wide; confirm C_MANT_FP64 is 52 so the constant spans it exactly. */ + + logic [C_MANT_FP64+1+4:0] Mask_bits_ctl_S; //For extension + + assign Mask_bits_ctl_S =58'h3ff_ffff_ffff_ffff; //It is not needed. 
The corresponding process is handled the above codes + ///////////////////////////////////////////////////////////////////////////// + // Iteration Instances with masking control // + ///////////////////////////////////////////////////////////////////////////// + /* Generates one iteration_div_sqrt_mvp cell for every index from 0 to Iteration_unit_num_S. Operand and sum vectors are ANDed bit by bit with Mask_bits_ctl_S, and Div_enable_SO, Div_start_dly_S and Sqrt_enable_SO are copied into per-cell array elements before being connected. */ + + logic Div_enable_SI [3:0]; + logic Div_start_dly_SI [3:0]; + logic Sqrt_enable_SI [3:0]; + generate + genvar i,j; + for (i=0; i <= Iteration_unit_num_S ; i++) + begin + for (j = 0; j <= C_MANT_FP64+5; j++) begin + assign Iteration_cell_a_D[i][j] = Mask_bits_ctl_S[j] && Iteration_cell_a_BMASK_D[i][j]; + assign Iteration_cell_b_D[i][j] = Mask_bits_ctl_S[j] && Iteration_cell_b_BMASK_D[i][j]; + assign Iteration_cell_sum_AMASK_D[i][j] = Mask_bits_ctl_S[j] && Iteration_cell_sum_D[i][j]; + end + + assign Div_enable_SI[i] = Div_enable_SO; + assign Div_start_dly_SI[i] = Div_start_dly_S; + assign Sqrt_enable_SI[i] = Sqrt_enable_SO; + iteration_div_sqrt_mvp #(C_MANT_FP64+6) iteration_div_sqrt + ( + .A_DI (Iteration_cell_a_D[i] ), + .B_DI (Iteration_cell_b_D[i] ), + .Div_enable_SI (Div_enable_SI[i] ), + .Div_start_dly_SI (Div_start_dly_SI[i] ), + .Sqrt_enable_SI (Sqrt_enable_SI[i] ), + .D_DI (Sqrt_DI[i] ), + .D_DO (Sqrt_DO[i] ), + .Sum_DO (Iteration_cell_sum_D[i] ), + .Carry_out_DO (Iteration_cell_carry_D[i] ) + ); + + end + + endgenerate + + /* Next partial remainder. While Fsm_enable_S is set, the cell with index Iteration_unit_num_S supplies it: Sqrt_R1..Sqrt_R4 for square root, that cell's masked sum for division. Otherwise the registered value is held. */ + + always_comb + begin + case (Iteration_unit_num_S) + 2'b00: + begin + if(Fsm_enable_S) + Partial_remainder_DN = Sqrt_enable_SO?Sqrt_R1:Iteration_cell_sum_AMASK_D[0]; + else + Partial_remainder_DN = Partial_remainder_DP; + end + 2'b01: + begin + if(Fsm_enable_S) + Partial_remainder_DN = Sqrt_enable_SO?Sqrt_R2:Iteration_cell_sum_AMASK_D[1]; + else + Partial_remainder_DN = Partial_remainder_DP; + end + 2'b10: + begin + if(Fsm_enable_S) + Partial_remainder_DN = Sqrt_enable_SO?Sqrt_R3:Iteration_cell_sum_AMASK_D[2]; + else + Partial_remainder_DN = Partial_remainder_DP; + end + 2'b11: + begin + if(Fsm_enable_S) + Partial_remainder_DN = 
Sqrt_enable_SO?Sqrt_R4:Iteration_cell_sum_AMASK_D[3]; + else + Partial_remainder_DN = Partial_remainder_DP; + end + endcase + end + + /* Partial remainder register, cleared asynchronously while Rst_RBI is low. */ + + always_ff @(posedge Clk_CI, negedge Rst_RBI) // partial_remainder + begin + if(~Rst_RBI) + begin + Partial_remainder_DP <= '0; + end + else + begin + Partial_remainder_DP <= Partial_remainder_DN; + end + end + /* Next quotient. While Fsm_enable_S is set, Quotient_DP is shifted left by one bit per active cell and the new low bits come from Sqrt_quotinent_S (square root) or from the cell carry-outs with cell 0 most significant (division). Otherwise the registered value is held. */ + logic [C_MANT_FP64+4:0] Quotient_DN; + + always_comb // Can choose the different carry-outs based on different operations + begin + case (Iteration_unit_num_S) + 2'b00: + begin + if(Fsm_enable_S) + Quotient_DN= Sqrt_enable_SO ? {Quotient_DP[C_MANT_FP64+3:0],Sqrt_quotinent_S[3]} :{Quotient_DP[C_MANT_FP64+3:0],Iteration_cell_carry_D[0]}; + else + Quotient_DN= Quotient_DP; + end + 2'b01: + begin + if(Fsm_enable_S) + Quotient_DN= Sqrt_enable_SO ? {Quotient_DP[C_MANT_FP64+2:0],Sqrt_quotinent_S[3:2]} :{Quotient_DP[C_MANT_FP64+2:0],Iteration_cell_carry_D[0],Iteration_cell_carry_D[1]}; + else + Quotient_DN= Quotient_DP; + end + 2'b10: + begin + if(Fsm_enable_S) + Quotient_DN= Sqrt_enable_SO ? {Quotient_DP[C_MANT_FP64+1:0],Sqrt_quotinent_S[3:1]} : {Quotient_DP[C_MANT_FP64+1:0],Iteration_cell_carry_D[0],Iteration_cell_carry_D[1],Iteration_cell_carry_D[2]}; + else + Quotient_DN= Quotient_DP; + end + 2'b11: + begin + if(Fsm_enable_S) + Quotient_DN= Sqrt_enable_SO ? 
{Quotient_DP[C_MANT_FP64:0],Sqrt_quotinent_S } : {Quotient_DP[C_MANT_FP64:0],Iteration_cell_carry_D[0],Iteration_cell_carry_D[1],Iteration_cell_carry_D[2],Iteration_cell_carry_D[3]}; + else + Quotient_DN= Quotient_DP; + end + endcase + end + /* Quotient register, cleared asynchronously while Rst_RBI is low. */ + always_ff @(posedge Clk_CI, negedge Rst_RBI) // Quotient + begin + if(~Rst_RBI) + begin + Quotient_DP <= '0; + end + else + Quotient_DP <= Quotient_DN; + end + + + ///////////////////////////////////////////////////////////////////////////// + // Precision Control for outputs // + ///////////////////////////////////////////////////////////////////////////// + + +//////////////////////one iteration unit, start/////////////////////////////////////// + generate + if(Iteration_unit_num_S==2'b00) + begin + always_comb + begin + case (Format_sel_S) + 2'b00: + begin + case (Precision_ctl_S) + 6'h00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}}; //+4 + end + 6'h17: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32:0],{(C_MANT_FP64-C_MANT_FP32+4){1'b0}}}; //Precision_ctl_S+1 + end + 6'h16: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-1:0],{(C_MANT_FP64-C_MANT_FP32+4+1){1'b0}}}; //Precision_ctl_S+1 + end + 6'h15: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-2:0],{(C_MANT_FP64-C_MANT_FP32+4+2){1'b0}}}; //Precision_ctl_S+1 + end + 6'h14: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-3:0],{(C_MANT_FP64-C_MANT_FP32+4+3){1'b0}}}; //Precision_ctl_S+1 + end + 6'h13: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-4:0],{(C_MANT_FP64-C_MANT_FP32+4+4){1'b0}}}; //Precision_ctl_S+1 + end + 6'h12: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-5:0],{(C_MANT_FP64-C_MANT_FP32+4+5){1'b0}}}; //Precision_ctl_S+1 + end + 6'h11: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-6:0],{(C_MANT_FP64-C_MANT_FP32+4+6){1'b0}}}; //Precision_ctl_S+1 + end + 6'h10: + begin + Mant_result_prenorm_DO = 
{Quotient_DP[C_MANT_FP32-7:0],{(C_MANT_FP64-C_MANT_FP32+4+7){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0f: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-8:0],{(C_MANT_FP64-C_MANT_FP32+4+8){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0e: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-9:0],{(C_MANT_FP64-C_MANT_FP32+4+9){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0d: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-10:0],{(C_MANT_FP64-C_MANT_FP32+4+10){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-11:0],{(C_MANT_FP64-C_MANT_FP32+4+11){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0b: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-12:0],{(C_MANT_FP64-C_MANT_FP32+4+12){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0a: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-13:0],{(C_MANT_FP64-C_MANT_FP32+4+13){1'b0}}}; //Precision_ctl_S+1 + end + 6'h09: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-14:0],{(C_MANT_FP64-C_MANT_FP32+4+14){1'b0}}}; //Precision_ctl_S+1 + end + 6'h08: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-15:0],{(C_MANT_FP64-C_MANT_FP32+4+15){1'b0}}}; //Precision_ctl_S+1 + end + 6'h07: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-16:0],{(C_MANT_FP64-C_MANT_FP32+4+16){1'b0}}}; //Precision_ctl_S+1 + end + default : + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}}; //+4 + end + endcase + end + + 2'b01: + begin + case (Precision_ctl_S) + 6'h00: + begin + Mant_result_prenorm_DO = Quotient_DP[C_MANT_FP64+4:0]; //+4 + end + 6'h34: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64:0],{(4){1'b0}}}; //Precision_ctl_S+1 + end + 6'h33: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-1:0],{(4+1){1'b0}}}; //Precision_ctl_S+1 + end + 6'h32: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-2:0],{(4+2){1'b0}}}; //Precision_ctl_S+1 + end + 6'h31: + begin 
+ Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-3:0],{(4+3){1'b0}}}; //Precision_ctl_S+1 + end + 6'h30: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-4:0],{(4+4){1'b0}}}; //Precision_ctl_S+1 + end + 6'h2f: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-5:0],{(4+5){1'b0}}}; //Precision_ctl_S+1 + end + 6'h2e: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-6:0],{(4+6){1'b0}}}; //Precision_ctl_S+1 + end + 6'h2d: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-7:0],{(4+7){1'b0}}}; //Precision_ctl_S+1 + end + 6'h2c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-8:0],{(4+8){1'b0}}}; //Precision_ctl_S+1 + end + 6'h2b: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-9:0],{(4+9){1'b0}}}; //Precision_ctl_S+1 + end + 6'h2a: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-10:0],{(4+10){1'b0}}}; //Precision_ctl_S+1 + end + 6'h29: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-11:0],{(4+11){1'b0}}}; //Precision_ctl_S+1 + end + 6'h28: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-12:0],{(4+12){1'b0}}}; //Precision_ctl_S+1 + end + 6'h27: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-13:0],{(4+13){1'b0}}}; //Precision_ctl_S+1 + end + 6'h26: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-14:0],{(4+14){1'b0}}}; //Precision_ctl_S+1 + end + 6'h25: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-15:0],{(4+15){1'b0}}}; //Precision_ctl_S+1 + end + 6'h24: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-16:0],{(4+16){1'b0}}}; //Precision_ctl_S+1 + end + 6'h23: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-17:0],{(4+17){1'b0}}}; //Precision_ctl_S+1 + end + 6'h22: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-18:0],{(4+18){1'b0}}}; //Precision_ctl_S+1 + end + 6'h21: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-19:0],{(4+19){1'b0}}}; //Precision_ctl_S+1 + end + 6'h20: + begin + 
Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-20:0],{(4+20){1'b0}}}; //Precision_ctl_S+1 + end + 6'h1f: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-21:0],{(4+21){1'b0}}}; //Precision_ctl_S+1 + end + 6'h1e: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-22:0],{(4+22){1'b0}}}; //Precision_ctl_S+1 + end + 6'h1d: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-23:0],{(4+23){1'b0}}}; //Precision_ctl_S+1 + end + 6'h1c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-24:0],{(4+24){1'b0}}}; //Precision_ctl_S+1 + end + 6'h1b: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-25:0],{(4+25){1'b0}}}; //Precision_ctl_S+1 + end + 6'h1a: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-26:0],{(4+26){1'b0}}}; //Precision_ctl_S+1 + end + 6'h19: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-27:0],{(4+27){1'b0}}}; //Precision_ctl_S+1 + end + 6'h18: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-28:0],{(4+28){1'b0}}}; //Precision_ctl_S+1 + end + 6'h17: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-29:0],{(4+29){1'b0}}}; //Precision_ctl_S+1 + end + 6'h16: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-30:0],{(4+30){1'b0}}}; //Precision_ctl_S+1 + end + 6'h15: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-31:0],{(4+31){1'b0}}}; //Precision_ctl_S+1 + end + 6'h14: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-32:0],{(4+32){1'b0}}}; //Precision_ctl_S+1 + end + 6'h13: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-33:0],{(4+33){1'b0}}}; //Precision_ctl_S+1 + end + 6'h12: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-34:0],{(4+34){1'b0}}}; //Precision_ctl_S+1 + end + 6'h11: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-35:0],{(4+35){1'b0}}}; //Precision_ctl_S+1 + end + 6'h10: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-36:0],{(4+36){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0f: + 
begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-37:0],{(4+37){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0e: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-38:0],{(4+38){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0d: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-39:0],{(4+39){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-40:0],{(4+40){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0b: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-41:0],{(4+41){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0a: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-42:0],{(4+42){1'b0}}}; //Precision_ctl_S+1 + end + 6'h09: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-43:0],{(4+43){1'b0}}}; //Precision_ctl_S+1 + end + 6'h08: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-44:0],{(4+44){1'b0}}}; //Precision_ctl_S+1 + end + 6'h07: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-45:0],{(4+45){1'b0}}}; //Precision_ctl_S+1 + end + default: + begin + Mant_result_prenorm_DO = Quotient_DP[C_MANT_FP64+4:0]; //+4 + end + endcase + end + + 2'b10: + begin + case (Precision_ctl_S) + 6'b00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+4:0],{(C_MANT_FP64-C_MANT_FP16){1'b0}}}; //+4 + end + 6'h0a: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16:0],{(C_MANT_FP64-C_MANT_FP16+4){1'b0}}}; //Precision_ctl_S+1 + end + 6'h09: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-1:0],{(C_MANT_FP64-C_MANT_FP16+4+1){1'b0}}}; //Precision_ctl_S+1 + end + 6'h08: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-2:0],{(C_MANT_FP64-C_MANT_FP16+4+2){1'b0}}}; //Precision_ctl_S+1 + end + 6'h07: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-3:0],{(C_MANT_FP64-C_MANT_FP16+4+3){1'b0}}}; //Precision_ctl_S+1 + end + default : + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+4:0],{(C_MANT_FP64-C_MANT_FP16){1'b0}}}; //+4 + end 
+ endcase + end + + 2'b11: + begin + + case (Precision_ctl_S) + 6'b00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}}}; //+4 + end + 6'h07: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT:0],{(C_MANT_FP64-C_MANT_FP16ALT+4){1'b0}}}; //Precision_ctl_S+1 + end + default : + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}}}; //+4 + end + endcase + end + endcase + end + end + endgenerate +//////////////////////one iteration unit, end////////////////////////////////////////// + +//////////////////////two iteration units, start/////////////////////////////////////// + generate + if(Iteration_unit_num_S==2'b01) + begin + always_comb + begin + case (Format_sel_S) + 2'b00: + begin + case (Precision_ctl_S) + 6'h00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}}; //+4 + end + 6'h17,6'h16: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32:0],{(C_MANT_FP64-C_MANT_FP32+4){1'b0}}}; //Precision_ctl_S+1 + end + 6'h15,6'h14: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-2:0],{(C_MANT_FP64-C_MANT_FP32+4+2){1'b0}}}; //Precision_ctl_S+1 + end + 6'h13,6'h12: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-4:0],{(C_MANT_FP64-C_MANT_FP32+4+4){1'b0}}}; //Precision_ctl_S+1 + end + 6'h11,6'h10: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-6:0],{(C_MANT_FP64-C_MANT_FP32+4+6){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0f,6'h0e: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-8:0],{(C_MANT_FP64-C_MANT_FP32+4+8){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0d,6'h0c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-10:0],{(C_MANT_FP64-C_MANT_FP32+4+10){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0b,6'h0a: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-12:0],{(C_MANT_FP64-C_MANT_FP32+4+12){1'b0}}}; //Precision_ctl_S+1 + end + 6'h09,6'h08: + begin + 
Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-14:0],{(C_MANT_FP64-C_MANT_FP32+4+14){1'b0}}}; //Precision_ctl_S+1 + end + 6'h07,6'h06: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-16:0],{(C_MANT_FP64-C_MANT_FP32+4+16){1'b0}}}; //Precision_ctl_S+1 + end + default: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}}; //+4 + end + endcase + end + 2'b01: + begin + case (Precision_ctl_S) + 6'h00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+3:0],1'b0}; //+3 + end + 6'h34: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+1:1],{(4){1'b0}} }; //Precision_ctl_S+1 + end + 6'h33,6'h32: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-1:0],{(4+1){1'b0}} }; //Precision_ctl_S+1 + end + 6'h31,6'h30: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-3:0],{(4+3){1'b0}} }; //Precision_ctl_S+1 + end + 6'h2f,6'h2e: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-5:0],{(4+5){1'b0}} }; //Precision_ctl_S+1 + end + 6'h2d,6'h2c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-7:0],{(4+7){1'b0}} }; //Precision_ctl_S+1 + end + 6'h2b,6'h2a: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-9:0],{(4+9){1'b0}} }; //Precision_ctl_S+1 + end + 6'h29,6'h28: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-11:0],{(4+11){1'b0}} }; //Precision_ctl_S+1 + end + 6'h27,6'h26: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-13:0],{(4+13){1'b0}} }; //Precision_ctl_S+1 + end + 6'h25,6'h24: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-15:0],{(4+15){1'b0}} }; //Precision_ctl_S+1 + end + 6'h23,6'h22: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-17:0],{(4+17){1'b0}} }; //Precision_ctl_S+1 + end + 6'h21,6'h20: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-19:0],{(4+19){1'b0}} }; //Precision_ctl_S+1 + end + 6'h1f,6'h1e: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-21:0],{(4+21){1'b0}} }; 
//Precision_ctl_S+1 + end + 6'h1d,6'h1c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-23:0],{(4+23){1'b0}} }; //Precision_ctl_S+1 + end + 6'h1b,6'h1a: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-25:0],{(4+25){1'b0}} }; //Precision_ctl_S+1 + end + 6'h19,6'h18: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-27:0],{(4+27){1'b0}} }; //Precision_ctl_S+1 + end + 6'h17,6'h16: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-29:0],{(4+29){1'b0}} }; //Precision_ctl_S+1 + end + 6'h15,6'h14: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-31:0],{(4+31){1'b0}} }; //Precision_ctl_S+1 + end + 6'h13,6'h12: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-33:0],{(4+33){1'b0}} }; //Precision_ctl_S+1 + end + 6'h11,6'h10: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-35:0],{(4+35){1'b0}} }; //Precision_ctl_S+1 + end + 6'h0f,6'h0e: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-37:0],{(4+37){1'b0}} }; //Precision_ctl_S+1 + end + 6'h0d,6'h0c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-39:0],{(4+39){1'b0}} }; //Precision_ctl_S+1 + end + 6'h0b,6'h0a: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-41:0],{(4+41){1'b0}} }; //Precision_ctl_S+1 + end + 6'h09,6'h08: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-43:0],{(4+43){1'b0}} }; //Precision_ctl_S+1 + end + 6'h07: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-45:0],{(4+45){1'b0}} }; //Precision_ctl_S+1 + end + default: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+3:0],1'b0}; //+3 + end + endcase + end + + 2'b10: + begin + case (Precision_ctl_S) + 6'b00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+3:0],{(C_MANT_FP64-C_MANT_FP16+1){1'b0}} }; //+3 + end + 6'h0a: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+1:1],{(C_MANT_FP64-C_MANT_FP16+4){1'b0}} }; //Precision_ctl_S+1 + end + 6'h09,6'h08: + begin + Mant_result_prenorm_DO = 
{Quotient_DP[C_MANT_FP16-1:0],{(C_MANT_FP64-C_MANT_FP16+4+1){1'b0}} }; //Precision_ctl_S+1 + end + 6'h07: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-3:0],{(C_MANT_FP64-C_MANT_FP16+4+3){1'b0}} }; //Precision_ctl_S+1 + end + default : + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+4:0],{(C_MANT_FP64-C_MANT_FP16){1'b0}} }; //+4 + end + endcase + end + + 2'b11: + begin + + case (Precision_ctl_S) + 6'b00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; //+4 + end + 6'h07: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT:0],{(C_MANT_FP64-C_MANT_FP16ALT+4){1'b0}} }; //Precision_ctl_S+1 + end + default : + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; //+4 + end + endcase + end + endcase + end + end + endgenerate +//////////////////////two iteration units, end////////////////////////////////////////// + +//////////////////////three iteration units, start/////////////////////////////////////// + generate + if(Iteration_unit_num_S==2'b10) + begin + always_comb + begin + case (Format_sel_S) + 2'b00: + begin + case (Precision_ctl_S) + 6'h00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+3:0],{(C_MANT_FP64-C_MANT_FP32+1){1'b0}}}; //+3 + end + 6'h17,6'h16,6'h15: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32:0],{(C_MANT_FP64-C_MANT_FP32+4){1'b0}}}; //Precision_ctl_S+1 + end + 6'h14,6'h13,6'h12: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-3:0],{(C_MANT_FP64-C_MANT_FP32+4+3){1'b0}}}; //Precision_ctl_S+1 + end + 6'h11,6'h10,6'h0f: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-6:0],{(C_MANT_FP64-C_MANT_FP32+4+6){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0e,6'h0d,6'h0c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-9:0],{(C_MANT_FP64-C_MANT_FP32+4+9){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0b,6'h0a,6'h09: + begin + Mant_result_prenorm_DO = 
{Quotient_DP[C_MANT_FP32-12:0],{(C_MANT_FP64-C_MANT_FP32+4+12){1'b0}}}; //Precision_ctl_S+1 + end + 6'h08,6'h07,6'h06: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-15:0],{(C_MANT_FP64-C_MANT_FP32+4+15){1'b0}}}; //Precision_ctl_S+1 + end + default: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+3:0],{(C_MANT_FP64-C_MANT_FP32+1){1'b0}}}; //+3 + end + endcase + end + + 2'b01: + begin + case (Precision_ctl_S) + 6'h00: + begin + Mant_result_prenorm_DO = Quotient_DP[C_MANT_FP64+4:0]; //+4 + end + 6'h34,6'h33: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+1:1],{(4){1'b0}} }; //Precision_ctl_S+1 + end + 6'h32,6'h31,6'h30: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-2:0],{(4+2){1'b0}} }; //Precision_ctl_S+1 + end + 6'h2f,6'h2e,6'h2d: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-5:0],{(4+5){1'b0}} }; //Precision_ctl_S+1 + end + 6'h2c,6'h2b,6'h2a: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-8:0],{(4+8){1'b0}} }; //Precision_ctl_S+1 + end + 6'h29,6'h28,6'h27: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-11:0],{(4+11){1'b0}} }; //Precision_ctl_S+1 + end + 6'h26,6'h25,6'h24: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-14:0],{(4+14){1'b0}} }; //Precision_ctl_S+1 + end + 6'h23,6'h22,6'h21: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-17:0],{(4+17){1'b0}} }; //Precision_ctl_S+1 + end + 6'h20,6'h1f,6'h1e: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-20:0],{(4+20){1'b0}} }; //Precision_ctl_S+1 + end + 6'h1d,6'h1c,6'h1b: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-23:0],{(4+23){1'b0}} }; //Precision_ctl_S+1 + end + 6'h1a,6'h19,6'h18: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-26:0],{(4+26){1'b0}} }; //Precision_ctl_S+1 + end + 6'h17,6'h16,6'h15: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-29:0],{(4+29){1'b0}} }; //Precision_ctl_S+1 + end + 6'h14,6'h13,6'h12: + begin + 
Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-32:0],{(4+32){1'b0}} }; //Precision_ctl_S+1 + end + 6'h11,6'h10,6'h0f: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-35:0],{(4+35){1'b0}} }; //Precision_ctl_S+1 + end + 6'h0e,6'h0d,6'h0c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-38:0],{(4+38){1'b0}} }; //Precision_ctl_S+1 + end + 6'h0b,6'h0a,6'h09: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-41:0],{(4+41){1'b0}} }; //Precision_ctl_S+1 + end + 6'h08,6'h07,6'h06: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-44:0],{(4+44){1'b0}} }; //Precision_ctl_S+1 + end + default: + begin + Mant_result_prenorm_DO = Quotient_DP[C_MANT_FP64+4:0]; //+4 + end + endcase + end + + 2'b10: + begin + case (Precision_ctl_S) + 6'b00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+4:0],{(C_MANT_FP64-C_MANT_FP16){1'b0}} }; //+4 + end + 6'h0a,6'h09: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+1:1],{(C_MANT_FP64-C_MANT_FP16+4){1'b0}} }; //Precision_ctl_S+1 + end + 6'h08,6'h07,6'h06: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16-2:0],{(C_MANT_FP64-C_MANT_FP16+4+2){1'b0}} }; //Precision_ctl_S+1 + end + default : + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+4:0],{(C_MANT_FP64-C_MANT_FP16){1'b0}} }; //+4 + end + endcase + end + + 2'b11: + begin + + case (Precision_ctl_S) + 6'b00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; //+4 + end + 6'h07,6'h06: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+1:1],{(C_MANT_FP64-C_MANT_FP16ALT+4){1'b0}} }; //Precision_ctl_S+1 + end + default : + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; //+4 + end + endcase + end + endcase + end + end + endgenerate +//////////////////////three iteration units, end////////////////////////////////////////// + +//////////////////////four iteration units, 
start/////////////////////////////////////// + generate + if(Iteration_unit_num_S==2'b11) + begin + always_comb + begin + case (Format_sel_S) + 2'b00: + begin + case (Precision_ctl_S) + 6'h00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}}; //+4 + end + 6'h17,6'h16,6'h15,6'h14: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32:0],{(C_MANT_FP64-C_MANT_FP32+4){1'b0}}}; //Precision_ctl_S+1 + end + 6'h13,6'h12,6'h11,6'h10: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-4:0],{(C_MANT_FP64-C_MANT_FP32+4+4){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0f,6'h0e,6'h0d,6'h0c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-8:0],{(C_MANT_FP64-C_MANT_FP32+4+8){1'b0}}}; //Precision_ctl_S+1 + end + 6'h0b,6'h0a,6'h09,6'h08: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-12:0],{(C_MANT_FP64-C_MANT_FP32+4+12){1'b0}}}; //Precision_ctl_S+1 + end + 6'h07,6'h06: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32-16:0],{(C_MANT_FP64-C_MANT_FP32+4+16){1'b0}}}; //Precision_ctl_S+1 + end + default: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP32+4:0],{(C_MANT_FP64-C_MANT_FP32){1'b0}}}; //+4 + end + endcase + end + + 2'b01: + begin + case (Precision_ctl_S) + 6'h00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+3:0],{(1){1'b0}}}; //+3 + end + 6'h34: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+3:0],{(1){1'b0}} }; //Precision_ctl_S+1 + end + 6'h33,6'h32,6'h31,6'h30: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-1:0],{(5){1'b0}} }; //Precision_ctl_S+1 + end + 6'h2f,6'h2e,6'h2d,6'h2c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-5:0],{(9){1'b0}} }; //Precision_ctl_S+1 + end + 6'h2b,6'h2a,6'h29,6'h28: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-9:0],{(13){1'b0}} }; //Precision_ctl_S+1 + end + 6'h27,6'h26,6'h25,6'h24: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-13:0],{(17){1'b0}} }; 
//Precision_ctl_S+1 + end + 6'h23,6'h22,6'h21,6'h20: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-17:0],{(21){1'b0}} }; //Precision_ctl_S+1 + end + 6'h1f,6'h1e,6'h1d,6'h1c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-21:0],{(25){1'b0}} }; //Precision_ctl_S+1 + end + 6'h1b,6'h1a,6'h19,6'h18: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-25:0],{(29){1'b0}} }; //Precision_ctl_S+1 + end + 6'h17,6'h16,6'h15,6'h14: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-29:0],{(33){1'b0}} }; //Precision_ctl_S+1 + end + 6'h13,6'h12,6'h11,6'h10: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-33:0],{(37){1'b0}} }; //Precision_ctl_S+1 + end + 6'h0f,6'h0e,6'h0d,6'h0c: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-37:0],{(41){1'b0}} }; //Precision_ctl_S+1 + end + 6'h0b,6'h0a,6'h09,6'h08: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-41:0],{(45){1'b0}} }; //Precision_ctl_S+1 + end + 6'h07,6'h06: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64-45:0],{(49){1'b0}} }; //Precision_ctl_S+1 + end + default: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP64+3:0],{(1){1'b0}}}; //+3 + end + endcase + end + + 2'b10: + begin + case (Precision_ctl_S) + 6'b00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+5:0],{(C_MANT_FP64-C_MANT_FP16-1){1'b0}} }; //+5 + end + 6'h0a,6'h09,6'h08: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+1:1],{(C_MANT_FP64-C_MANT_FP16+4){1'b0}} }; //Precision_ctl_S+1 + end + 6'h07,6'h06: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+1-4:0],{(C_MANT_FP64-C_MANT_FP16+4+3){1'b0}} }; //Precision_ctl_S+1 + end + default : + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16+5:0],{(C_MANT_FP64-C_MANT_FP16-1){1'b0}} }; //+5 + end + endcase + end + + 2'b11: + begin + + case (Precision_ctl_S) + 6'b00: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; //+4 + end + 
6'h07,6'h06: + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT:0],{(C_MANT_FP64-C_MANT_FP16ALT+4){1'b0}} }; //Precision_ctl_S+1 + end + default : + begin + Mant_result_prenorm_DO = {Quotient_DP[C_MANT_FP16ALT+4:0],{(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; //+4 + end + endcase + end + endcase + end + end + endgenerate +//////////////////////four iteration units, end/////////////////////////////////////// + + + + + +// resultant exponent + logic [C_EXP_FP64+1:0] Exp_result_prenorm_DN,Exp_result_prenorm_DP; + + logic [C_EXP_FP64+1:0] Exp_add_a_D; + logic [C_EXP_FP64+1:0] Exp_add_b_D; + logic [C_EXP_FP64+1:0] Exp_add_c_D; + + integer C_BIAS_AONE, C_HALF_BIAS; + always_comb + begin // + case (Format_sel_S) + 2'b00: + begin + C_BIAS_AONE =C_BIAS_AONE_FP32; + C_HALF_BIAS =C_HALF_BIAS_FP32; + end + 2'b01: + begin + C_BIAS_AONE =C_BIAS_AONE_FP64; + C_HALF_BIAS =C_HALF_BIAS_FP64; + end + 2'b10: + begin + C_BIAS_AONE =C_BIAS_AONE_FP16; + C_HALF_BIAS =C_HALF_BIAS_FP16; + end + 2'b11: + begin + C_BIAS_AONE =C_BIAS_AONE_FP16ALT; + C_HALF_BIAS =C_HALF_BIAS_FP16ALT; + end + endcase + end + +//For division, exponent=(Exp_a_D-LZ1)-(Exp_b_D-LZ2)+BIAS +//For square root, exponent=(Exp_a_D-LZ1)/2+(Exp_a_D-LZ1)%2+C_HALF_BIAS +//For exponent, in preprorces module, (Exp_a_D-LZ1) and (Exp_b_D-LZ2) have been processed with the corresponding process for denormal numbers. 
+ + assign Exp_add_a_D = {Sqrt_start_dly_S?{Exp_num_DI[C_EXP_FP64],Exp_num_DI[C_EXP_FP64],Exp_num_DI[C_EXP_FP64],Exp_num_DI[C_EXP_FP64:1]}:{Exp_num_DI[C_EXP_FP64],Exp_num_DI[C_EXP_FP64],Exp_num_DI}}; + assign Exp_add_b_D = {Sqrt_start_dly_S?{1'b0,{C_EXP_ZERO_FP64},Exp_num_DI[0]}:{~Exp_den_DI[C_EXP_FP64],~Exp_den_DI[C_EXP_FP64],~Exp_den_DI}}; + assign Exp_add_c_D = {Div_start_dly_S?{{C_BIAS_AONE}}:{{C_HALF_BIAS}}}; + assign Exp_result_prenorm_DN = (Start_dly_S)?{Exp_add_a_D + Exp_add_b_D + Exp_add_c_D}:Exp_result_prenorm_DP; + + + always_ff @(posedge Clk_CI, negedge Rst_RBI) + begin + if(~Rst_RBI) + begin + Exp_result_prenorm_DP <= '0; + end + else + begin + Exp_result_prenorm_DP<= Exp_result_prenorm_DN; + end + end + + assign Exp_result_prenorm_DO = Exp_result_prenorm_DP; + +endmodule diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv new file mode 100644 index 00000000..b3f41fec --- /dev/null +++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv @@ -0,0 +1,83 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the “License”); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// This file contains all div_sqrt_top_mvp parameters +// Authors : Lei Li (lile@iis.ee.ethz.ch) + +package defs_div_sqrt_mvp; + + // op command + localparam C_RM = 3; + localparam C_RM_NEAREST = 3'h0; + localparam C_RM_TRUNC = 3'h1; + localparam C_RM_PLUSINF = 3'h2; + localparam C_RM_MINUSINF = 3'h3; + localparam C_PC = 6; // Precision Control + localparam C_FS = 2; // Format Selection + localparam C_IUNC = 2; // Iteration Unit Number Control + localparam Iteration_unit_num_S = 2'b10; + + // FP64 + localparam C_OP_FP64 = 64; + localparam C_MANT_FP64 = 52; + localparam C_EXP_FP64 = 11; + localparam C_BIAS_FP64 = 1023; + localparam C_BIAS_AONE_FP64 = 11'h400; + localparam C_HALF_BIAS_FP64 = 511; + localparam C_EXP_ZERO_FP64 = 11'h000; + localparam C_EXP_ONE_FP64 = 13'h001; // Bit width is in agreement with in norm + localparam C_EXP_INF_FP64 = 11'h7FF; + localparam C_MANT_ZERO_FP64 = 52'h0; + localparam C_MANT_NAN_FP64 = 52'h8_0000_0000_0000; + localparam C_PZERO_FP64 = 64'h0000_0000_0000_0000; + localparam C_MZERO_FP64 = 64'h8000_0000_0000_0000; + localparam C_QNAN_FP64 = 64'h7FF8_0000_0000_0000; + + // FP32 + localparam C_OP_FP32 = 32; + localparam C_MANT_FP32 = 23; + localparam C_EXP_FP32 = 8; + localparam C_BIAS_FP32 = 127; + localparam C_BIAS_AONE_FP32 = 8'h80; + localparam C_HALF_BIAS_FP32 = 63; + localparam C_EXP_ZERO_FP32 = 8'h00; + localparam C_EXP_INF_FP32 = 8'hFF; + localparam C_MANT_ZERO_FP32 = 23'h0; + localparam C_PZERO_FP32 = 32'h0000_0000; + localparam C_MZERO_FP32 = 32'h8000_0000; + localparam C_QNAN_FP32 = 32'h7FC0_0000; + + // FP16 + localparam C_OP_FP16 = 16; + localparam C_MANT_FP16 = 10; + localparam C_EXP_FP16 = 5; + localparam C_BIAS_FP16 = 15; + localparam C_BIAS_AONE_FP16 = 5'h10; + localparam C_HALF_BIAS_FP16 = 7; + localparam C_EXP_ZERO_FP16 = 5'h00; + localparam C_EXP_INF_FP16 = 5'h1F; + localparam C_MANT_ZERO_FP16 = 10'h0; + localparam C_PZERO_FP16 = 16'h0000; + localparam C_MZERO_FP16 = 16'h8000; + localparam C_QNAN_FP16 = 
16'h7E00; + + // FP16alt + localparam C_OP_FP16ALT = 16; + localparam C_MANT_FP16ALT = 7; + localparam C_EXP_FP16ALT = 8; + localparam C_BIAS_FP16ALT = 127; + localparam C_BIAS_AONE_FP16ALT = 8'h80; + localparam C_HALF_BIAS_FP16ALT = 63; + localparam C_EXP_ZERO_FP16ALT = 8'h00; + localparam C_EXP_INF_FP16ALT = 8'hFF; + localparam C_MANT_ZERO_FP16ALT = 7'h0; + localparam C_QNAN_FP16ALT = 16'h7FC0; + +endpackage : defs_div_sqrt_mvp diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv new file mode 100644 index 00000000..3af6081b --- /dev/null +++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv @@ -0,0 +1,180 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the “License”); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +//////////////////////////////////////////////////////////////////////////////// +// Company: IIS @ ETHZ - Federal Institute of Technology // +// // +// Engineers: Lei Li -- lile@iis.ee.ethz.ch // +// // +// Additional contributions by: // +// // +// // +// // +// Create Date: 03/03/2018 // +// Design Name: div_sqrt_top_mvp // +// Module Name: div_sqrt_top_mvp.sv // +// Project Name: The shared divisor and square root // +// Language: SystemVerilog // +// // +// Description: The top of div and sqrt // +// // +// // +// Revision Date: 12/04/2018 // +// Lei Li // +// To address some requirements by Stefan and add low power // +// control for special cases // +//////////////////////////////////////////////////////////////////////////////// + +import defs_div_sqrt_mvp::*; + +module div_sqrt_top_mvp + + (//Input + input logic Clk_CI, + input logic Rst_RBI, + input logic Div_start_SI, + input logic Sqrt_start_SI, + + //Input Operands + input logic [C_OP_FP64-1:0] Operand_a_DI, + input logic [C_OP_FP64-1:0] Operand_b_DI, + + // Input Control + input logic [C_RM-1:0] RM_SI, //Rounding Mode + input logic [C_PC-1:0] Precision_ctl_SI, // Precision Control + input logic [C_FS-1:0] Format_sel_SI, // Format Selection, + input logic Kill_SI, + + //Output Result + output logic [C_OP_FP64-1:0] Result_DO, + + //Output-Flags + output logic [4:0] Fflags_SO, + output logic Ready_SO, + output logic Done_SO + ); + + + + + + //Operand components + logic [C_EXP_FP64:0] Exp_a_D; + logic [C_EXP_FP64:0] Exp_b_D; + logic [C_MANT_FP64:0] Mant_a_D; + logic [C_MANT_FP64:0] Mant_b_D; + + logic [C_EXP_FP64+1:0] Exp_z_D; + logic [C_MANT_FP64+4:0] Mant_z_D; + logic Sign_z_D; + logic Start_S; + logic [C_RM-1:0] RM_dly_S; + logic Div_enable_S; + logic Sqrt_enable_S; + logic Inf_a_S; + logic Inf_b_S; + logic Zero_a_S; + logic Zero_b_S; + logic NaN_a_S; + logic NaN_b_S; + logic SNaN_S; + logic Special_case_SB,Special_case_dly_SB; + + logic Full_precision_S; + logic FP32_S; + logic FP64_S; + logic 
FP16_S; + logic FP16ALT_S; + + + preprocess_mvp preprocess_U0 + ( + .Clk_CI (Clk_CI ), + .Rst_RBI (Rst_RBI ), + .Div_start_SI (Div_start_SI ), + .Sqrt_start_SI (Sqrt_start_SI ), + .Ready_SI (Ready_SO ), + .Operand_a_DI (Operand_a_DI ), + .Operand_b_DI (Operand_b_DI ), + .RM_SI (RM_SI ), + .Format_sel_SI (Format_sel_SI ), + .Start_SO (Start_S ), + .Exp_a_DO_norm (Exp_a_D ), + .Exp_b_DO_norm (Exp_b_D ), + .Mant_a_DO_norm (Mant_a_D ), + .Mant_b_DO_norm (Mant_b_D ), + .RM_dly_SO (RM_dly_S ), + .Sign_z_DO (Sign_z_D ), + .Inf_a_SO (Inf_a_S ), + .Inf_b_SO (Inf_b_S ), + .Zero_a_SO (Zero_a_S ), + .Zero_b_SO (Zero_b_S ), + .NaN_a_SO (NaN_a_S ), + .NaN_b_SO (NaN_b_S ), + .SNaN_SO (SNaN_S ), + .Special_case_SBO (Special_case_SB ), + .Special_case_dly_SBO (Special_case_dly_SB) + ); + + nrbd_nrsc_mvp nrbd_nrsc_U0 + ( + .Clk_CI (Clk_CI ), + .Rst_RBI (Rst_RBI ), + .Div_start_SI (Div_start_SI ) , + .Sqrt_start_SI (Sqrt_start_SI ), + .Start_SI (Start_S ), + .Kill_SI (Kill_SI ), + .Special_case_SBI (Special_case_SB ), + .Special_case_dly_SBI (Special_case_dly_SB), + .Div_enable_SO (Div_enable_S ), + .Sqrt_enable_SO (Sqrt_enable_S ), + .Precision_ctl_SI (Precision_ctl_SI ), + .Format_sel_SI (Format_sel_SI ), + .Exp_a_DI (Exp_a_D ), + .Exp_b_DI (Exp_b_D ), + .Mant_a_DI (Mant_a_D ), + .Mant_b_DI (Mant_b_D ), + .Full_precision_SO (Full_precision_S ), + .FP32_SO (FP32_S ), + .FP64_SO (FP64_S ), + .FP16_SO (FP16_S ), + .FP16ALT_SO (FP16ALT_S ), + .Ready_SO (Ready_SO ), + .Done_SO (Done_SO ), + .Exp_z_DO (Exp_z_D ), + .Mant_z_DO (Mant_z_D ) + ); + + + norm_div_sqrt_mvp fpu_norm_U0 + ( + .Mant_in_DI (Mant_z_D ), + .Exp_in_DI (Exp_z_D ), + .Sign_in_DI (Sign_z_D ), + .Div_enable_SI (Div_enable_S ), + .Sqrt_enable_SI (Sqrt_enable_S ), + .Inf_a_SI (Inf_a_S ), + .Inf_b_SI (Inf_b_S ), + .Zero_a_SI (Zero_a_S ), + .Zero_b_SI (Zero_b_S ), + .NaN_a_SI (NaN_a_S ), + .NaN_b_SI (NaN_b_S ), + .SNaN_SI (SNaN_S ), + .RM_SI (RM_dly_S ), + .Full_precision_SI (Full_precision_S ), + .FP32_SI (FP32_S ), + 
.FP64_SI (FP64_S ), + .FP16_SI (FP16_S ), + .FP16ALT_SI (FP16ALT_S ), + .Result_DO (Result_DO ), + .Fflags_SO (Fflags_SO ) //{NV,DZ,OF,UF,NX} + ); + +endmodule diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv new file mode 100644 index 00000000..0c645e6e --- /dev/null +++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv @@ -0,0 +1,61 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the “License”); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+//////////////////////////////////////////////////////////////////////////////// +// Company: IIS @ ETHZ - Federal Institute of Technology // +// // +// Engineers: Lei Li lile@iis.ee.ethz.ch // +// // +// Additional contributions by: // +// // +// // +// // +// Create Date: 12/01/2017 // +// Design Name: FPU // +// Module Name: iteration_div_sqrt_mvp // +// Project Name: Private FPU // +// Language: SystemVerilog // +// // +// Description: iteration unit for div and sqrt // +// // +// // +// Revision: 03/14/2018 // +// For div_sqrt_mvp // +//////////////////////////////////////////////////////////////////////////////// + +module iteration_div_sqrt_mvp +#( + parameter WIDTH=25 +) + (//Input + + input logic [WIDTH-1:0] A_DI, + input logic [WIDTH-1:0] B_DI, + input logic Div_enable_SI, + input logic Div_start_dly_SI, + input logic Sqrt_enable_SI, + input logic [1:0] D_DI, + + output logic [1:0] D_DO, + output logic [WIDTH-1:0] Sum_DO, + output logic Carry_out_DO + ); + + logic D_carry_D; + logic Sqrt_cin_D; + logic Cin_D; + + assign D_DO[0]=~D_DI[0]; + assign D_DO[1]=~(D_DI[1] ^ D_DI[0]); + assign D_carry_D=D_DI[1] | D_DI[0]; + assign Sqrt_cin_D=Sqrt_enable_SI&&D_carry_D; + assign Cin_D=Div_enable_SI?1'b0:Sqrt_cin_D; + assign {Carry_out_DO,Sum_DO}=A_DI+B_DI+Cin_D; + +endmodule diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv new file mode 100644 index 00000000..590abe96 --- /dev/null +++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv @@ -0,0 +1,470 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the “License”); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +//////////////////////////////////////////////////////////////////////////////// +// Company: IIS @ ETHZ - Federal Institute of Technology // +// // +// Engineers: Lei Li lile@iis.ee.ethz.ch // +// // +// Additional contributions by: // +// // +// // +// // +// Create Date: 09/03/2018 // +// Design Name: FPU // +// Module Name: norm_div_sqrt_mvp.sv // +// Project Name: // +// Language: SystemVerilog // +// // +// Description: Floating point Normalizer/Rounding unit // +// Since this module is designed as combinational logic, // +// arbitrary register stages can be added for different // +// frequencies in the wrapper module. // +// Special operand cases (NaN, Inf, zero, divide by zero, // +// sqrt of a negative value) are decided first; rounding per // +// RM_SI is applied only when Full_precision_SI is set. // +// Revision Date: 12/04/2018 // +// Lei Li // +// To address some requirements by Stefan // +// // +// // +// // +// // +// // +// // +//////////////////////////////////////////////////////////////////////////////// + +import defs_div_sqrt_mvp::*; + +module norm_div_sqrt_mvp + (//Inputs + input logic [C_MANT_FP64+4:0] Mant_in_DI, // Includes the 4 extra bits needed for rounding and the hidden bit + input logic signed [C_EXP_FP64+1:0] Exp_in_DI, + input logic Sign_in_DI, + input logic Div_enable_SI, + input logic Sqrt_enable_SI, + input logic Inf_a_SI, + input logic Inf_b_SI, + input logic Zero_a_SI, + input logic Zero_b_SI, + input logic NaN_a_SI, + input logic NaN_b_SI, + input logic SNaN_SI, + input logic [C_RM-1:0] RM_SI, + input logic Full_precision_SI, + input logic FP32_SI, + input logic FP64_SI, + input logic FP16_SI, + input logic FP16ALT_SI, + //Outputs + output logic [C_EXP_FP64+C_MANT_FP64:0] Result_DO, + output logic [4:0] Fflags_SO //{NV,DZ,OF,UF,NX} + ); + + + logic
Sign_res_D; + + logic NV_OP_S; + logic Exp_OF_S; + logic Exp_UF_S; + logic Div_Zero_S; + logic In_Exact_S; + + ///////////////////////////////////////////////////////////////////////////// + // Normalization // + ///////////////////////////////////////////////////////////////////////////// + logic [C_MANT_FP64:0] Mant_res_norm_D; + logic [C_EXP_FP64-1:0] Exp_res_norm_D; + + ///////////////////////////////////////////////////////////////////////////// + // Right shift operations for negative exponents // + ///////////////////////////////////////////////////////////////////////////// + + logic [C_EXP_FP64+1:0] Exp_Max_RS_FP64_D; + logic [C_EXP_FP32+1:0] Exp_Max_RS_FP32_D; + logic [C_EXP_FP16+1:0] Exp_Max_RS_FP16_D; + logic [C_EXP_FP16ALT+1:0] Exp_Max_RS_FP16ALT_D; + // NOTE(review): the four Exp_Max_RS_* sums below are not read anywhere else in this module + assign Exp_Max_RS_FP64_D=Exp_in_DI[C_EXP_FP64:0]+C_MANT_FP64+1; // to check exponent after (C_MANT_FP64+1)-bit >> when Exp_in_DI is negative + assign Exp_Max_RS_FP32_D=Exp_in_DI[C_EXP_FP32:0]+C_MANT_FP32+1; // to check exponent after (C_MANT_FP32+1)-bit >> when Exp_in_DI is negative + assign Exp_Max_RS_FP16_D=Exp_in_DI[C_EXP_FP16:0]+C_MANT_FP16+1; // to check exponent after (C_MANT_FP16+1)-bit >> when Exp_in_DI is negative + assign Exp_Max_RS_FP16ALT_D=Exp_in_DI[C_EXP_FP16ALT:0]+C_MANT_FP16ALT+1; // to check exponent after (C_MANT_FP16ALT+1)-bit >> when Exp_in_DI is negative + logic [C_EXP_FP64+1:0] Num_RS_D; + assign Num_RS_D=~Exp_in_DI+1+1; // How many right shifts(RS) are needed to generate a denormal number?
>> is need only when Exp_in_DI is negative + logic [C_MANT_FP64:0] Mant_RS_D; + logic [C_MANT_FP64+4:0] Mant_forsticky_D; + assign {Mant_RS_D,Mant_forsticky_D} ={Mant_in_DI,{(C_MANT_FP64+1){1'b0}} } >>(Num_RS_D); // the whole concatenation is right-shifted by Num_RS_D, then split across the two targets +// + logic [C_EXP_FP64+1:0] Exp_subOne_D; + assign Exp_subOne_D = Exp_in_DI -1; + + //normalization: priority-ordered special cases first, regular normalization last + logic [1:0] Mant_lower_D; + logic Mant_sticky_bit_D; + logic [C_MANT_FP64+4:0] Mant_forround_D; + + always_comb + begin + + if(NaN_a_SI) // if a is NaN, return NaN; NV follows SNaN_SI + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b0; + Exp_UF_S=1'b0; + Mant_res_norm_D={1'b0,C_MANT_NAN_FP64}; + Exp_res_norm_D='1; + Mant_forround_D='0; + Sign_res_D=1'b0; + NV_OP_S = SNaN_SI; + end + + else if(NaN_b_SI) //if b is NaN, return NaN; NV follows SNaN_SI + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b0; + Exp_UF_S=1'b0; + Mant_res_norm_D={1'b0,C_MANT_NAN_FP64}; + Exp_res_norm_D='1; + Mant_forround_D='0; + Sign_res_D=1'b0; + NV_OP_S = SNaN_SI; + end + + else if(Inf_a_SI) + begin + if(Div_enable_SI&&Inf_b_SI) //Inf/Inf, return NaN and raise NV + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b0; + Exp_UF_S=1'b0; + Mant_res_norm_D={1'b0,C_MANT_NAN_FP64}; + Exp_res_norm_D='1; + Mant_forround_D='0; + Sign_res_D=1'b0; + NV_OP_S = 1'b1; + end + else if (Sqrt_enable_SI && Sign_in_DI) begin // catch sqrt(-inf): NaN result, NV raised + Div_Zero_S=1'b0; + Exp_OF_S=1'b0; + Exp_UF_S=1'b0; + Mant_res_norm_D={1'b0,C_MANT_NAN_FP64}; + Exp_res_norm_D='1; + Mant_forround_D='0; + Sign_res_D=1'b0; + NV_OP_S = 1'b1; + end else begin // remaining Inf_a cases: all-ones exponent, zero mantissa; NOTE(review): Exp_OF_S is raised here + Div_Zero_S=1'b0; + Exp_OF_S=1'b1; + Exp_UF_S=1'b0; + Mant_res_norm_D= '0; + Exp_res_norm_D='1; + Mant_forround_D='0; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + end + + else if(Div_enable_SI&&Inf_b_SI) // division by Inf: zero exponent and mantissa; NOTE(review): Exp_OF_S is raised here + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b1; + Exp_UF_S=1'b0; + Mant_res_norm_D= '0; + Exp_res_norm_D='0; + Mant_forround_D='0; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + + else if(Zero_a_SI) // a is zero + begin + if(Div_enable_SI&&Zero_b_SI) // 0/0: NaN result; NOTE(review): both Div_Zero_S and NV_OP_S are raised + begin + Div_Zero_S=1'b1; + Exp_OF_S=1'b0; + Exp_UF_S=1'b0; + Mant_res_norm_D={1'b0,C_MANT_NAN_FP64}; +
Exp_res_norm_D='1; + Mant_forround_D='0; + Sign_res_D=1'b0; + NV_OP_S = 1'b1; + end + else + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b0; + Exp_UF_S=1'b0; + Mant_res_norm_D='0; + Exp_res_norm_D='0; + Mant_forround_D='0; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + end + + else if(Div_enable_SI&&(Zero_b_SI)) //div Zero: all-ones exponent, zero mantissa, sign kept, DZ raised + begin + Div_Zero_S=1'b1; + Exp_OF_S=1'b0; + Exp_UF_S=1'b0; + Mant_res_norm_D='0; + Exp_res_norm_D='1; + Mant_forround_D='0; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + + else if(Sign_in_DI&&Sqrt_enable_SI) //sqrt(-a): NaN result, NV raised + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b0; + Exp_UF_S=1'b0; + Mant_res_norm_D={1'b0,C_MANT_NAN_FP64}; + Exp_res_norm_D='1; + Mant_forround_D='0; + Sign_res_D=1'b0; + NV_OP_S = 1'b1; + end + + else if((Exp_in_DI[C_EXP_FP64:0]=='0)) + begin + if(Mant_in_DI!='0) //Exp=0, Mant!=0, it is denormal; UF raised + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b0; + Exp_UF_S=1'b1; + Mant_res_norm_D={1'b0,Mant_in_DI[C_MANT_FP64+4:5]}; + Exp_res_norm_D='0; + Mant_forround_D={Mant_in_DI[4:0],{(C_MANT_FP64){1'b0}} }; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + else // Zero + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b0; + Exp_UF_S=1'b0; + Mant_res_norm_D='0; + Exp_res_norm_D='0; + Mant_forround_D='0; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + end + + else if((Exp_in_DI[C_EXP_FP64:0]==C_EXP_ONE_FP64)&&(~Mant_in_DI[C_MANT_FP64+4])) //denormal: exponent equals C_EXP_ONE_FP64 and the mantissa MSB is clear + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b0; + Exp_UF_S=1'b1; + Mant_res_norm_D=Mant_in_DI[C_MANT_FP64+4:4]; + Exp_res_norm_D='0; + Mant_forround_D={Mant_in_DI[3:0],{(C_MANT_FP64+1){1'b0}}}; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + + else if(Exp_in_DI[C_EXP_FP64+1]) //minus //consider format + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b0; + Exp_UF_S=1'b1; + Mant_res_norm_D={Mant_RS_D[C_MANT_FP64:0]}; + Exp_res_norm_D='0; + Mant_forround_D={Mant_forsticky_D[C_MANT_FP64+4:0]}; //??
+ Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + + else if( (Exp_in_DI[C_EXP_FP32]&&FP32_SI) | (Exp_in_DI[C_EXP_FP64]&&FP64_SI) | (Exp_in_DI[C_EXP_FP16]&&FP16_SI) | (Exp_in_DI[C_EXP_FP16ALT]&&FP16ALT_SI) ) //OF: the exponent bit just above the selected format's field is set + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b1; + Exp_UF_S=1'b0; + Mant_res_norm_D='0; + Exp_res_norm_D='1; + Mant_forround_D='0; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + + else if( ((Exp_in_DI[C_EXP_FP32-1:0]=='1)&&FP32_SI) | ((Exp_in_DI[C_EXP_FP64-1:0]=='1)&&FP64_SI) | ((Exp_in_DI[C_EXP_FP16-1:0]=='1)&&FP16_SI) | ((Exp_in_DI[C_EXP_FP16ALT-1:0]=='1)&&FP16ALT_SI) )//255: exponent field of the selected format is all ones + begin + if(~Mant_in_DI[C_MANT_FP64+4]) // MSB=0: take the mantissa one position lower and use Exp_subOne_D + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b0; + Exp_UF_S=1'b0; + Mant_res_norm_D=Mant_in_DI[C_MANT_FP64+3:3]; + Exp_res_norm_D=Exp_subOne_D; + Mant_forround_D={Mant_in_DI[2:0],{(C_MANT_FP64+2){1'b0}}}; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + else if(Mant_in_DI!='0) //NaN (original label); NOTE(review): assigns exactly the same values as the infinity branch below + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b1; + Exp_UF_S=1'b0; + Mant_res_norm_D= '0; + Exp_res_norm_D='1; + Mant_forround_D='0; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + else //infinity; NOTE(review): looks unreachable, a set mantissa MSB already makes Mant_in_DI non-zero + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b1; + Exp_UF_S=1'b0; + Mant_res_norm_D= '0; + Exp_res_norm_D='1; + Mant_forround_D='0; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + end + + else if(Mant_in_DI[C_MANT_FP64+4]) //normal numbers with 1.XXX + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b0; + Exp_UF_S=1'b0; + Mant_res_norm_D= Mant_in_DI[C_MANT_FP64+4:4]; + Exp_res_norm_D=Exp_in_DI[C_EXP_FP64-1:0]; + Mant_forround_D={Mant_in_DI[3:0],{(C_MANT_FP64+1){1'b0}}}; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + + else //normal numbers with 0.1XX: mantissa taken one position lower, exponent from Exp_subOne_D + begin + Div_Zero_S=1'b0; + Exp_OF_S=1'b0; + Exp_UF_S=1'b0; + Mant_res_norm_D=Mant_in_DI[C_MANT_FP64+3:3]; + Exp_res_norm_D=Exp_subOne_D; + Mant_forround_D={Mant_in_DI[2:0],{(C_MANT_FP64+2){1'b0}}}; + Sign_res_D=Sign_in_DI; + NV_OP_S = 1'b0; + end + + end + + 
///////////////////////////////////////////////////////////////////////////// + // Rounding enable only for full precision (Full_precision_SI==1'b1) // + ///////////////////////////////////////////////////////////////////////////// + + logic [C_MANT_FP64:0] Mant_upper_D; + logic [C_MANT_FP64+1:0] Mant_upperRounded_D; + logic Mant_roundUp_S; + logic Mant_rounded_S; + + always_comb //determine which bits for Mant_lower_D and Mant_sticky_bit_D (format priority: FP32, FP64, FP16, else FP16ALT) + begin + if(FP32_SI) + begin + Mant_upper_D = {Mant_res_norm_D[C_MANT_FP64:C_MANT_FP64-C_MANT_FP32], {(C_MANT_FP64-C_MANT_FP32){1'b0}} }; + Mant_lower_D = Mant_res_norm_D[C_MANT_FP64-C_MANT_FP32-1:C_MANT_FP64-C_MANT_FP32-2]; + Mant_sticky_bit_D = | Mant_res_norm_D[C_MANT_FP64-C_MANT_FP32-3:0]; + end + else if(FP64_SI) // FP64 takes its round and sticky bits from Mant_forround_D instead of Mant_res_norm_D + begin + Mant_upper_D = Mant_res_norm_D[C_MANT_FP64:0]; + Mant_lower_D = Mant_forround_D[C_MANT_FP64+4:C_MANT_FP64+3]; + Mant_sticky_bit_D = | Mant_forround_D[C_MANT_FP64+3:0]; + end + else if(FP16_SI) + begin + Mant_upper_D = {Mant_res_norm_D[C_MANT_FP64:C_MANT_FP64-C_MANT_FP16], {(C_MANT_FP64-C_MANT_FP16){1'b0}} }; + Mant_lower_D = Mant_res_norm_D[C_MANT_FP64-C_MANT_FP16-1:C_MANT_FP64-C_MANT_FP16-2]; + Mant_sticky_bit_D = | Mant_res_norm_D[C_MANT_FP64-C_MANT_FP16-3:30]; // NOTE(review): bits below index 30 are not part of this sticky OR + end + else //FP16ALT + begin + Mant_upper_D = {Mant_res_norm_D[C_MANT_FP64:C_MANT_FP64-C_MANT_FP16ALT], {(C_MANT_FP64-C_MANT_FP16ALT){1'b0}} }; + Mant_lower_D = Mant_res_norm_D[C_MANT_FP64-C_MANT_FP16ALT-1:C_MANT_FP64-C_MANT_FP16ALT-2]; + Mant_sticky_bit_D = | Mant_res_norm_D[C_MANT_FP64-C_MANT_FP16ALT-3:30]; // NOTE(review): bits below index 30 are not part of this sticky OR + end + end + + assign Mant_rounded_S = (|(Mant_lower_D))| Mant_sticky_bit_D; + + + + + always_comb //determine whether to round up or not + begin + Mant_roundUp_S = 1'b0; + case (RM_SI) + C_RM_NEAREST : + Mant_roundUp_S = Mant_lower_D[1] && ((Mant_lower_D[0] | Mant_sticky_bit_D )| ( (FP32_SI&&Mant_upper_D[C_MANT_FP64-C_MANT_FP32]) | (FP64_SI&&Mant_upper_D[0]) | (FP16_SI&&Mant_upper_D[C_MANT_FP64-C_MANT_FP16]) | 
(FP16ALT_SI&&Mant_upper_D[C_MANT_FP64-C_MANT_FP16ALT]) ) ); + C_RM_TRUNC : + Mant_roundUp_S = 0; + C_RM_PLUSINF : + Mant_roundUp_S = Mant_rounded_S & ~Sign_in_DI; + C_RM_MINUSINF: + Mant_roundUp_S = Mant_rounded_S & Sign_in_DI; + default : + Mant_roundUp_S = 0; + endcase // case (RM_SI) + end // always_comb + + logic Mant_renorm_S; + logic [C_MANT_FP64:0] Mant_roundUp_Vector_S; // for all the formats; hard-coded zero widths presumably match the defs package mantissa sizes - TODO confirm + + assign Mant_roundUp_Vector_S={7'h0,(FP16ALT_SI&&Mant_roundUp_S),2'h0,(FP16_SI&&Mant_roundUp_S),12'h0,(FP32_SI&&Mant_roundUp_S),28'h0,(FP64_SI&&Mant_roundUp_S)}; + + + assign Mant_upperRounded_D = Mant_upper_D + Mant_roundUp_Vector_S; + assign Mant_renorm_S = Mant_upperRounded_D[C_MANT_FP64+1]; + + ///////////////////////////////////////////////////////////////////////////// + // Renormalization for Rounding // + ///////////////////////////////////////////////////////////////////////////// + logic [C_MANT_FP64-1:0] Mant_res_round_D; + logic [C_EXP_FP64-1:0] Exp_res_round_D; + + + assign Mant_res_round_D = (Mant_renorm_S)?Mant_upperRounded_D[C_MANT_FP64:1]:Mant_upperRounded_D[C_MANT_FP64-1:0]; // including the process of the hidden bit + assign Exp_res_round_D = Exp_res_norm_D+Mant_renorm_S; + + ///////////////////////////////////////////////////////////////////////////// + // Output Assignments // + ///////////////////////////////////////////////////////////////////////////// + logic [C_MANT_FP64-1:0] Mant_before_format_ctl_D; + logic [C_EXP_FP64-1:0] Exp_before_format_ctl_D; + assign Mant_before_format_ctl_D = Full_precision_SI ? Mant_res_round_D : Mant_res_norm_D; + assign Exp_before_format_ctl_D = Full_precision_SI ? 
Exp_res_round_D : Exp_res_norm_D; + + always_comb //NaN Boxing: FP32, FP16 and FP16ALT results get all-ones upper bits + begin // + if(FP32_SI) + begin + Result_DO ={32'hffff_ffff,Sign_res_D,Exp_before_format_ctl_D[C_EXP_FP32-1:0],Mant_before_format_ctl_D[C_MANT_FP64-1:C_MANT_FP64-C_MANT_FP32]}; + end + else if(FP64_SI) + begin + Result_DO ={Sign_res_D,Exp_before_format_ctl_D[C_EXP_FP64-1:0],Mant_before_format_ctl_D[C_MANT_FP64-1:0]}; + end + else if(FP16_SI) + begin + Result_DO ={48'hffff_ffff_ffff,Sign_res_D,Exp_before_format_ctl_D[C_EXP_FP16-1:0],Mant_before_format_ctl_D[C_MANT_FP64-1:C_MANT_FP64-C_MANT_FP16]}; + end + else + begin + Result_DO ={48'hffff_ffff_ffff,Sign_res_D,Exp_before_format_ctl_D[C_EXP_FP16ALT-1:0],Mant_before_format_ctl_D[C_MANT_FP64-1:C_MANT_FP64-C_MANT_FP16ALT]}; + end + end + +assign In_Exact_S = (~Full_precision_SI) | Mant_rounded_S; +assign Fflags_SO = {NV_OP_S,Div_Zero_S,Exp_OF_S,Exp_UF_S,In_Exact_S}; //{NV,DZ,OF,UF,NX} + +endmodule // norm_div_sqrt_mvp diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv new file mode 100644 index 00000000..62bd147f --- /dev/null +++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv @@ -0,0 +1,104 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the “License”); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+//////////////////////////////////////////////////////////////////////////////// +// Company: IIS @ ETHZ - Federal Institute of Technology // +// // +// Engineers: Lei Li lile@iis.ee.ethz.ch // +// // +// Additional contributions by: // +// // +// // +// // +// Create Date: 10/04/2018 // +// Design Name: FPU // +// Module Name: nrbd_nrsc_mvp.sv // +// Project Name: Private FPU // +// Language: SystemVerilog // +// // +// Description: non-restoring binary divisor / square root // +// Thin wrapper: only instantiates control_mvp (control_U0). // +// Revision Date: 12/04/2018 // +// Lei Li // +// To address some requirements by Stefan and add low power // +// control for special cases // +// // +//////////////////////////////////////////////////////////////////////////////// + +import defs_div_sqrt_mvp::*; + +module nrbd_nrsc_mvp + + (//Input + input logic Clk_CI, + input logic Rst_RBI, + input logic Div_start_SI, + input logic Sqrt_start_SI, + input logic Start_SI, + input logic Kill_SI, + input logic Special_case_SBI, + input logic Special_case_dly_SBI, + input logic [C_PC-1:0] Precision_ctl_SI, + input logic [1:0] Format_sel_SI, + input logic [C_MANT_FP64:0] Mant_a_DI, + input logic [C_MANT_FP64:0] Mant_b_DI, + input logic [C_EXP_FP64:0] Exp_a_DI, + input logic [C_EXP_FP64:0] Exp_b_DI, + //output + output logic Div_enable_SO, + output logic Sqrt_enable_SO, + + output logic Full_precision_SO, + output logic FP32_SO, + output logic FP64_SO, + output logic FP16_SO, + output logic FP16ALT_SO, + output logic Ready_SO, + output logic Done_SO, + output logic [C_MANT_FP64+4:0] Mant_z_DO, + output logic [C_EXP_FP64+1:0] Exp_z_DO + ); + + + logic Div_start_dly_S,Sqrt_start_dly_S; + + +control_mvp control_U0 +( .Clk_CI (Clk_CI ), + .Rst_RBI (Rst_RBI ), + .Div_start_SI (Div_start_SI ), + .Sqrt_start_SI (Sqrt_start_SI ), + .Start_SI (Start_SI ), + .Kill_SI (Kill_SI ), + .Special_case_SBI (Special_case_SBI ), + .Special_case_dly_SBI (Special_case_dly_SBI ), + .Precision_ctl_SI (Precision_ctl_SI ), + .Format_sel_SI (Format_sel_SI ), + 
.Numerator_DI (Mant_a_DI ), + .Exp_num_DI (Exp_a_DI ), + .Denominator_DI (Mant_b_DI ), + .Exp_den_DI (Exp_b_DI ), + .Div_start_dly_SO (Div_start_dly_S ), + .Sqrt_start_dly_SO (Sqrt_start_dly_S ), + .Div_enable_SO (Div_enable_SO ), + .Sqrt_enable_SO (Sqrt_enable_SO ), + .Full_precision_SO (Full_precision_SO ), + .FP32_SO (FP32_SO ), + .FP64_SO (FP64_SO ), + .FP16_SO (FP16_SO ), + .FP16ALT_SO (FP16ALT_SO ), + .Ready_SO (Ready_SO ), + .Done_SO (Done_SO ), + .Mant_result_prenorm_DO (Mant_z_DO ), + .Exp_result_prenorm_DO (Exp_z_DO ) +); + + + +endmodule diff --git a/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv new file mode 100644 index 00000000..9e0d25f3 --- /dev/null +++ b/test/type_param/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv @@ -0,0 +1,425 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the “License”); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+//////////////////////////////////////////////////////////////////////////////// +// Company: IIS @ ETHZ - Federal Institute of Technology // +// // +// Engineers: Lei Li //lile@iis.ee.ethz.ch // +// // +// Additional contributions by: // +// // +// // +// // +// Create Date: 01/03/2018 // +// Design Name: FPU // +// Module Name: preprocess_mvp.sv // +// Project Name: Private FPU // +// Language: SystemVerilog // +// // +// Description: decode and data preparation // +// Splits the operands per Format_sel_SI, flags special // +// Revision Date: 12/04/2018 // +// Lei Li // +// To address some requirements by Stefan and add low power // +// control for special cases // +// Operand flags, sign, rounding mode and the normalized // +// mantissa/exponent are registered when Start and Ready. // +//////////////////////////////////////////////////////////////////////////////// + +import defs_div_sqrt_mvp::*; + +module preprocess_mvp + ( + input logic Clk_CI, + input logic Rst_RBI, + input logic Div_start_SI, + input logic Sqrt_start_SI, + input logic Ready_SI, + //Input Operands + input logic [C_OP_FP64-1:0] Operand_a_DI, + input logic [C_OP_FP64-1:0] Operand_b_DI, + input logic [C_RM-1:0] RM_SI, //Rounding Mode + input logic [C_FS-1:0] Format_sel_SI, // Format Selection + + // to control + output logic Start_SO, + output logic [C_EXP_FP64:0] Exp_a_DO_norm, + output logic [C_EXP_FP64:0] Exp_b_DO_norm, + output logic [C_MANT_FP64:0] Mant_a_DO_norm, + output logic [C_MANT_FP64:0] Mant_b_DO_norm, + + output logic [C_RM-1:0] RM_dly_SO, + + output logic Sign_z_DO, + output logic Inf_a_SO, + output logic Inf_b_SO, + output logic Zero_a_SO, + output logic Zero_b_SO, + output logic NaN_a_SO, + output logic NaN_b_SO, + output logic SNaN_SO, + output logic Special_case_SBO, + output logic Special_case_dly_SBO + ); + + //Hidden Bits + logic Hb_a_D; + logic Hb_b_D; + + logic [C_EXP_FP64-1:0] Exp_a_D; + logic [C_EXP_FP64-1:0] Exp_b_D; + logic [C_MANT_FP64-1:0] Mant_a_NonH_D; + logic [C_MANT_FP64-1:0] Mant_b_NonH_D; + logic [C_MANT_FP64:0] Mant_a_D; + logic [C_MANT_FP64:0] Mant_b_D; + + 
///////////////////////////////////////////////////////////////////////////// + // Disassemble operands + ///////////////////////////////////////////////////////////////////////////// + logic Sign_a_D,Sign_b_D; + logic Start_S; + + always_comb + begin + case(Format_sel_SI) + 2'b00: // uses the FP32 field constants; exponent zero-extended, mantissa left-aligned + begin + Sign_a_D = Operand_a_DI[C_OP_FP32-1]; + Sign_b_D = Operand_b_DI[C_OP_FP32-1]; + Exp_a_D = {3'h0, Operand_a_DI[C_OP_FP32-2:C_MANT_FP32]}; + Exp_b_D = {3'h0, Operand_b_DI[C_OP_FP32-2:C_MANT_FP32]}; + Mant_a_NonH_D = {Operand_a_DI[C_MANT_FP32-1:0],29'h0}; + Mant_b_NonH_D = {Operand_b_DI[C_MANT_FP32-1:0],29'h0}; + end + 2'b01: // uses the FP64 field constants; fields taken at full width + begin + Sign_a_D = Operand_a_DI[C_OP_FP64-1]; + Sign_b_D = Operand_b_DI[C_OP_FP64-1]; + Exp_a_D = Operand_a_DI[C_OP_FP64-2:C_MANT_FP64]; + Exp_b_D = Operand_b_DI[C_OP_FP64-2:C_MANT_FP64]; + Mant_a_NonH_D = Operand_a_DI[C_MANT_FP64-1:0]; + Mant_b_NonH_D = Operand_b_DI[C_MANT_FP64-1:0]; + end + 2'b10: // uses the FP16 field constants + begin + Sign_a_D = Operand_a_DI[C_OP_FP16-1]; + Sign_b_D = Operand_b_DI[C_OP_FP16-1]; + Exp_a_D = {6'h00, Operand_a_DI[C_OP_FP16-2:C_MANT_FP16]}; + Exp_b_D = {6'h00, Operand_b_DI[C_OP_FP16-2:C_MANT_FP16]}; + Mant_a_NonH_D = {Operand_a_DI[C_MANT_FP16-1:0],42'h0}; + Mant_b_NonH_D = {Operand_b_DI[C_MANT_FP16-1:0],42'h0}; + end + 2'b11: // uses the FP16ALT field constants + begin + Sign_a_D = Operand_a_DI[C_OP_FP16ALT-1]; + Sign_b_D = Operand_b_DI[C_OP_FP16ALT-1]; + Exp_a_D = {3'h0, Operand_a_DI[C_OP_FP16ALT-2:C_MANT_FP16ALT]}; + Exp_b_D = {3'h0, Operand_b_DI[C_OP_FP16ALT-2:C_MANT_FP16ALT]}; + Mant_a_NonH_D = {Operand_a_DI[C_MANT_FP16ALT-1:0],45'h0}; + Mant_b_NonH_D = {Operand_b_DI[C_MANT_FP16ALT-1:0],45'h0}; + end + endcase + end + + + assign Mant_a_D = {Hb_a_D,Mant_a_NonH_D}; + assign Mant_b_D = {Hb_b_D,Mant_b_NonH_D}; + + assign Hb_a_D = | Exp_a_D; // hidden bit: set when any exponent bit is set + assign Hb_b_D = | Exp_b_D; // hidden bit: set when any exponent bit is set + + assign Start_S= Div_start_SI | Sqrt_start_SI; + + + + ///////////////////////////////////////////////////////////////////////////// + // preliminary checks for infinite/zero/NaN operands // + 
///////////////////////////////////////////////////////////////////////////// + + logic Mant_a_prenorm_zero_S; + logic Mant_b_prenorm_zero_S; + + logic Exp_a_prenorm_zero_S; + logic Exp_b_prenorm_zero_S; + assign Exp_a_prenorm_zero_S = ~Hb_a_D; // exponent field of a is all zeros + assign Exp_b_prenorm_zero_S = ~Hb_b_D; // exponent field of b is all zeros + + logic Exp_a_prenorm_Inf_NaN_S; + logic Exp_b_prenorm_Inf_NaN_S; + + logic Mant_a_prenorm_QNaN_S; + logic Mant_a_prenorm_SNaN_S; + logic Mant_b_prenorm_QNaN_S; + logic Mant_b_prenorm_SNaN_S; + + assign Mant_a_prenorm_QNaN_S=Mant_a_NonH_D[C_MANT_FP64-1]&&(~(|Mant_a_NonH_D[C_MANT_FP64-2:0])); // NOTE(review): not read elsewhere in this module + assign Mant_a_prenorm_SNaN_S=(~Mant_a_NonH_D[C_MANT_FP64-1])&&((|Mant_a_NonH_D[C_MANT_FP64-2:0])); // top mantissa bit clear and some lower bit set + assign Mant_b_prenorm_QNaN_S=Mant_b_NonH_D[C_MANT_FP64-1]&&(~(|Mant_b_NonH_D[C_MANT_FP64-2:0])); // NOTE(review): not read elsewhere in this module + assign Mant_b_prenorm_SNaN_S=(~Mant_b_NonH_D[C_MANT_FP64-1])&&((|Mant_b_NonH_D[C_MANT_FP64-2:0])); // top mantissa bit clear and some lower bit set + + always_comb + begin + case(Format_sel_SI) + 2'b00: // compared against the FP32 zero-mantissa and Inf-exponent constants + begin + Mant_a_prenorm_zero_S=(Operand_a_DI[C_MANT_FP32-1:0] == C_MANT_ZERO_FP32); + Mant_b_prenorm_zero_S=(Operand_b_DI[C_MANT_FP32-1:0] == C_MANT_ZERO_FP32); + Exp_a_prenorm_Inf_NaN_S=(Operand_a_DI[C_OP_FP32-2:C_MANT_FP32] == C_EXP_INF_FP32); + Exp_b_prenorm_Inf_NaN_S=(Operand_b_DI[C_OP_FP32-2:C_MANT_FP32] == C_EXP_INF_FP32); + end + 2'b01: // compared against the FP64 constants + begin + Mant_a_prenorm_zero_S=(Operand_a_DI[C_MANT_FP64-1:0] == C_MANT_ZERO_FP64); + Mant_b_prenorm_zero_S=(Operand_b_DI[C_MANT_FP64-1:0] == C_MANT_ZERO_FP64); + Exp_a_prenorm_Inf_NaN_S=(Operand_a_DI[C_OP_FP64-2:C_MANT_FP64] == C_EXP_INF_FP64); + Exp_b_prenorm_Inf_NaN_S=(Operand_b_DI[C_OP_FP64-2:C_MANT_FP64] == C_EXP_INF_FP64); + end + 2'b10: // compared against the FP16 constants + begin + Mant_a_prenorm_zero_S=(Operand_a_DI[C_MANT_FP16-1:0] == C_MANT_ZERO_FP16); + Mant_b_prenorm_zero_S=(Operand_b_DI[C_MANT_FP16-1:0] == C_MANT_ZERO_FP16); + Exp_a_prenorm_Inf_NaN_S=(Operand_a_DI[C_OP_FP16-2:C_MANT_FP16] == C_EXP_INF_FP16); + Exp_b_prenorm_Inf_NaN_S=(Operand_b_DI[C_OP_FP16-2:C_MANT_FP16] == C_EXP_INF_FP16); + end + 2'b11: // compared against the FP16ALT constants + begin + 
Mant_a_prenorm_zero_S=(Operand_a_DI[C_MANT_FP16ALT-1:0] == C_MANT_ZERO_FP16ALT); + Mant_b_prenorm_zero_S=(Operand_b_DI[C_MANT_FP16ALT-1:0] == C_MANT_ZERO_FP16ALT); + Exp_a_prenorm_Inf_NaN_S=(Operand_a_DI[C_OP_FP16ALT-2:C_MANT_FP16ALT] == C_EXP_INF_FP16ALT); + Exp_b_prenorm_Inf_NaN_S=(Operand_b_DI[C_OP_FP16ALT-2:C_MANT_FP16ALT] == C_EXP_INF_FP16ALT); + end + endcase + end + + + + + logic Zero_a_SN,Zero_a_SP; + logic Zero_b_SN,Zero_b_SP; + logic Inf_a_SN,Inf_a_SP; + logic Inf_b_SN,Inf_b_SP; + logic NaN_a_SN,NaN_a_SP; + logic NaN_b_SN,NaN_b_SP; + logic SNaN_SN,SNaN_SP; + + assign Zero_a_SN = (Start_S&&Ready_SI)?(Exp_a_prenorm_zero_S&&Mant_a_prenorm_zero_S):Zero_a_SP; + assign Zero_b_SN = (Start_S&&Ready_SI)?(Exp_b_prenorm_zero_S&&Mant_b_prenorm_zero_S):Zero_b_SP; + assign Inf_a_SN = (Start_S&&Ready_SI)?(Exp_a_prenorm_Inf_NaN_S&&Mant_a_prenorm_zero_S):Inf_a_SP; + assign Inf_b_SN = (Start_S&&Ready_SI)?(Exp_b_prenorm_Inf_NaN_S&&Mant_b_prenorm_zero_S):Inf_b_SP; + assign NaN_a_SN = (Start_S&&Ready_SI)?(Exp_a_prenorm_Inf_NaN_S&&(~Mant_a_prenorm_zero_S)):NaN_a_SP; + assign NaN_b_SN = (Start_S&&Ready_SI)?(Exp_b_prenorm_Inf_NaN_S&&(~Mant_b_prenorm_zero_S)):NaN_b_SP; + assign SNaN_SN = (Start_S&&Ready_SI) ? 
((Mant_a_prenorm_SNaN_S&&NaN_a_SN) | (Mant_b_prenorm_SNaN_S&&NaN_b_SN)) : SNaN_SP; + + always_ff @(posedge Clk_CI, negedge Rst_RBI) + begin + if(~Rst_RBI) + begin + Zero_a_SP <='0; + Zero_b_SP <='0; + Inf_a_SP <='0; + Inf_b_SP <='0; + NaN_a_SP <='0; + NaN_b_SP <='0; + SNaN_SP <= '0; + end + else + begin + Inf_a_SP <=Inf_a_SN; + Inf_b_SP <=Inf_b_SN; + Zero_a_SP <=Zero_a_SN; + Zero_b_SP <=Zero_b_SN; + NaN_a_SP <=NaN_a_SN; + NaN_b_SP <=NaN_b_SN; + SNaN_SP <= SNaN_SN; + end + end + + ///////////////////////////////////////////////////////////////////////////// + // Low power control: Special_case_SBO is 1 only when an operation starts and no special operand flag is set + ///////////////////////////////////////////////////////////////////////////// + + assign Special_case_SBO=(~{(Div_start_SI)?(Zero_a_SN | Zero_b_SN | Inf_a_SN | Inf_b_SN | NaN_a_SN | NaN_b_SN): (Zero_a_SN | Inf_a_SN | NaN_a_SN | Sign_a_D) })&&(Start_S&&Ready_SI); + + + always_ff @(posedge Clk_CI, negedge Rst_RBI) + begin + if(~Rst_RBI) + begin + Special_case_dly_SBO <= '0; + end + else if((Start_S&&Ready_SI)) + begin + Special_case_dly_SBO <= Special_case_SBO; + end + else if(Special_case_dly_SBO) + begin + Special_case_dly_SBO <= 1'b1; + end + else + begin + Special_case_dly_SBO <= '0; + end + end + + ///////////////////////////////////////////////////////////////////////////// + // Delay sign for normalization and round // + ///////////////////////////////////////////////////////////////////////////// + + logic Sign_z_DN; + logic Sign_z_DP; + + always_comb + begin + if(Div_start_SI&&Ready_SI) + Sign_z_DN = Sign_a_D ^ Sign_b_D; + else if(Sqrt_start_SI&&Ready_SI) + Sign_z_DN = Sign_a_D; + else + Sign_z_DN = Sign_z_DP; + end + + always_ff @(posedge Clk_CI, negedge Rst_RBI) + begin + if(~Rst_RBI) + begin + Sign_z_DP <= '0; + end + else + begin + Sign_z_DP <= Sign_z_DN; + end + end + + logic [C_RM-1:0] RM_DN; + logic [C_RM-1:0] RM_DP; + + always_comb + begin + if(Start_S&&Ready_SI) + RM_DN = RM_SI; + else + RM_DN = RM_DP; + end + + always_ff @(posedge Clk_CI, negedge Rst_RBI) + begin + 
if(~Rst_RBI) + begin + RM_DP <= '0; + end + else + begin + RM_DP <= RM_DN; + end + end + assign RM_dly_SO = RM_DP; + + logic [5:0] Mant_leadingOne_a, Mant_leadingOne_b; + logic Mant_zero_S_a,Mant_zero_S_b; + + lzc #( + .WIDTH ( C_MANT_FP64+1 ), + .MODE ( 1 ) + ) LOD_Ua ( + .in_i ( Mant_a_D ), + .cnt_o ( Mant_leadingOne_a ), + .empty_o ( Mant_zero_S_a ) + ); + + logic [C_MANT_FP64:0] Mant_a_norm_DN,Mant_a_norm_DP; + + assign Mant_a_norm_DN = ((Start_S&&Ready_SI))?(Mant_a_D<<(Mant_leadingOne_a)):Mant_a_norm_DP; + + always_ff @(posedge Clk_CI, negedge Rst_RBI) + begin + if(~Rst_RBI) + begin + Mant_a_norm_DP <= '0; + end + else + begin + Mant_a_norm_DP<=Mant_a_norm_DN; + end + end + + logic [C_EXP_FP64:0] Exp_a_norm_DN,Exp_a_norm_DP; + assign Exp_a_norm_DN = ((Start_S&&Ready_SI))?(Exp_a_D-Mant_leadingOne_a+(|Mant_leadingOne_a)):Exp_a_norm_DP; //Covers denormal inputs: subtract the shift count, add 1 back when the count is non-zero + + always_ff @(posedge Clk_CI, negedge Rst_RBI) + begin + if(~Rst_RBI) + begin + Exp_a_norm_DP <= '0; + end + else + begin + Exp_a_norm_DP<=Exp_a_norm_DN; + end + end + + lzc #( + .WIDTH ( C_MANT_FP64+1 ), + .MODE ( 1 ) + ) LOD_Ub ( + .in_i ( Mant_b_D ), + .cnt_o ( Mant_leadingOne_b ), + .empty_o ( Mant_zero_S_b ) + ); + + + logic [C_MANT_FP64:0] Mant_b_norm_DN,Mant_b_norm_DP; + + assign Mant_b_norm_DN = ((Start_S&&Ready_SI))?(Mant_b_D<<(Mant_leadingOne_b)):Mant_b_norm_DP; + + always_ff @(posedge Clk_CI, negedge Rst_RBI) + begin + if(~Rst_RBI) + begin + Mant_b_norm_DP <= '0; + end + else + begin + Mant_b_norm_DP<=Mant_b_norm_DN; + end + end + + logic [C_EXP_FP64:0] Exp_b_norm_DN,Exp_b_norm_DP; + assign Exp_b_norm_DN = ((Start_S&&Ready_SI))?(Exp_b_D-Mant_leadingOne_b+(|Mant_leadingOne_b)):Exp_b_norm_DP; //Covers denormal inputs: subtract the shift count, add 1 back when the count is non-zero + + always_ff @(posedge Clk_CI, negedge Rst_RBI) + begin + if(~Rst_RBI) + begin + Exp_b_norm_DP <= '0; + end + else + begin + Exp_b_norm_DP<=Exp_b_norm_DN; + end + end + + 
///////////////////////////////////////////////////////////////////////////// + // Output assignments // + ///////////////////////////////////////////////////////////////////////////// + + assign Start_SO=Start_S; // unregistered: Div_start_SI | Sqrt_start_SI + assign Exp_a_DO_norm=Exp_a_norm_DP; + assign Exp_b_DO_norm=Exp_b_norm_DP; + assign Mant_a_DO_norm=Mant_a_norm_DP; + assign Mant_b_DO_norm=Mant_b_norm_DP; + assign Sign_z_DO=Sign_z_DP; + assign Inf_a_SO=Inf_a_SP; + assign Inf_b_SO=Inf_b_SP; + assign Zero_a_SO=Zero_a_SP; + assign Zero_b_SO=Zero_b_SP; + assign NaN_a_SO=NaN_a_SP; + assign NaN_b_SO=NaN_b_SP; + assign SNaN_SO=SNaN_SP; + +endmodule diff --git a/test/type_param/vendor/pulp-platform/axi/include/axi/assign.svh b/test/type_param/vendor/pulp-platform/axi/include/axi/assign.svh new file mode 100644 index 00000000..14bb1944 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/axi/include/axi/assign.svh @@ -0,0 +1,541 @@ +// Copyright (c) 2014-2018 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Authors: +// - Andreas Kurth +// - Wolfgang Roenninger + +// Macros to assign AXI Interfaces and Structs + +`ifndef AXI_ASSIGN_SVH_ +`define AXI_ASSIGN_SVH_ + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Internal implementation for assigning one AXI struct or interface to another struct or interface.
+// The path to the signals on each side is defined by the `__sep*` arguments. The `__opt_as` +// argument allows to use this standalone (with `__opt_as = assign`) or in assignments inside +// processes (with `__opt_as` void). +`define __AXI_TO_AW(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + __opt_as __lhs``__lhs_sep``id = __rhs``__rhs_sep``id; \ + __opt_as __lhs``__lhs_sep``addr = __rhs``__rhs_sep``addr; \ + __opt_as __lhs``__lhs_sep``len = __rhs``__rhs_sep``len; \ + __opt_as __lhs``__lhs_sep``size = __rhs``__rhs_sep``size; \ + __opt_as __lhs``__lhs_sep``burst = __rhs``__rhs_sep``burst; \ + __opt_as __lhs``__lhs_sep``lock = __rhs``__rhs_sep``lock; \ + __opt_as __lhs``__lhs_sep``cache = __rhs``__rhs_sep``cache; \ + __opt_as __lhs``__lhs_sep``prot = __rhs``__rhs_sep``prot; \ + __opt_as __lhs``__lhs_sep``qos = __rhs``__rhs_sep``qos; \ + __opt_as __lhs``__lhs_sep``region = __rhs``__rhs_sep``region; \ + __opt_as __lhs``__lhs_sep``atop = __rhs``__rhs_sep``atop; \ + __opt_as __lhs``__lhs_sep``user = __rhs``__rhs_sep``user; +`define __AXI_TO_W(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + __opt_as __lhs``__lhs_sep``data = __rhs``__rhs_sep``data; \ + __opt_as __lhs``__lhs_sep``strb = __rhs``__rhs_sep``strb; \ + __opt_as __lhs``__lhs_sep``last = __rhs``__rhs_sep``last; \ + __opt_as __lhs``__lhs_sep``user = __rhs``__rhs_sep``user; +`define __AXI_TO_B(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + __opt_as __lhs``__lhs_sep``id = __rhs``__rhs_sep``id; \ + __opt_as __lhs``__lhs_sep``resp = __rhs``__rhs_sep``resp; \ + __opt_as __lhs``__lhs_sep``user = __rhs``__rhs_sep``user; +`define __AXI_TO_AR(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + __opt_as __lhs``__lhs_sep``id = __rhs``__rhs_sep``id; \ + __opt_as __lhs``__lhs_sep``addr = __rhs``__rhs_sep``addr; \ + __opt_as __lhs``__lhs_sep``len = __rhs``__rhs_sep``len; \ + __opt_as __lhs``__lhs_sep``size = __rhs``__rhs_sep``size; \ + __opt_as __lhs``__lhs_sep``burst = __rhs``__rhs_sep``burst; \ + __opt_as 
__lhs``__lhs_sep``lock = __rhs``__rhs_sep``lock; \ + __opt_as __lhs``__lhs_sep``cache = __rhs``__rhs_sep``cache; \ + __opt_as __lhs``__lhs_sep``prot = __rhs``__rhs_sep``prot; \ + __opt_as __lhs``__lhs_sep``qos = __rhs``__rhs_sep``qos; \ + __opt_as __lhs``__lhs_sep``region = __rhs``__rhs_sep``region; \ + __opt_as __lhs``__lhs_sep``user = __rhs``__rhs_sep``user; +`define __AXI_TO_R(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + __opt_as __lhs``__lhs_sep``id = __rhs``__rhs_sep``id; \ + __opt_as __lhs``__lhs_sep``data = __rhs``__rhs_sep``data; \ + __opt_as __lhs``__lhs_sep``resp = __rhs``__rhs_sep``resp; \ + __opt_as __lhs``__lhs_sep``last = __rhs``__rhs_sep``last; \ + __opt_as __lhs``__lhs_sep``user = __rhs``__rhs_sep``user; +`define __AXI_TO_REQ(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + `__AXI_TO_AW(__opt_as, __lhs.aw, __lhs_sep, __rhs.aw, __rhs_sep) \ + __opt_as __lhs.aw_valid = __rhs.aw_valid; \ + `__AXI_TO_W(__opt_as, __lhs.w, __lhs_sep, __rhs.w, __rhs_sep) \ + __opt_as __lhs.w_valid = __rhs.w_valid; \ + __opt_as __lhs.b_ready = __rhs.b_ready; \ + `__AXI_TO_AR(__opt_as, __lhs.ar, __lhs_sep, __rhs.ar, __rhs_sep) \ + __opt_as __lhs.ar_valid = __rhs.ar_valid; \ + __opt_as __lhs.r_ready = __rhs.r_ready; +`define __AXI_TO_RESP(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + __opt_as __lhs.aw_ready = __rhs.aw_ready; \ + __opt_as __lhs.ar_ready = __rhs.ar_ready; \ + __opt_as __lhs.w_ready = __rhs.w_ready; \ + __opt_as __lhs.b_valid = __rhs.b_valid; \ + `__AXI_TO_B(__opt_as, __lhs.b, __lhs_sep, __rhs.b, __rhs_sep) \ + __opt_as __lhs.r_valid = __rhs.r_valid; \ + `__AXI_TO_R(__opt_as, __lhs.r, __lhs_sep, __rhs.r, __rhs_sep) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Assigning one AXI4+ATOP interface to another, as if you would do `assign slv = mst;` +// +// The channel assignments 
`AXI_ASSIGN_XX(dst, src)` assign all payload and the valid signal of the +// `XX` channel from the `src` to the `dst` interface and they assign the ready signal from the +// `dst` to the `src` interface. +// The interface assignment `AXI_ASSIGN(dst, src)` assigns all channels including handshakes as if +// `src` was the master of `dst`. +// +// Usage Example: +// `AXI_ASSIGN(slv, mst) +// `AXI_ASSIGN_AW(dst, src) +// `AXI_ASSIGN_R(dst, src) +`define AXI_ASSIGN_AW(dst, src) \ + `__AXI_TO_AW(assign, dst.aw, _, src.aw, _) \ + assign dst.aw_valid = src.aw_valid; \ + assign src.aw_ready = dst.aw_ready; +`define AXI_ASSIGN_W(dst, src) \ + `__AXI_TO_W(assign, dst.w, _, src.w, _) \ + assign dst.w_valid = src.w_valid; \ + assign src.w_ready = dst.w_ready; +`define AXI_ASSIGN_B(dst, src) \ + `__AXI_TO_B(assign, dst.b, _, src.b, _) \ + assign dst.b_valid = src.b_valid; \ + assign src.b_ready = dst.b_ready; +`define AXI_ASSIGN_AR(dst, src) \ + `__AXI_TO_AR(assign, dst.ar, _, src.ar, _) \ + assign dst.ar_valid = src.ar_valid; \ + assign src.ar_ready = dst.ar_ready; +`define AXI_ASSIGN_R(dst, src) \ + `__AXI_TO_R(assign, dst.r, _, src.r, _) \ + assign dst.r_valid = src.r_valid; \ + assign src.r_ready = dst.r_ready; +`define AXI_ASSIGN(slv, mst) \ + `AXI_ASSIGN_AW(slv, mst) \ + `AXI_ASSIGN_W(slv, mst) \ + `AXI_ASSIGN_B(mst, slv) \ + `AXI_ASSIGN_AR(slv, mst) \ + `AXI_ASSIGN_R(mst, slv) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Assigning an AXI4+ATOP interface to a monitor modport, as if you would do `assign mon = axi_if;` +// +// The channel assignment `AXI_ASSIGN_MONITOR(mon_dv, axi_if)` assigns all signals from `axi_if` +// to the `mon_dv` interface.
+// +// Usage Example: +// `AXI_ASSIGN_MONITOR(mon_dv, axi_if) +`define AXI_ASSIGN_MONITOR(mon_dv, axi_if) \ + `__AXI_TO_AW(assign, mon_dv.aw, _, axi_if.aw, _) \ + assign mon_dv.aw_valid = axi_if.aw_valid; \ + assign mon_dv.aw_ready = axi_if.aw_ready; \ + `__AXI_TO_W(assign, mon_dv.w, _, axi_if.w, _) \ + assign mon_dv.w_valid = axi_if.w_valid; \ + assign mon_dv.w_ready = axi_if.w_ready; \ + `__AXI_TO_B(assign, mon_dv.b, _, axi_if.b, _) \ + assign mon_dv.b_valid = axi_if.b_valid; \ + assign mon_dv.b_ready = axi_if.b_ready; \ + `__AXI_TO_AR(assign, mon_dv.ar, _, axi_if.ar, _) \ + assign mon_dv.ar_valid = axi_if.ar_valid; \ + assign mon_dv.ar_ready = axi_if.ar_ready; \ + `__AXI_TO_R(assign, mon_dv.r, _, axi_if.r, _) \ + assign mon_dv.r_valid = axi_if.r_valid; \ + assign mon_dv.r_ready = axi_if.r_ready; +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Setting an interface from channel or request/response structs inside a process. +// +// The channel macros `AXI_SET_FROM_XX(axi_if, xx_struct)` set the payload signals of the `axi_if` +// interface from the signals in `xx_struct`. They do not set the handshake signals. +// The request macro `AXI_SET_FROM_REQ(axi_if, req_struct)` sets all request channels (AW, W, AR) +// and the request-side handshake signals (AW, W, and AR valid and B and R ready) of the `axi_if` +// interface from the signals in `req_struct`. +// The response macro `AXI_SET_FROM_RESP(axi_if, resp_struct)` sets both response channels (B and R) +// and the response-side handshake signals (B and R valid and AW, W, and AR ready) of the `axi_if` +// interface from the signals in `resp_struct`. +// +// Usage Example: +// always_comb begin +// `AXI_SET_FROM_REQ(my_if, my_req_struct) +// end +`define AXI_SET_FROM_AW(axi_if, aw_struct) `__AXI_TO_AW(, axi_if.aw, _, aw_struct, .) 
+`define AXI_SET_FROM_W(axi_if, w_struct) `__AXI_TO_W(, axi_if.w, _, w_struct, .) +`define AXI_SET_FROM_B(axi_if, b_struct) `__AXI_TO_B(, axi_if.b, _, b_struct, .) +`define AXI_SET_FROM_AR(axi_if, ar_struct) `__AXI_TO_AR(, axi_if.ar, _, ar_struct, .) +`define AXI_SET_FROM_R(axi_if, r_struct) `__AXI_TO_R(, axi_if.r, _, r_struct, .) +`define AXI_SET_FROM_REQ(axi_if, req_struct) `__AXI_TO_REQ(, axi_if, _, req_struct, .) +`define AXI_SET_FROM_RESP(axi_if, resp_struct) `__AXI_TO_RESP(, axi_if, _, resp_struct, .) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Assigning an interface from channel or request/response structs outside a process. +// +// The channel macros `AXI_ASSIGN_FROM_XX(axi_if, xx_struct)` assign the payload signals of the +// `axi_if` interface from the signals in `xx_struct`. They do not assign the handshake signals. +// The request macro `AXI_ASSIGN_FROM_REQ(axi_if, req_struct)` assigns all request channels (AW, W, +// AR) and the request-side handshake signals (AW, W, and AR valid and B and R ready) of the +// `axi_if` interface from the signals in `req_struct`. +// The response macro `AXI_ASSIGN_FROM_RESP(axi_if, resp_struct)` assigns both response channels (B +// and R) and the response-side handshake signals (B and R valid and AW, W, and AR ready) of the +// `axi_if` interface from the signals in `resp_struct`. +// +// Usage Example: +// `AXI_ASSIGN_FROM_REQ(my_if, my_req_struct) +`define AXI_ASSIGN_FROM_AW(axi_if, aw_struct) `__AXI_TO_AW(assign, axi_if.aw, _, aw_struct, .) +`define AXI_ASSIGN_FROM_W(axi_if, w_struct) `__AXI_TO_W(assign, axi_if.w, _, w_struct, .) +`define AXI_ASSIGN_FROM_B(axi_if, b_struct) `__AXI_TO_B(assign, axi_if.b, _, b_struct, .) +`define AXI_ASSIGN_FROM_AR(axi_if, ar_struct) `__AXI_TO_AR(assign, axi_if.ar, _, ar_struct, .) 
+`define AXI_ASSIGN_FROM_R(axi_if, r_struct) `__AXI_TO_R(assign, axi_if.r, _, r_struct, .) +`define AXI_ASSIGN_FROM_REQ(axi_if, req_struct) `__AXI_TO_REQ(assign, axi_if, _, req_struct, .) +`define AXI_ASSIGN_FROM_RESP(axi_if, resp_struct) `__AXI_TO_RESP(assign, axi_if, _, resp_struct, .) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Setting channel or request/response structs from an interface inside a process. +// +// The channel macros `AXI_SET_TO_XX(xx_struct, axi_if)` set the signals of `xx_struct` to the +// payload signals of that channel in the `axi_if` interface. They do not set the handshake +// signals. +// The request macro `AXI_SET_TO_REQ(req_struct, axi_if)` sets all signals of `req_struct` (i.e., +// request channel (AW, W, AR) payload and request-side handshake signals (AW, W, and AR valid and +// B and R ready)) to the signals in the `axi_if` interface. +// The response macro `AXI_SET_TO_RESP(resp_struct, axi_if)` sets all signals of `resp_struct` +// (i.e., response channel (B and R) payload and response-side handshake signals (B and R valid and +// AW, W, and AR ready)) to the signals in the `axi_if` interface.
+// +// Usage Example: +// always_comb begin +// `AXI_SET_TO_REQ(my_req_struct, my_if) +// end +`define AXI_SET_TO_AW(aw_struct, axi_if) `__AXI_TO_AW(, aw_struct, ., axi_if.aw, _) +`define AXI_SET_TO_W(w_struct, axi_if) `__AXI_TO_W(, w_struct, ., axi_if.w, _) +`define AXI_SET_TO_B(b_struct, axi_if) `__AXI_TO_B(, b_struct, ., axi_if.b, _) +`define AXI_SET_TO_AR(ar_struct, axi_if) `__AXI_TO_AR(, ar_struct, ., axi_if.ar, _) +`define AXI_SET_TO_R(r_struct, axi_if) `__AXI_TO_R(, r_struct, ., axi_if.r, _) +`define AXI_SET_TO_REQ(req_struct, axi_if) `__AXI_TO_REQ(, req_struct, ., axi_if, _) +`define AXI_SET_TO_RESP(resp_struct, axi_if) `__AXI_TO_RESP(, resp_struct, ., axi_if, _) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Assigning channel or request/response structs from an interface outside a process. +// +// The channel macros `AXI_ASSIGN_TO_XX(xx_struct, axi_if)` assign the signals of `xx_struct` to the +// payload signals of that channel in the `axi_if` interface. They do not assign the handshake +// signals. +// The request macro `AXI_ASSIGN_TO_REQ(req_struct, axi_if)` assigns all signals of `req_struct` +// (i.e., request channel (AW, W, AR) payload and request-side handshake signals (AW, W, and AR +// valid and B and R ready)) to the signals in the `axi_if` interface. +// The response macro `AXI_ASSIGN_TO_RESP(resp_struct, axi_if)` assigns all signals of `resp_struct` +// (i.e., response channel (B and R) payload and response-side handshake signals (B and R valid and +// AW, W, and AR ready)) to the signals in the `axi_if` interface.
+// +// Usage Example: +// `AXI_ASSIGN_TO_REQ(my_req_struct, my_if) +`define AXI_ASSIGN_TO_AW(aw_struct, axi_if) `__AXI_TO_AW(assign, aw_struct, ., axi_if.aw, _) +`define AXI_ASSIGN_TO_W(w_struct, axi_if) `__AXI_TO_W(assign, w_struct, ., axi_if.w, _) +`define AXI_ASSIGN_TO_B(b_struct, axi_if) `__AXI_TO_B(assign, b_struct, ., axi_if.b, _) +`define AXI_ASSIGN_TO_AR(ar_struct, axi_if) `__AXI_TO_AR(assign, ar_struct, ., axi_if.ar, _) +`define AXI_ASSIGN_TO_R(r_struct, axi_if) `__AXI_TO_R(assign, r_struct, ., axi_if.r, _) +`define AXI_ASSIGN_TO_REQ(req_struct, axi_if) `__AXI_TO_REQ(assign, req_struct, ., axi_if, _) +`define AXI_ASSIGN_TO_RESP(resp_struct, axi_if) `__AXI_TO_RESP(assign, resp_struct, ., axi_if, _) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Setting channel or request/response structs from another struct inside a process. +// +// The channel macros `AXI_SET_XX_STRUCT(lhs, rhs)` set the fields of the `lhs` channel struct to +// the fields of the `rhs` channel struct. They do not set the handshake signals, which are not +// part of channel structs. +// The request macro `AXI_SET_REQ_STRUCT(lhs, rhs)` sets all fields of the `lhs` request struct to +// the fields of the `rhs` request struct. This includes all request channel (AW, W, AR) payload +// and request-side handshake signals (AW, W, and AR valid and B and R ready). +// The response macro `AXI_SET_RESP_STRUCT(lhs, rhs)` sets all fields of the `lhs` response struct +// to the fields of the `rhs` response struct. This includes all response channel (B and R) payload +// and response-side handshake signals (B and R valid and AW, W, and AR ready). +// +// Usage Example: +// always_comb begin +// `AXI_SET_REQ_STRUCT(my_req_struct, another_req_struct) +// end +`define AXI_SET_AW_STRUCT(lhs, rhs) `__AXI_TO_AW(, lhs, ., rhs, .)
+`define AXI_SET_W_STRUCT(lhs, rhs) `__AXI_TO_W(, lhs, ., rhs, .) +`define AXI_SET_B_STRUCT(lhs, rhs) `__AXI_TO_B(, lhs, ., rhs, .) +`define AXI_SET_AR_STRUCT(lhs, rhs) `__AXI_TO_AR(, lhs, ., rhs, .) +`define AXI_SET_R_STRUCT(lhs, rhs) `__AXI_TO_R(, lhs, ., rhs, .) +`define AXI_SET_REQ_STRUCT(lhs, rhs) `__AXI_TO_REQ(, lhs, ., rhs, .) +`define AXI_SET_RESP_STRUCT(lhs, rhs) `__AXI_TO_RESP(, lhs, ., rhs, .) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Assigning channel or request/response structs from another struct outside a process. +// +// The channel macros `AXI_ASSIGN_XX_STRUCT(lhs, rhs)` assign the fields of the `lhs` channel struct +// to the fields of the `rhs` channel struct. They do not assign the handshake signals, which are +// not part of the channel structs. +// The request macro `AXI_ASSIGN_REQ_STRUCT(lhs, rhs)` assigns all fields of the `lhs` request +// struct to the fields of the `rhs` request struct. This includes all request channel (AW, W, AR) +// payload and request-side handshake signals (AW, W, and AR valid and B and R ready). +// The response macro `AXI_ASSIGN_RESP_STRUCT(lhs, rhs)` assigns all fields of the `lhs` response +// struct to the fields of the `rhs` response struct. This includes all response channel (B and R) +// payload and response-side handshake signals (B and R valid and AW, W, and AR ready). +// +// Usage Example: +// `AXI_ASSIGN_REQ_STRUCT(my_req_struct, another_req_struct) +`define AXI_ASSIGN_AW_STRUCT(lhs, rhs) `__AXI_TO_AW(assign, lhs, ., rhs, .) +`define AXI_ASSIGN_W_STRUCT(lhs, rhs) `__AXI_TO_W(assign, lhs, ., rhs, .) +`define AXI_ASSIGN_B_STRUCT(lhs, rhs) `__AXI_TO_B(assign, lhs, ., rhs, .) +`define AXI_ASSIGN_AR_STRUCT(lhs, rhs) `__AXI_TO_AR(assign, lhs, ., rhs, .) +`define AXI_ASSIGN_R_STRUCT(lhs, rhs) `__AXI_TO_R(assign, lhs, ., rhs, .)
+`define AXI_ASSIGN_REQ_STRUCT(lhs, rhs) `__AXI_TO_REQ(assign, lhs, ., rhs, .) +`define AXI_ASSIGN_RESP_STRUCT(lhs, rhs) `__AXI_TO_RESP(assign, lhs, ., rhs, .) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Internal implementation for assigning one Lite struct or interface to another struct or +// interface. The path to the signals on each side is defined by the `__sep*` arguments. The +// `__opt_as` argument allows to use this standalone (with `__opt_as = assign`) or in assignments +// inside processes (with `__opt_as` void). +`define __AXI_LITE_TO_AX(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + __opt_as __lhs``__lhs_sep``addr = __rhs``__rhs_sep``addr; \ + __opt_as __lhs``__lhs_sep``prot = __rhs``__rhs_sep``prot; +`define __AXI_LITE_TO_W(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + __opt_as __lhs``__lhs_sep``data = __rhs``__rhs_sep``data; \ + __opt_as __lhs``__lhs_sep``strb = __rhs``__rhs_sep``strb; +`define __AXI_LITE_TO_B(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + __opt_as __lhs``__lhs_sep``resp = __rhs``__rhs_sep``resp; +`define __AXI_LITE_TO_R(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + __opt_as __lhs``__lhs_sep``data = __rhs``__rhs_sep``data; \ + __opt_as __lhs``__lhs_sep``resp = __rhs``__rhs_sep``resp; +`define __AXI_LITE_TO_REQ(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + `__AXI_LITE_TO_AX(__opt_as, __lhs.aw, __lhs_sep, __rhs.aw, __rhs_sep) \ + __opt_as __lhs.aw_valid = __rhs.aw_valid; \ + `__AXI_LITE_TO_W(__opt_as, __lhs.w, __lhs_sep, __rhs.w, __rhs_sep) \ + __opt_as __lhs.w_valid = __rhs.w_valid; \ + __opt_as __lhs.b_ready = __rhs.b_ready; \ + `__AXI_LITE_TO_AX(__opt_as, __lhs.ar, __lhs_sep, __rhs.ar, __rhs_sep) \ + __opt_as __lhs.ar_valid = __rhs.ar_valid; \ + __opt_as __lhs.r_ready = __rhs.r_ready; +`define __AXI_LITE_TO_RESP(__opt_as, __lhs, __lhs_sep, __rhs,
__rhs_sep) \ + __opt_as __lhs.aw_ready = __rhs.aw_ready; \ + __opt_as __lhs.ar_ready = __rhs.ar_ready; \ + __opt_as __lhs.w_ready = __rhs.w_ready; \ + __opt_as __lhs.b_valid = __rhs.b_valid; \ + `__AXI_LITE_TO_B(__opt_as, __lhs.b, __lhs_sep, __rhs.b, __rhs_sep) \ + __opt_as __lhs.r_valid = __rhs.r_valid; \ + `__AXI_LITE_TO_R(__opt_as, __lhs.r, __lhs_sep, __rhs.r, __rhs_sep) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Assigning one AXI-Lite interface to another, as if you would do `assign slv = mst;` +// +// The channel assignments `AXI_LITE_ASSIGN_XX(dst, src)` assign all payload and the valid signal of +// the `XX` channel from the `src` to the `dst` interface and they assign the ready signal from the +// `dst` to the `src` interface. +// The interface assignment `AXI_LITE_ASSIGN(dst, src)` assigns all channels including handshakes as +// if `src` was the master of `dst`.
+// +// Usage Example: +// `AXI_LITE_ASSIGN(slv, mst) +// `AXI_LITE_ASSIGN_AW(dst, src) +// `AXI_LITE_ASSIGN_R(dst, src) +`define AXI_LITE_ASSIGN_AW(dst, src) \ + `__AXI_LITE_TO_AX(assign, dst.aw, _, src.aw, _) \ + assign dst.aw_valid = src.aw_valid; \ + assign src.aw_ready = dst.aw_ready; +`define AXI_LITE_ASSIGN_W(dst, src) \ + `__AXI_LITE_TO_W(assign, dst.w, _, src.w, _) \ + assign dst.w_valid = src.w_valid; \ + assign src.w_ready = dst.w_ready; +`define AXI_LITE_ASSIGN_B(dst, src) \ + `__AXI_LITE_TO_B(assign, dst.b, _, src.b, _) \ + assign dst.b_valid = src.b_valid; \ + assign src.b_ready = dst.b_ready; +`define AXI_LITE_ASSIGN_AR(dst, src) \ + `__AXI_LITE_TO_AX(assign, dst.ar, _, src.ar, _) \ + assign dst.ar_valid = src.ar_valid; \ + assign src.ar_ready = dst.ar_ready; +`define AXI_LITE_ASSIGN_R(dst, src) \ + `__AXI_LITE_TO_R(assign, dst.r, _, src.r, _) \ + assign dst.r_valid = src.r_valid; \ + assign src.r_ready = dst.r_ready; +`define AXI_LITE_ASSIGN(slv, mst) \ + `AXI_LITE_ASSIGN_AW(slv, mst) \ + `AXI_LITE_ASSIGN_W(slv, mst) \ + `AXI_LITE_ASSIGN_B(mst, slv) \ + `AXI_LITE_ASSIGN_AR(slv, mst) \ + `AXI_LITE_ASSIGN_R(mst, slv) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Setting a Lite interface from channel or request/response structs inside a process. +// +// The channel macros `AXI_LITE_SET_FROM_XX(axi_if, xx_struct)` set the payload signals of the +// `axi_if` interface from the signals in `xx_struct`. They do not set the handshake signals. +// The request macro `AXI_LITE_SET_FROM_REQ(axi_if, req_struct)` sets all request channels (AW, W, +// AR) and the request-side handshake signals (AW, W, and AR valid and B and R ready) of the +// `axi_if` interface from the signals in `req_struct`. 
+// The response macro `AXI_LITE_SET_FROM_RESP(axi_if, resp_struct)` sets both response channels (B +// and R) and the response-side handshake signals (B and R valid and AW, W, and AR ready) of the +// `axi_if` interface from the signals in `resp_struct`. +// +// Usage Example: +// always_comb begin +// `AXI_LITE_SET_FROM_REQ(my_if, my_req_struct) +// end +`define AXI_LITE_SET_FROM_AW(axi_if, aw_struct) `__AXI_LITE_TO_AX(, axi_if.aw, _, aw_struct, .) +`define AXI_LITE_SET_FROM_W(axi_if, w_struct) `__AXI_LITE_TO_W(, axi_if.w, _, w_struct, .) +`define AXI_LITE_SET_FROM_B(axi_if, b_struct) `__AXI_LITE_TO_B(, axi_if.b, _, b_struct, .) +`define AXI_LITE_SET_FROM_AR(axi_if, ar_struct) `__AXI_LITE_TO_AX(, axi_if.ar, _, ar_struct, .) +`define AXI_LITE_SET_FROM_R(axi_if, r_struct) `__AXI_LITE_TO_R(, axi_if.r, _, r_struct, .) +`define AXI_LITE_SET_FROM_REQ(axi_if, req_struct) `__AXI_LITE_TO_REQ(, axi_if, _, req_struct, .) +`define AXI_LITE_SET_FROM_RESP(axi_if, resp_struct) `__AXI_LITE_TO_RESP(, axi_if, _, resp_struct, .) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Assigning a Lite interface from channel or request/response structs outside a process. +// +// The channel macros `AXI_LITE_ASSIGN_FROM_XX(axi_if, xx_struct)` assign the payload signals of the +// `axi_if` interface from the signals in `xx_struct`. They do not assign the handshake signals. +// The request macro `AXI_LITE_ASSIGN_FROM_REQ(axi_if, req_struct)` assigns all request channels +// (AW, W, AR) and the request-side handshake signals (AW, W, and AR valid and B and R ready) of the +// `axi_if` interface from the signals in `req_struct`. 
+// The response macro `AXI_LITE_ASSIGN_FROM_RESP(axi_if, resp_struct)` assigns both response +// channels (B and R) and the response-side handshake signals (B and R valid and AW, W, and AR +// ready) of the `axi_if` interface from the signals in `resp_struct`. +// +// Usage Example: +// `AXI_LITE_ASSIGN_FROM_REQ(my_if, my_req_struct) +`define AXI_LITE_ASSIGN_FROM_AW(axi_if, aw_struct) `__AXI_LITE_TO_AX(assign, axi_if.aw, _, aw_struct, .) +`define AXI_LITE_ASSIGN_FROM_W(axi_if, w_struct) `__AXI_LITE_TO_W(assign, axi_if.w, _, w_struct, .) +`define AXI_LITE_ASSIGN_FROM_B(axi_if, b_struct) `__AXI_LITE_TO_B(assign, axi_if.b, _, b_struct, .) +`define AXI_LITE_ASSIGN_FROM_AR(axi_if, ar_struct) `__AXI_LITE_TO_AX(assign, axi_if.ar, _, ar_struct, .) +`define AXI_LITE_ASSIGN_FROM_R(axi_if, r_struct) `__AXI_LITE_TO_R(assign, axi_if.r, _, r_struct, .) +`define AXI_LITE_ASSIGN_FROM_REQ(axi_if, req_struct) `__AXI_LITE_TO_REQ(assign, axi_if, _, req_struct, .) +`define AXI_LITE_ASSIGN_FROM_RESP(axi_if, resp_struct) `__AXI_LITE_TO_RESP(assign, axi_if, _, resp_struct, .) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Setting channel or request/response structs from an interface inside a process. +// +// The channel macros `AXI_LITE_SET_TO_XX(xx_struct, axi_if)` set the signals of `xx_struct` to the +// payload signals of that channel in the `axi_if` interface. They do not set the handshake +// signals. +// The request macro `AXI_LITE_SET_TO_REQ(req_struct, axi_if)` sets all signals of `req_struct` +// (i.e., request channel (AW, W, AR) payload and request-side handshake signals (AW, W, and AR +// valid and B and R ready)) to the signals in the `axi_if` interface.
+// The response macro `AXI_LITE_SET_TO_RESP(resp_struct, axi_if)` sets all signals of `resp_struct` +// (i.e., response channel (B and R) payload and response-side handshake signals (B and R valid and +// AW, W, and AR ready)) to the signals in the `axi_if` interface. +// +// Usage Example: +// always_comb begin +// `AXI_LITE_SET_TO_REQ(my_req_struct, my_if) +// end +`define AXI_LITE_SET_TO_AW(aw_struct, axi_if) `__AXI_LITE_TO_AX(, aw_struct, ., axi_if.aw, _) +`define AXI_LITE_SET_TO_W(w_struct, axi_if) `__AXI_LITE_TO_W(, w_struct, ., axi_if.w, _) +`define AXI_LITE_SET_TO_B(b_struct, axi_if) `__AXI_LITE_TO_B(, b_struct, ., axi_if.b, _) +`define AXI_LITE_SET_TO_AR(ar_struct, axi_if) `__AXI_LITE_TO_AX(, ar_struct, ., axi_if.ar, _) +`define AXI_LITE_SET_TO_R(r_struct, axi_if) `__AXI_LITE_TO_R(, r_struct, ., axi_if.r, _) +`define AXI_LITE_SET_TO_REQ(req_struct, axi_if) `__AXI_LITE_TO_REQ(, req_struct, ., axi_if, _) +`define AXI_LITE_SET_TO_RESP(resp_struct, axi_if) `__AXI_LITE_TO_RESP(, resp_struct, ., axi_if, _) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Assigning channel or request/response structs from an interface outside a process. +// +// The channel macros `AXI_LITE_ASSIGN_TO_XX(xx_struct, axi_if)` assign the signals of `xx_struct` +// to the payload signals of that channel in the `axi_if` interface. They do not assign the +// handshake signals. +// The request macro `AXI_LITE_ASSIGN_TO_REQ(req_struct, axi_if)` assigns all signals of +// `req_struct` (i.e., request channel (AW, W, AR) payload and request-side handshake signals (AW, +// W, and AR valid and B and R ready)) to the signals in the `axi_if` interface.
+// The response macro `AXI_LITE_ASSIGN_TO_RESP(resp_struct, axi_if)` assigns all signals of +// `resp_struct` (i.e., response channel (B and R) payload and response-side handshake signals (B +// and R valid and AW, W, and AR ready)) to the signals in the `axi_if` interface. +// +// Usage Example: +// `AXI_LITE_ASSIGN_TO_REQ(my_req_struct, my_if) +`define AXI_LITE_ASSIGN_TO_AW(aw_struct, axi_if) `__AXI_LITE_TO_AX(assign, aw_struct, ., axi_if.aw, _) +`define AXI_LITE_ASSIGN_TO_W(w_struct, axi_if) `__AXI_LITE_TO_W(assign, w_struct, ., axi_if.w, _) +`define AXI_LITE_ASSIGN_TO_B(b_struct, axi_if) `__AXI_LITE_TO_B(assign, b_struct, ., axi_if.b, _) +`define AXI_LITE_ASSIGN_TO_AR(ar_struct, axi_if) `__AXI_LITE_TO_AX(assign, ar_struct, ., axi_if.ar, _) +`define AXI_LITE_ASSIGN_TO_R(r_struct, axi_if) `__AXI_LITE_TO_R(assign, r_struct, ., axi_if.r, _) +`define AXI_LITE_ASSIGN_TO_REQ(req_struct, axi_if) `__AXI_LITE_TO_REQ(assign, req_struct, ., axi_if, _) +`define AXI_LITE_ASSIGN_TO_RESP(resp_struct, axi_if) `__AXI_LITE_TO_RESP(assign, resp_struct, ., axi_if, _) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Setting channel or request/response structs from another struct inside a process. +// +// The channel macros `AXI_LITE_SET_XX_STRUCT(lhs, rhs)` set the fields of the `lhs` channel struct +// to the fields of the `rhs` channel struct. They do not set the handshake signals, which are not +// part of channel structs. +// The request macro `AXI_LITE_SET_REQ_STRUCT(lhs, rhs)` sets all fields of the `lhs` request struct +// to the fields of the `rhs` request struct. This includes all request channel (AW, W, AR) payload +// and request-side handshake signals (AW, W, and AR valid and B and R ready).
+// The response macro `AXI_LITE_SET_RESP_STRUCT(lhs, rhs)` sets all fields of the `lhs` response +// struct to the fields of the `rhs` response struct. This includes all response channel (B and R) +// payload and response-side handshake signals (B and R valid and AW, W, and AR ready). +// +// Usage Example: +// always_comb begin +// `AXI_LITE_SET_REQ_STRUCT(my_req_struct, another_req_struct) +// end +`define AXI_LITE_SET_AW_STRUCT(lhs, rhs) `__AXI_LITE_TO_AX(, lhs, ., rhs, .) +`define AXI_LITE_SET_W_STRUCT(lhs, rhs) `__AXI_LITE_TO_W(, lhs, ., rhs, .) +`define AXI_LITE_SET_B_STRUCT(lhs, rhs) `__AXI_LITE_TO_B(, lhs, ., rhs, .) +`define AXI_LITE_SET_AR_STRUCT(lhs, rhs) `__AXI_LITE_TO_AX(, lhs, ., rhs, .) +`define AXI_LITE_SET_R_STRUCT(lhs, rhs) `__AXI_LITE_TO_R(, lhs, ., rhs, .) +`define AXI_LITE_SET_REQ_STRUCT(lhs, rhs) `__AXI_LITE_TO_REQ(, lhs, ., rhs, .) +`define AXI_LITE_SET_RESP_STRUCT(lhs, rhs) `__AXI_LITE_TO_RESP(, lhs, ., rhs, .) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Assigning channel or request/response structs from another struct outside a process. +// +// The channel macros `AXI_LITE_ASSIGN_XX_STRUCT(lhs, rhs)` assign the fields of the `lhs` channel +// struct to the fields of the `rhs` channel struct. They do not assign the handshake signals, +// which are not part of the channel structs. +// The request macro `AXI_LITE_ASSIGN_REQ_STRUCT(lhs, rhs)` assigns all fields of the `lhs` request +// struct to the fields of the `rhs` request struct. This includes all request channel (AW, W, AR) +// payload and request-side handshake signals (AW, W, and AR valid and B and R ready). +// The response macro `AXI_LITE_ASSIGN_RESP_STRUCT(lhs, rhs)` assigns all fields of the `lhs` +// response struct to the fields of the `rhs` response struct.
This includes all response channel +// (B and R) payload and response-side handshake signals (B and R valid and AW, W, and AR ready). +// +// Usage Example: +// `AXI_LITE_ASSIGN_REQ_STRUCT(my_req_struct, another_req_struct) +`define AXI_LITE_ASSIGN_AW_STRUCT(lhs, rhs) `__AXI_LITE_TO_AX(assign, lhs, ., rhs, .) +`define AXI_LITE_ASSIGN_W_STRUCT(lhs, rhs) `__AXI_LITE_TO_W(assign, lhs, ., rhs, .) +`define AXI_LITE_ASSIGN_B_STRUCT(lhs, rhs) `__AXI_LITE_TO_B(assign, lhs, ., rhs, .) +`define AXI_LITE_ASSIGN_AR_STRUCT(lhs, rhs) `__AXI_LITE_TO_AX(assign, lhs, ., rhs, .) +`define AXI_LITE_ASSIGN_R_STRUCT(lhs, rhs) `__AXI_LITE_TO_R(assign, lhs, ., rhs, .) +`define AXI_LITE_ASSIGN_REQ_STRUCT(lhs, rhs) `__AXI_LITE_TO_REQ(assign, lhs, ., rhs, .) +`define AXI_LITE_ASSIGN_RESP_STRUCT(lhs, rhs) `__AXI_LITE_TO_RESP(assign, lhs, ., rhs, .) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +`endif diff --git a/test/type_param/vendor/pulp-platform/axi/include/axi/typedef.svh b/test/type_param/vendor/pulp-platform/axi/include/axi/typedef.svh new file mode 100644 index 00000000..a2a860e5 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/axi/include/axi/typedef.svh @@ -0,0 +1,211 @@ +// Copyright (c) 2019 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License.
+// +// Authors: +// - Andreas Kurth +// - Florian Zaruba +// - Wolfgang Roenninger + +// Macros to define AXI and AXI-Lite Channel and Request/Response Structs + +`ifndef AXI_TYPEDEF_SVH_ +`define AXI_TYPEDEF_SVH_ + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// AXI4+ATOP Channel and Request/Response Structs +// +// Usage Example: +// `AXI_TYPEDEF_AW_CHAN_T(axi_aw_t, axi_addr_t, axi_id_t, axi_user_t) +// `AXI_TYPEDEF_W_CHAN_T(axi_w_t, axi_data_t, axi_strb_t, axi_user_t) +// `AXI_TYPEDEF_B_CHAN_T(axi_b_t, axi_id_t, axi_user_t) +// `AXI_TYPEDEF_AR_CHAN_T(axi_ar_t, axi_addr_t, axi_id_t, axi_user_t) +// `AXI_TYPEDEF_R_CHAN_T(axi_r_t, axi_data_t, axi_id_t, axi_user_t) +// `AXI_TYPEDEF_REQ_T(axi_req_t, axi_aw_t, axi_w_t, axi_ar_t) +// `AXI_TYPEDEF_RESP_T(axi_resp_t, axi_b_t, axi_r_t) +`define AXI_TYPEDEF_AW_CHAN_T(aw_chan_t, addr_t, id_t, user_t) \ + typedef struct packed { \ + id_t id; \ + addr_t addr; \ + axi_pkg::len_t len; \ + axi_pkg::size_t size; \ + axi_pkg::burst_t burst; \ + logic lock; \ + axi_pkg::cache_t cache; \ + axi_pkg::prot_t prot; \ + axi_pkg::qos_t qos; \ + axi_pkg::region_t region; \ + axi_pkg::atop_t atop; \ + user_t user; \ + } aw_chan_t; +`define AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t) \ + typedef struct packed { \ + data_t data; \ + strb_t strb; \ + logic last; \ + user_t user; \ + } w_chan_t; +`define AXI_TYPEDEF_B_CHAN_T(b_chan_t, id_t, user_t) \ + typedef struct packed { \ + id_t id; \ + axi_pkg::resp_t resp; \ + user_t user; \ + } b_chan_t; +`define AXI_TYPEDEF_AR_CHAN_T(ar_chan_t, addr_t, id_t, user_t) \ + typedef struct packed { \ + id_t id; \ + addr_t addr; \ + axi_pkg::len_t len; \ + axi_pkg::size_t size; \ + axi_pkg::burst_t burst; \ + logic lock; \ + axi_pkg::cache_t cache; \ + axi_pkg::prot_t prot; \ + axi_pkg::qos_t qos; \ + axi_pkg::region_t region; \ + user_t user; \ + } ar_chan_t; +`define AXI_TYPEDEF_R_CHAN_T(r_chan_t, data_t, id_t, user_t) \ + typedef 
struct packed { \ + id_t id; \ + data_t data; \ + axi_pkg::resp_t resp; \ + logic last; \ + user_t user; \ + } r_chan_t; +`define AXI_TYPEDEF_REQ_T(req_t, aw_chan_t, w_chan_t, ar_chan_t) \ + typedef struct packed { \ + aw_chan_t aw; \ + logic aw_valid; \ + w_chan_t w; \ + logic w_valid; \ + logic b_ready; \ + ar_chan_t ar; \ + logic ar_valid; \ + logic r_ready; \ + } req_t; +`define AXI_TYPEDEF_RESP_T(resp_t, b_chan_t, r_chan_t) \ + typedef struct packed { \ + logic aw_ready; \ + logic ar_ready; \ + logic w_ready; \ + logic b_valid; \ + b_chan_t b; \ + logic r_valid; \ + r_chan_t r; \ + } resp_t; +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// All AXI4+ATOP Channels and Request/Response Structs in One Macro +// +// This can be used whenever the user is not interested in "precise" control of the naming of the +// individual channels. +// +// Usage Example: +// `AXI_TYPEDEF_ALL(axi, addr_t, id_t, data_t, strb_t, user_t) +// +// This defines `axi_req_t` and `axi_resp_t` request/response structs as well as `axi_aw_chan_t`, +// `axi_w_chan_t`, `axi_b_chan_t`, `axi_ar_chan_t`, and `axi_r_chan_t` channel structs. 
+`define AXI_TYPEDEF_ALL(__name, __addr_t, __id_t, __data_t, __strb_t, __user_t) \ + `AXI_TYPEDEF_AW_CHAN_T(__name``_aw_chan_t, __addr_t, __id_t, __user_t) \ + `AXI_TYPEDEF_W_CHAN_T(__name``_w_chan_t, __data_t, __strb_t, __user_t) \ + `AXI_TYPEDEF_B_CHAN_T(__name``_b_chan_t, __id_t, __user_t) \ + `AXI_TYPEDEF_AR_CHAN_T(__name``_ar_chan_t, __addr_t, __id_t, __user_t) \ + `AXI_TYPEDEF_R_CHAN_T(__name``_r_chan_t, __data_t, __id_t, __user_t) \ + `AXI_TYPEDEF_REQ_T(__name``_req_t, __name``_aw_chan_t, __name``_w_chan_t, __name``_ar_chan_t) \ + `AXI_TYPEDEF_RESP_T(__name``_resp_t, __name``_b_chan_t, __name``_r_chan_t) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// AXI4-Lite Channel and Request/Response Structs +// +// Usage Example: +// `AXI_LITE_TYPEDEF_AW_CHAN_T(axi_lite_aw_t, axi_lite_addr_t) +// `AXI_LITE_TYPEDEF_W_CHAN_T(axi_lite_w_t, axi_lite_data_t, axi_lite_strb_t) +// `AXI_LITE_TYPEDEF_B_CHAN_T(axi_lite_b_t) +// `AXI_LITE_TYPEDEF_AR_CHAN_T(axi_lite_ar_t, axi_lite_addr_t) +// `AXI_LITE_TYPEDEF_R_CHAN_T(axi_lite_r_t, axi_lite_data_t) +// `AXI_LITE_TYPEDEF_REQ_T(axi_lite_req_t, axi_lite_aw_t, axi_lite_w_t, axi_lite_ar_t) +// `AXI_LITE_TYPEDEF_RESP_T(axi_lite_resp_t, axi_lite_b_t, axi_lite_r_t) +`define AXI_LITE_TYPEDEF_AW_CHAN_T(aw_chan_lite_t, addr_t) \ + typedef struct packed { \ + addr_t addr; \ + axi_pkg::prot_t prot; \ + } aw_chan_lite_t; +`define AXI_LITE_TYPEDEF_W_CHAN_T(w_chan_lite_t, data_t, strb_t) \ + typedef struct packed { \ + data_t data; \ + strb_t strb; \ + } w_chan_lite_t; +`define AXI_LITE_TYPEDEF_B_CHAN_T(b_chan_lite_t) \ + typedef struct packed { \ + axi_pkg::resp_t resp; \ + } b_chan_lite_t; +`define AXI_LITE_TYPEDEF_AR_CHAN_T(ar_chan_lite_t, addr_t) \ + typedef struct packed { \ + addr_t addr; \ + axi_pkg::prot_t prot; \ + } ar_chan_lite_t; +`define 
AXI_LITE_TYPEDEF_R_CHAN_T(r_chan_lite_t, data_t) \ + typedef struct packed { \ + data_t data; \ + axi_pkg::resp_t resp; \ + } r_chan_lite_t; +`define AXI_LITE_TYPEDEF_REQ_T(req_lite_t, aw_chan_lite_t, w_chan_lite_t, ar_chan_lite_t) \ + typedef struct packed { \ + aw_chan_lite_t aw; \ + logic aw_valid; \ + w_chan_lite_t w; \ + logic w_valid; \ + logic b_ready; \ + ar_chan_lite_t ar; \ + logic ar_valid; \ + logic r_ready; \ + } req_lite_t; +`define AXI_LITE_TYPEDEF_RESP_T(resp_lite_t, b_chan_lite_t, r_chan_lite_t) \ + typedef struct packed { \ + logic aw_ready; \ + logic w_ready; \ + b_chan_lite_t b; \ + logic b_valid; \ + logic ar_ready; \ + r_chan_lite_t r; \ + logic r_valid; \ + } resp_lite_t; +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// All AXI4-Lite Channels and Request/Response Structs in One Macro +// +// This can be used whenever the user is not interested in "precise" control of the naming of the +// individual channels. +// +// Usage Example: +// `AXI_LITE_TYPEDEF_ALL(axi_lite, addr_t, data_t, strb_t) +// +// This defines `axi_lite_req_t` and `axi_lite_resp_t` request/response structs as well as +// `axi_lite_aw_chan_t`, `axi_lite_w_chan_t`, `axi_lite_b_chan_t`, `axi_lite_ar_chan_t`, and +// `axi_lite_r_chan_t` channel structs. 
+`define AXI_LITE_TYPEDEF_ALL(__name, __addr_t, __data_t, __strb_t) \ + `AXI_LITE_TYPEDEF_AW_CHAN_T(__name``_aw_chan_t, __addr_t) \ + `AXI_LITE_TYPEDEF_W_CHAN_T(__name``_w_chan_t, __data_t, __strb_t) \ + `AXI_LITE_TYPEDEF_B_CHAN_T(__name``_b_chan_t) \ + `AXI_LITE_TYPEDEF_AR_CHAN_T(__name``_ar_chan_t, __addr_t) \ + `AXI_LITE_TYPEDEF_R_CHAN_T(__name``_r_chan_t, __data_t) \ + `AXI_LITE_TYPEDEF_REQ_T(__name``_req_t, __name``_aw_chan_t, __name``_w_chan_t, __name``_ar_chan_t) \ + `AXI_LITE_TYPEDEF_RESP_T(__name``_resp_t, __name``_b_chan_t, __name``_r_chan_t) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +`endif diff --git a/test/type_param/vendor/pulp-platform/axi/src/axi_atop_filter.sv b/test/type_param/vendor/pulp-platform/axi/src/axi_atop_filter.sv new file mode 100644 index 00000000..4a2ecff4 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/axi/src/axi_atop_filter.sv @@ -0,0 +1,444 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Authors: +// - Andreas Kurth +// - Wolfgang Roenninger + +/// Filter atomic operations (ATOPs) in a protocol-compliant manner. +/// +/// This module filters atomic operations (ATOPs), i.e., write transactions that have a non-zero +/// `aw_atop` value, from its `slv` to its `mst` port. 
This module guarantees that: +/// +/// 1) `aw_atop` is always zero on the `mst` port; +/// +/// 2) write transactions with non-zero `aw_atop` on the `slv` port are handled in conformance with +/// the AXI standard by replying to such write transactions with the proper B and R responses. +/// The response code on atomic operations that reach this module is always SLVERR +/// (implementation-specific, not defined in the AXI standard). +/// +/// ## Intended usage +/// This module is intended to be placed between masters that may issue ATOPs and slaves that do not +/// support ATOPs. That way, this module ensures that the AXI protocol remains in a defined state on +/// systems with mixed ATOP capabilities. +/// +/// ## Specification reminder +/// The AXI standard specifies that there may be no ordering requirements between different atomic +/// bursts (i.e., a burst started by an AW with ATOP other than 0) and none between atomic bursts +/// and non-atomic bursts [E2.1.4]. That is, **an atomic burst may never have the same ID as any +/// other write or read burst that is in-flight at the same time**. +module axi_atop_filter #( + /// AXI ID width + parameter int unsigned AxiIdWidth = 0, + /// Maximum number of in-flight AXI write transactions + parameter int unsigned AxiMaxWriteTxns = 0, + /// AXI request type + parameter type req_t = logic, + /// AXI response type + parameter type resp_t = logic +) ( + /// Rising-edge clock of both ports + input logic clk_i, + /// Asynchronous reset, active low + input logic rst_ni, + /// Slave port request + input req_t slv_req_i, + /// Slave port response + output resp_t slv_resp_o, + /// Master port request + output req_t mst_req_o, + /// Master port response + input resp_t mst_resp_i +); + + // Minimum counter width is 2 to detect underflows. + localparam int unsigned COUNTER_WIDTH = (AxiMaxWriteTxns == 1) ? 
2 : $clog2(AxiMaxWriteTxns+1); + typedef struct packed { + logic underflow; + logic [COUNTER_WIDTH-1:0] cnt; + } cnt_t; + cnt_t w_cnt_d, w_cnt_q; + + typedef enum logic [2:0] { + W_FEEDTHROUGH, BLOCK_AW, ABSORB_W, HOLD_B, INJECT_B, WAIT_R + } w_state_e; + w_state_e w_state_d, w_state_q; + + typedef enum logic [1:0] { R_FEEDTHROUGH, INJECT_R, R_HOLD } r_state_e; + r_state_e r_state_d, r_state_q; + + typedef logic [AxiIdWidth-1:0] id_t; + id_t id_d, id_q; + + typedef logic [7:0] len_t; + len_t r_beats_d, r_beats_q; + + typedef struct packed { + len_t len; + } r_resp_cmd_t; + r_resp_cmd_t r_resp_cmd_push, r_resp_cmd_pop; + + logic aw_without_complete_w_downstream, + complete_w_without_aw_downstream, + r_resp_cmd_push_valid, r_resp_cmd_push_ready, + r_resp_cmd_pop_valid, r_resp_cmd_pop_ready; + + // An AW without a complete W burst is in-flight downstream if the W counter is > 0 and not + // underflowed. + assign aw_without_complete_w_downstream = !w_cnt_q.underflow && (w_cnt_q.cnt > 0); + // A complete W burst without AW is in-flight downstream if the W counter is -1. + assign complete_w_without_aw_downstream = w_cnt_q.underflow && &(w_cnt_q.cnt); + + // Manage AW, W, and B channels. + always_comb begin + // Defaults: + // Disable AW and W handshakes. + mst_req_o.aw_valid = 1'b0; + slv_resp_o.aw_ready = 1'b0; + mst_req_o.w_valid = 1'b0; + slv_resp_o.w_ready = 1'b0; + // Feed write responses through. + mst_req_o.b_ready = slv_req_i.b_ready; + slv_resp_o.b_valid = mst_resp_i.b_valid; + slv_resp_o.b = mst_resp_i.b; + // Keep ID stored for B and R response. + id_d = id_q; + // Do not push R response commands. + r_resp_cmd_push_valid = 1'b0; + // Keep the current state. + w_state_d = w_state_q; + + unique case (w_state_q) + W_FEEDTHROUGH: begin + // Feed AW channel through if the maximum number of outstanding bursts is not reached. 
+ if (complete_w_without_aw_downstream || (w_cnt_q.cnt < AxiMaxWriteTxns)) begin + mst_req_o.aw_valid = slv_req_i.aw_valid; + slv_resp_o.aw_ready = mst_resp_i.aw_ready; + end + // Feed W channel through if .. + if (aw_without_complete_w_downstream // .. downstream is missing W bursts .. + // .. or a new non-ATOP AW is being applied and there is not already a complete W burst + // downstream (to prevent underflows of w_cnt). + || ((slv_req_i.aw_valid && slv_req_i.aw.atop[5:4] == axi_pkg::ATOP_NONE) + && !complete_w_without_aw_downstream) + ) begin + mst_req_o.w_valid = slv_req_i.w_valid; + slv_resp_o.w_ready = mst_resp_i.w_ready; + end + // Filter out AWs that are atomic operations. + if (slv_req_i.aw_valid && slv_req_i.aw.atop[5:4] != axi_pkg::ATOP_NONE) begin + mst_req_o.aw_valid = 1'b0; // Do not let AW pass to master port. + slv_resp_o.aw_ready = 1'b1; // Absorb AW on slave port. + id_d = slv_req_i.aw.id; // Store ID for B response. + // All atomic operations except atomic stores require a response on the R channel. + if (slv_req_i.aw.atop[5:4] != axi_pkg::ATOP_ATOMICSTORE) begin + // Push R response command. We do not have to wait for the ready of the register + // because we know it is ready: we are its only master and will wait for the register to + // be emptied before going back to the `W_FEEDTHROUGH` state. + r_resp_cmd_push_valid = 1'b1; + end + // If downstream is missing W beats, block the AW channel and let the W bursts complete. + if (aw_without_complete_w_downstream) begin + w_state_d = BLOCK_AW; + // If downstream is not missing W beats, absorb the W beats for this atomic AW. + end else begin + mst_req_o.w_valid = 1'b0; // Do not let W beats pass to master port. + slv_resp_o.w_ready = 1'b1; // Absorb W beats on slave port. + if (slv_req_i.w_valid && slv_req_i.w.last) begin + // If the W beat is valid and the last, proceed by injecting the B response. 
+ // However, if there is a non-handshaked B on our response port, we must let that + // complete first. + if (slv_resp_o.b_valid && !slv_req_i.b_ready) begin + w_state_d = HOLD_B; + end else begin + w_state_d = INJECT_B; + end + end else begin + // Otherwise continue with absorbing W beats. + w_state_d = ABSORB_W; + end + end + end + end + + BLOCK_AW: begin + // Feed W channel through to let outstanding bursts complete. + if (aw_without_complete_w_downstream) begin + mst_req_o.w_valid = slv_req_i.w_valid; + slv_resp_o.w_ready = mst_resp_i.w_ready; + end else begin + // If there are no more outstanding W bursts, start absorbing the next W burst. + slv_resp_o.w_ready = 1'b1; + if (slv_req_i.w_valid && slv_req_i.w.last) begin + // If the W beat is valid and the last, proceed by injecting the B response. + if (slv_resp_o.b_valid && !slv_req_i.b_ready) begin + w_state_d = HOLD_B; + end else begin + w_state_d = INJECT_B; + end + end else begin + // Otherwise continue with absorbing W beats. + w_state_d = ABSORB_W; + end + end + end + + ABSORB_W: begin + // Absorb all W beats of the current burst. + slv_resp_o.w_ready = 1'b1; + if (slv_req_i.w_valid && slv_req_i.w.last) begin + if (slv_resp_o.b_valid && !slv_req_i.b_ready) begin + w_state_d = HOLD_B; + end else begin + w_state_d = INJECT_B; + end + end + end + + HOLD_B: begin + // Proceed with injection of B response upon handshake. + if (slv_resp_o.b_valid && slv_req_i.b_ready) begin + w_state_d = INJECT_B; + end + end + + INJECT_B: begin + // Pause forwarding of B response. + mst_req_o.b_ready = 1'b0; + // Inject error response instead. Since the B channel has an ID and the atomic burst we are + // replying to is guaranteed to be the only burst with this ID in flight, we do not have to + // observe any ordering and can immediately inject on the B channel. 
+ slv_resp_o.b = '0; + slv_resp_o.b.id = id_q; + slv_resp_o.b.resp = axi_pkg::RESP_SLVERR; + slv_resp_o.b_valid = 1'b1; + if (slv_req_i.b_ready) begin + // If not all beats of the R response have been injected, wait for them. Otherwise, return + // to `W_FEEDTHROUGH`. + if (r_resp_cmd_pop_valid && !r_resp_cmd_pop_ready) begin + w_state_d = WAIT_R; + end else begin + w_state_d = W_FEEDTHROUGH; + end + end + end + + WAIT_R: begin + // Wait with returning to `W_FEEDTHROUGH` until all beats of the R response have been + // injected. + if (!r_resp_cmd_pop_valid) begin + w_state_d = W_FEEDTHROUGH; + end + end + + default: w_state_d = W_FEEDTHROUGH; + endcase + end + // Connect signals on AW and W channel that are not managed by the control FSM from slave port to + // master port. + // Feed-through of the AW and W vectors, make sure that downstream aw.atop is always zero + always_comb begin + // overwrite the atop signal + mst_req_o.aw = slv_req_i.aw; + mst_req_o.aw.atop = '0; + end + assign mst_req_o.w = slv_req_i.w; + + // Manage R channel. + always_comb begin + // Defaults: + // Feed read responses through. + slv_resp_o.r = mst_resp_i.r; + slv_resp_o.r_valid = mst_resp_i.r_valid; + mst_req_o.r_ready = slv_req_i.r_ready; + // Do not pop R response command. + r_resp_cmd_pop_ready = 1'b0; + // Keep the current value of the beats counter. + r_beats_d = r_beats_q; + // Keep the current state. + r_state_d = r_state_q; + + unique case (r_state_q) + R_FEEDTHROUGH: begin + if (mst_resp_i.r_valid && !slv_req_i.r_ready) begin + r_state_d = R_HOLD; + end else if (r_resp_cmd_pop_valid) begin + // Upon a command to inject an R response, immediately proceed with doing so because there + // are no ordering requirements with other bursts that may be ongoing on the R channel at + // this moment. 
+ r_beats_d = r_resp_cmd_pop.len; + r_state_d = INJECT_R; + end + end + + INJECT_R: begin + mst_req_o.r_ready = 1'b0; + slv_resp_o.r = '0; + slv_resp_o.r.id = id_q; + slv_resp_o.r.resp = axi_pkg::RESP_SLVERR; + slv_resp_o.r.last = (r_beats_q == '0); + slv_resp_o.r_valid = 1'b1; + if (slv_req_i.r_ready) begin + if (slv_resp_o.r.last) begin + r_resp_cmd_pop_ready = 1'b1; + r_state_d = R_FEEDTHROUGH; + end else begin + r_beats_d -= 1; + end + end + end + + R_HOLD: begin + if (mst_resp_i.r_valid && slv_req_i.r_ready) begin + r_state_d = R_FEEDTHROUGH; + end + end + + default: r_state_d = R_FEEDTHROUGH; + endcase + end + // Feed all signals on AR through. + assign mst_req_o.ar = slv_req_i.ar; + assign mst_req_o.ar_valid = slv_req_i.ar_valid; + assign slv_resp_o.ar_ready = mst_resp_i.ar_ready; + + // Keep track of outstanding downstream write bursts and responses. + always_comb begin + w_cnt_d = w_cnt_q; + if (mst_req_o.aw_valid && mst_resp_i.aw_ready) begin + w_cnt_d.cnt += 1; + end + if (mst_req_o.w_valid && mst_resp_i.w_ready && mst_req_o.w.last) begin + w_cnt_d.cnt -= 1; + end + if (w_cnt_q.underflow && (w_cnt_d.cnt == '0)) begin + w_cnt_d.underflow = 1'b0; + end else if (w_cnt_q.cnt == '0 && &(w_cnt_d.cnt)) begin + w_cnt_d.underflow = 1'b1; + end + end + + always_ff @(posedge clk_i, negedge rst_ni) begin + if (!rst_ni) begin + id_q <= '0; + r_beats_q <= '0; + r_state_q <= R_FEEDTHROUGH; + w_cnt_q <= '{default: '0}; + w_state_q <= W_FEEDTHROUGH; + end else begin + id_q <= id_d; + r_beats_q <= r_beats_d; + r_state_q <= r_state_d; + w_cnt_q <= w_cnt_d; + w_state_q <= w_state_d; + end + end + + stream_register #( + .T(r_resp_cmd_t) + ) r_resp_cmd ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .clr_i (1'b0), + .testmode_i (1'b0), + .valid_i (r_resp_cmd_push_valid), + .ready_o (r_resp_cmd_push_ready), + .data_i (r_resp_cmd_push), + .valid_o (r_resp_cmd_pop_valid), + .ready_i (r_resp_cmd_pop_ready), + .data_o (r_resp_cmd_pop) + ); + assign r_resp_cmd_push.len = 
slv_req_i.aw.len; + +// pragma translate_off +`ifndef VERILATOR + initial begin: p_assertions + assert (AxiIdWidth >= 1) else $fatal(1, "AXI ID width must be at least 1!"); + assert (AxiMaxWriteTxns >= 1) + else $fatal(1, "Maximum number of outstanding write transactions must be at least 1!"); + end +`endif +// pragma translate_on +endmodule + +`include "axi/assign.svh" +`include "axi/typedef.svh" + +/// Interface variant of [`axi_atop_filter`](module.axi_atop_filter). +module axi_atop_filter_intf #( + /// AXI ID width + parameter int unsigned AXI_ID_WIDTH = 0, + /// AXI address width + parameter int unsigned AXI_ADDR_WIDTH = 0, + /// AXI data width + parameter int unsigned AXI_DATA_WIDTH = 0, + /// AXI user signal width + parameter int unsigned AXI_USER_WIDTH = 0, + /// Maximum number of in-flight AXI write transactions + parameter int unsigned AXI_MAX_WRITE_TXNS = 0 +) ( + /// Rising-edge clock of both ports + input logic clk_i, + /// Asynchronous reset, active low + input logic rst_ni, + /// Slave interface port + AXI_BUS.Slave slv, + /// Master interface port + AXI_BUS.Master mst +); + + typedef logic [AXI_ID_WIDTH-1:0] id_t; + typedef logic [AXI_ADDR_WIDTH-1:0] addr_t; + typedef logic [AXI_DATA_WIDTH-1:0] data_t; + typedef logic [AXI_DATA_WIDTH/8-1:0] strb_t; + typedef logic [AXI_USER_WIDTH-1:0] user_t; + + `AXI_TYPEDEF_AW_CHAN_T(aw_chan_t, addr_t, id_t, user_t) + `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t) + `AXI_TYPEDEF_B_CHAN_T(b_chan_t, id_t, user_t) + `AXI_TYPEDEF_AR_CHAN_T(ar_chan_t, addr_t, id_t, user_t) + `AXI_TYPEDEF_R_CHAN_T(r_chan_t, data_t, id_t, user_t) + `AXI_TYPEDEF_REQ_T(req_t, aw_chan_t, w_chan_t, ar_chan_t) + `AXI_TYPEDEF_RESP_T(resp_t, b_chan_t, r_chan_t) + + req_t slv_req, mst_req; + resp_t slv_resp, mst_resp; + + `AXI_ASSIGN_TO_REQ(slv_req, slv) + `AXI_ASSIGN_FROM_RESP(slv, slv_resp) + + `AXI_ASSIGN_FROM_REQ(mst, mst_req) + `AXI_ASSIGN_TO_RESP(mst_resp, mst) + + axi_atop_filter #( + .AxiIdWidth ( AXI_ID_WIDTH ), + // Maximum 
number of AXI write bursts outstanding at the same time + .AxiMaxWriteTxns ( AXI_MAX_WRITE_TXNS ), + // AXI request & response type + .req_t ( req_t ), + .resp_t ( resp_t ) + ) i_axi_atop_filter ( + .clk_i, + .rst_ni, + .slv_req_i ( slv_req ), + .slv_resp_o ( slv_resp ), + .mst_req_o ( mst_req ), + .mst_resp_i ( mst_resp ) + ); +// pragma translate_off +`ifndef VERILATOR + initial begin: p_assertions + assert (AXI_ADDR_WIDTH >= 1) else $fatal(1, "AXI ADDR width must be at least 1!"); + assert (AXI_DATA_WIDTH >= 1) else $fatal(1, "AXI DATA width must be at least 1!"); + assert (AXI_USER_WIDTH >= 1) else $fatal(1, "AXI USER width must be at least 1!"); + end +`endif +// pragma translate_on +endmodule diff --git a/test/type_param/vendor/pulp-platform/axi/src/axi_cut.sv b/test/type_param/vendor/pulp-platform/axi/src/axi_cut.sv new file mode 100644 index 00000000..6c31321b --- /dev/null +++ b/test/type_param/vendor/pulp-platform/axi/src/axi_cut.sv @@ -0,0 +1,265 @@ +// Copyright (c) 2014-2018 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Authors: +// - Wolfgang Roenninger +// - Fabian Schuiki +// - Andreas Kurth + +/// An AXI4 cut. +/// +/// Breaks all combinatorial paths between its input and output. 
+module axi_cut #( + // bypass enable + parameter bit Bypass = 1'b0, + // AXI channel structs + parameter type aw_chan_t = logic, + parameter type w_chan_t = logic, + parameter type b_chan_t = logic, + parameter type ar_chan_t = logic, + parameter type r_chan_t = logic, + // AXI request & response structs + parameter type req_t = logic, + parameter type resp_t = logic +) ( + input logic clk_i, + input logic rst_ni, + // slave port + input req_t slv_req_i, + output resp_t slv_resp_o, + // master port + output req_t mst_req_o, + input resp_t mst_resp_i +); + + // a spill register for each channel + spill_register #( + .T ( aw_chan_t ), + .Bypass ( Bypass ) + ) i_reg_aw ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .valid_i ( slv_req_i.aw_valid ), + .ready_o ( slv_resp_o.aw_ready ), + .data_i ( slv_req_i.aw ), + .valid_o ( mst_req_o.aw_valid ), + .ready_i ( mst_resp_i.aw_ready ), + .data_o ( mst_req_o.aw ) + ); + + spill_register #( + .T ( w_chan_t ), + .Bypass ( Bypass ) + ) i_reg_w ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .valid_i ( slv_req_i.w_valid ), + .ready_o ( slv_resp_o.w_ready ), + .data_i ( slv_req_i.w ), + .valid_o ( mst_req_o.w_valid ), + .ready_i ( mst_resp_i.w_ready ), + .data_o ( mst_req_o.w ) + ); + + spill_register #( + .T ( b_chan_t ), + .Bypass ( Bypass ) + ) i_reg_b ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .valid_i ( mst_resp_i.b_valid ), + .ready_o ( mst_req_o.b_ready ), + .data_i ( mst_resp_i.b ), + .valid_o ( slv_resp_o.b_valid ), + .ready_i ( slv_req_i.b_ready ), + .data_o ( slv_resp_o.b ) + ); + + spill_register #( + .T ( ar_chan_t ), + .Bypass ( Bypass ) + ) i_reg_ar ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .valid_i ( slv_req_i.ar_valid ), + .ready_o ( slv_resp_o.ar_ready ), + .data_i ( slv_req_i.ar ), + .valid_o ( mst_req_o.ar_valid ), + .ready_i ( mst_resp_i.ar_ready ), + .data_o ( mst_req_o.ar ) + ); + + spill_register #( + .T ( r_chan_t ), + .Bypass ( Bypass ) + ) i_reg_r ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), +
.valid_i ( mst_resp_i.r_valid ), + .ready_o ( mst_req_o.r_ready ), + .data_i ( mst_resp_i.r ), + .valid_o ( slv_resp_o.r_valid ), + .ready_i ( slv_req_i.r_ready ), + .data_o ( slv_resp_o.r ) + ); +endmodule + +`include "axi/assign.svh" +`include "axi/typedef.svh" + +// interface wrapper +module axi_cut_intf #( + // Bypass enable + parameter bit BYPASS = 1'b0, + // The address width. + parameter int unsigned ADDR_WIDTH = 0, + // The data width. + parameter int unsigned DATA_WIDTH = 0, + // The ID width. + parameter int unsigned ID_WIDTH = 0, + // The user data width. + parameter int unsigned USER_WIDTH = 0 +) ( + input logic clk_i , + input logic rst_ni , + AXI_BUS.Slave in , + AXI_BUS.Master out +); + + typedef logic [ID_WIDTH-1:0] id_t; + typedef logic [ADDR_WIDTH-1:0] addr_t; + typedef logic [DATA_WIDTH-1:0] data_t; + typedef logic [DATA_WIDTH/8-1:0] strb_t; + typedef logic [USER_WIDTH-1:0] user_t; + + `AXI_TYPEDEF_AW_CHAN_T(aw_chan_t, addr_t, id_t, user_t) + `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t) + `AXI_TYPEDEF_B_CHAN_T(b_chan_t, id_t, user_t) + `AXI_TYPEDEF_AR_CHAN_T(ar_chan_t, addr_t, id_t, user_t) + `AXI_TYPEDEF_R_CHAN_T(r_chan_t, data_t, id_t, user_t) + `AXI_TYPEDEF_REQ_T(req_t, aw_chan_t, w_chan_t, ar_chan_t) + `AXI_TYPEDEF_RESP_T(resp_t, b_chan_t, r_chan_t) + + req_t slv_req, mst_req; + resp_t slv_resp, mst_resp; + + `AXI_ASSIGN_TO_REQ(slv_req, in) + `AXI_ASSIGN_FROM_RESP(in, slv_resp) + + `AXI_ASSIGN_FROM_REQ(out, mst_req) + `AXI_ASSIGN_TO_RESP(mst_resp, out) + + axi_cut #( + .Bypass ( BYPASS ), + .aw_chan_t ( aw_chan_t ), + .w_chan_t ( w_chan_t ), + .b_chan_t ( b_chan_t ), + .ar_chan_t ( ar_chan_t ), + .r_chan_t ( r_chan_t ), + .req_t ( req_t ), + .resp_t ( resp_t ) + ) i_axi_cut ( + .clk_i, + .rst_ni, + .slv_req_i ( slv_req ), + .slv_resp_o ( slv_resp ), + .mst_req_o ( mst_req ), + .mst_resp_i ( mst_resp ) + ); + + // Check the invariants.
+ // pragma translate_off + `ifndef VERILATOR + initial begin + assert (ADDR_WIDTH > 0) else $fatal(1, "Wrong addr width parameter"); + assert (DATA_WIDTH > 0) else $fatal(1, "Wrong data width parameter"); + assert (ID_WIDTH > 0) else $fatal(1, "Wrong id width parameter"); + assert (USER_WIDTH > 0) else $fatal(1, "Wrong user width parameter"); + assert (in.AXI_ADDR_WIDTH == ADDR_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (in.AXI_DATA_WIDTH == DATA_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (in.AXI_ID_WIDTH == ID_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (in.AXI_USER_WIDTH == USER_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (out.AXI_ADDR_WIDTH == ADDR_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (out.AXI_DATA_WIDTH == DATA_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (out.AXI_ID_WIDTH == ID_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (out.AXI_USER_WIDTH == USER_WIDTH) else $fatal(1, "Wrong interface definition"); + end + `endif + // pragma translate_on +endmodule + +module axi_lite_cut_intf #( + /// Bypass enable; passed to the `Bypass` parameter of the wrapped `axi_cut`. + parameter bit BYPASS = 1'b0, + /// The address width. + parameter int unsigned ADDR_WIDTH = 0, + /// The data width. 
+ parameter int unsigned DATA_WIDTH = 0 +) ( + input logic clk_i , + input logic rst_ni , + AXI_LITE.Slave in , + AXI_LITE.Master out +); + + typedef logic [ADDR_WIDTH-1:0] addr_t; + typedef logic [DATA_WIDTH-1:0] data_t; + typedef logic [DATA_WIDTH/8-1:0] strb_t; + + `AXI_LITE_TYPEDEF_AW_CHAN_T(aw_chan_t, addr_t) + `AXI_LITE_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t) + `AXI_LITE_TYPEDEF_B_CHAN_T(b_chan_t) + `AXI_LITE_TYPEDEF_AR_CHAN_T(ar_chan_t, addr_t) + `AXI_LITE_TYPEDEF_R_CHAN_T(r_chan_t, data_t) + `AXI_LITE_TYPEDEF_REQ_T(req_t, aw_chan_t, w_chan_t, ar_chan_t) + `AXI_LITE_TYPEDEF_RESP_T(resp_t, b_chan_t, r_chan_t) + + req_t slv_req, mst_req; + resp_t slv_resp, mst_resp; + + `AXI_LITE_ASSIGN_TO_REQ(slv_req, in) + `AXI_LITE_ASSIGN_FROM_RESP(in, slv_resp) + + `AXI_LITE_ASSIGN_FROM_REQ(out, mst_req) + `AXI_LITE_ASSIGN_TO_RESP(mst_resp, out) + + axi_cut #( + .Bypass ( BYPASS ), + .aw_chan_t ( aw_chan_t ), + .w_chan_t ( w_chan_t ), + .b_chan_t ( b_chan_t ), + .ar_chan_t ( ar_chan_t ), + .r_chan_t ( r_chan_t ), + .req_t ( req_t ), + .resp_t ( resp_t ) + ) i_axi_cut ( + .clk_i, + .rst_ni, + .slv_req_i ( slv_req ), + .slv_resp_o ( slv_resp ), + .mst_req_o ( mst_req ), + .mst_resp_i ( mst_resp ) + ); + + // Check the invariants: widths must be non-zero and must match both interface ports. 
+ // pragma translate_off + `ifndef VERILATOR + initial begin + assert (ADDR_WIDTH > 0) else $fatal(1, "Wrong addr width parameter"); + assert (DATA_WIDTH > 0) else $fatal(1, "Wrong data width parameter"); + assert (in.AXI_ADDR_WIDTH == ADDR_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (in.AXI_DATA_WIDTH == DATA_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (out.AXI_ADDR_WIDTH == ADDR_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (out.AXI_DATA_WIDTH == DATA_WIDTH) else $fatal(1, "Wrong interface definition"); + end + `endif + // pragma translate_on +endmodule diff --git a/test/type_param/vendor/pulp-platform/axi/src/axi_delayer.sv b/test/type_param/vendor/pulp-platform/axi/src/axi_delayer.sv new file mode 100644 index 00000000..cab18eb5 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/axi/src/axi_delayer.sv @@ -0,0 +1,198 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Authors: +// - Wolfgang Roenninger +// - Florian Zaruba +// - Andreas Kurth + +/// Synthesizable module that (randomly) delays AXI channels. 
+module axi_delayer #( + // AXI channel types + parameter type aw_chan_t = logic, + parameter type w_chan_t = logic, + parameter type b_chan_t = logic, + parameter type ar_chan_t = logic, + parameter type r_chan_t = logic, + // AXI request & response types + parameter type req_t = logic, + parameter type resp_t = logic, + // delay parameters + parameter bit StallRandomInput = 0, + parameter bit StallRandomOutput = 0, + parameter int unsigned FixedDelayInput = 1, + parameter int unsigned FixedDelayOutput = 1 +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + // slave port + input req_t slv_req_i, + output resp_t slv_resp_o, + // master port + output req_t mst_req_o, + input resp_t mst_resp_i +); + // AW: request channel (slave port to master port), uses the input-side delay parameters + stream_delay #( + .StallRandom ( StallRandomInput ), + .FixedDelay ( FixedDelayInput ), + .payload_t ( aw_chan_t ) + ) i_stream_delay_aw ( + .clk_i, + .rst_ni, + .payload_i ( slv_req_i.aw ), + .ready_o ( slv_resp_o.aw_ready ), + .valid_i ( slv_req_i.aw_valid ), + .payload_o ( mst_req_o.aw ), + .ready_i ( mst_resp_i.aw_ready ), + .valid_o ( mst_req_o.aw_valid ) + ); + + // AR: request channel (slave port to master port), uses the input-side delay parameters + stream_delay #( + .StallRandom ( StallRandomInput ), + .FixedDelay ( FixedDelayInput ), + .payload_t ( ar_chan_t ) + ) i_stream_delay_ar ( + .clk_i, + .rst_ni, + .payload_i ( slv_req_i.ar ), + .ready_o ( slv_resp_o.ar_ready ), + .valid_i ( slv_req_i.ar_valid ), + .payload_o ( mst_req_o.ar ), + .ready_i ( mst_resp_i.ar_ready ), + .valid_o ( mst_req_o.ar_valid ) + ); + + // W: request channel (slave port to master port), uses the input-side delay parameters + stream_delay #( + .StallRandom ( StallRandomInput ), + .FixedDelay ( FixedDelayInput ), + .payload_t ( w_chan_t ) + ) i_stream_delay_w ( + .clk_i, + .rst_ni, + .payload_i ( slv_req_i.w ), + .ready_o ( slv_resp_o.w_ready ), + .valid_i ( slv_req_i.w_valid ), + .payload_o ( mst_req_o.w ), + .ready_i ( mst_resp_i.w_ready ), + .valid_o ( mst_req_o.w_valid ) + ); + + // B: response channel (master port to slave port), uses the output-side delay parameters + stream_delay #( + .StallRandom ( StallRandomOutput ), + .FixedDelay ( FixedDelayOutput ), + .payload_t ( b_chan_t 
) + ) i_stream_delay_b ( + .clk_i, + .rst_ni, + .payload_i ( mst_resp_i.b ), + .ready_o ( mst_req_o.b_ready ), + .valid_i ( mst_resp_i.b_valid ), + .payload_o ( slv_resp_o.b ), + .ready_i ( slv_req_i.b_ready ), + .valid_o ( slv_resp_o.b_valid ) + ); + + // R: response channel (master port to slave port), uses the output-side delay parameters + stream_delay #( + .StallRandom ( StallRandomOutput ), + .FixedDelay ( FixedDelayOutput ), + .payload_t ( r_chan_t ) + ) i_stream_delay_r ( + .clk_i, + .rst_ni, + .payload_i ( mst_resp_i.r ), + .ready_o ( mst_req_o.r_ready ), + .valid_i ( mst_resp_i.r_valid ), + .payload_o ( slv_resp_o.r ), + .ready_i ( slv_req_i.r_ready ), + .valid_o ( slv_resp_o.r_valid ) + ); +endmodule + +`include "axi/typedef.svh" +`include "axi/assign.svh" + +// Interface wrapper: converts the AXI_BUS ports to req/resp structs around axi_delayer +module axi_delayer_intf #( + // Synopsys DC requires a default value for parameters. + parameter int unsigned AXI_ID_WIDTH = 0, + parameter int unsigned AXI_ADDR_WIDTH = 0, + parameter int unsigned AXI_DATA_WIDTH = 0, + parameter int unsigned AXI_USER_WIDTH = 0, + parameter bit STALL_RANDOM_INPUT = 0, + parameter bit STALL_RANDOM_OUTPUT = 0, + parameter int unsigned FIXED_DELAY_INPUT = 1, + parameter int unsigned FIXED_DELAY_OUTPUT = 1 +) ( + input logic clk_i, + input logic rst_ni, + AXI_BUS.Slave slv, + AXI_BUS.Master mst +); + + typedef logic [AXI_ID_WIDTH-1:0] id_t; + typedef logic [AXI_ADDR_WIDTH-1:0] addr_t; + typedef logic [AXI_DATA_WIDTH-1:0] data_t; + typedef logic [AXI_DATA_WIDTH/8-1:0] strb_t; + typedef logic [AXI_USER_WIDTH-1:0] user_t; + + `AXI_TYPEDEF_AW_CHAN_T(aw_chan_t, addr_t, id_t, user_t) + `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t) + `AXI_TYPEDEF_B_CHAN_T(b_chan_t, id_t, user_t) + `AXI_TYPEDEF_AR_CHAN_T(ar_chan_t, addr_t, id_t, user_t) + `AXI_TYPEDEF_R_CHAN_T(r_chan_t, data_t, id_t, user_t) + `AXI_TYPEDEF_REQ_T(req_t, aw_chan_t, w_chan_t, ar_chan_t) + `AXI_TYPEDEF_RESP_T(resp_t, b_chan_t, r_chan_t) + + req_t slv_req, mst_req; + resp_t slv_resp, mst_resp; + + `AXI_ASSIGN_TO_REQ(slv_req, slv) + `AXI_ASSIGN_FROM_RESP(slv, slv_resp) 
+ + `AXI_ASSIGN_FROM_REQ(mst, mst_req) + `AXI_ASSIGN_TO_RESP(mst_resp, mst) + + axi_delayer #( + .aw_chan_t ( aw_chan_t ), + .w_chan_t ( w_chan_t ), + .b_chan_t ( b_chan_t ), + .ar_chan_t ( ar_chan_t ), + .r_chan_t ( r_chan_t ), + .req_t ( req_t ), + .resp_t ( resp_t ), + .StallRandomInput ( STALL_RANDOM_INPUT ), + .StallRandomOutput ( STALL_RANDOM_OUTPUT ), + .FixedDelayInput ( FIXED_DELAY_INPUT ), + .FixedDelayOutput ( FIXED_DELAY_OUTPUT ) + ) i_axi_delayer ( + .clk_i, // Clock + .rst_ni, // Asynchronous reset active low + .slv_req_i ( slv_req ), + .slv_resp_o ( slv_resp ), + .mst_req_o ( mst_req ), + .mst_resp_i ( mst_resp ) + ); + +// pragma translate_off +`ifndef VERILATOR + initial begin: p_assertions + assert (AXI_ID_WIDTH >= 1) else $fatal(1, "AXI ID width must be at least 1!"); + assert (AXI_ADDR_WIDTH >= 1) else $fatal(1, "AXI ADDR width must be at least 1!"); + assert (AXI_DATA_WIDTH >= 1) else $fatal(1, "AXI DATA width must be at least 1!"); + assert (AXI_USER_WIDTH >= 1) else $fatal(1, "AXI USER width must be at least 1!"); + end +`endif +// pragma translate_on +endmodule diff --git a/test/type_param/vendor/pulp-platform/axi/src/axi_demux.sv b/test/type_param/vendor/pulp-platform/axi/src/axi_demux.sv new file mode 100644 index 00000000..99a18c8d --- /dev/null +++ b/test/type_param/vendor/pulp-platform/axi/src/axi_demux.sv @@ -0,0 +1,786 @@ +// Copyright (c) 2019 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations under the License. +// +// Authors: +// - Wolfgang Roenninger +// - Andreas Kurth + +`include "common_cells/registers.svh" + +// axi_demux: Demultiplex an AXI bus from one slave port to multiple master ports. +// See `doc/axi_demux.md` for the documentation, including the definition of parameters and ports. +module axi_demux #( + parameter int unsigned AxiIdWidth = 32'd0, + parameter type aw_chan_t = logic, + parameter type w_chan_t = logic, + parameter type b_chan_t = logic, + parameter type ar_chan_t = logic, + parameter type r_chan_t = logic, + parameter type req_t = logic, + parameter type resp_t = logic, + parameter int unsigned NoMstPorts = 32'd0, + parameter int unsigned MaxTrans = 32'd8, + parameter int unsigned AxiLookBits = 32'd3, + parameter bit UniqueIds = 1'b0, + parameter bit FallThrough = 1'b0, + parameter bit SpillAw = 1'b1, + parameter bit SpillW = 1'b0, + parameter bit SpillB = 1'b0, + parameter bit SpillAr = 1'b1, + parameter bit SpillR = 1'b0, + // Dependent parameters, DO NOT OVERRIDE! + parameter int unsigned SelectWidth = (NoMstPorts > 32'd1) ? $clog2(NoMstPorts) : 32'd1, + parameter type select_t = logic [SelectWidth-1:0] +) ( + input logic clk_i, + input logic rst_ni, + input logic test_i, + // Slave Port + input req_t slv_req_i, + input select_t slv_aw_select_i, + input select_t slv_ar_select_i, + output resp_t slv_resp_o, + // Master Ports + output req_t [NoMstPorts-1:0] mst_reqs_o, + input resp_t [NoMstPorts-1:0] mst_resps_i +); + + localparam int unsigned IdCounterWidth = MaxTrans > 1 ? 
$clog2(MaxTrans) : 1; + + //-------------------------------------- + // Typedefs for the FIFOs / Queues + //-------------------------------------- + typedef struct packed { + aw_chan_t aw_chan; + select_t aw_select; + } aw_chan_select_t; + typedef struct packed { + ar_chan_t ar_chan; + select_t ar_select; + } ar_chan_select_t; + + // pass through if only one master port + if (NoMstPorts == 32'h1) begin : gen_no_demux + assign mst_reqs_o[0] = slv_req_i; + assign slv_resp_o = mst_resps_i; + // other non degenerate cases + end else begin : gen_demux + + //-------------------------------------- + //-------------------------------------- + // Signal Declarations + //-------------------------------------- + //-------------------------------------- + + //-------------------------------------- + // Write Transaction + //-------------------------------------- + // comes from spill register at input + aw_chan_select_t slv_aw_chan_select; + logic slv_aw_valid, slv_aw_ready; + + // AW ID counter + select_t lookup_aw_select; + logic aw_select_occupied, aw_id_cnt_full; + logic aw_push; + // Upon an ATOP load, inject IDs from the AW into the AR channel + logic atop_inject; + + // W FIFO: stores the decision to which master W beats should go + logic w_fifo_pop; + logic w_fifo_full, w_fifo_empty; + select_t w_select; + + // Register which locks the AW valid signal + logic lock_aw_valid_d, lock_aw_valid_q, load_aw_lock; + logic aw_valid, aw_ready; + + // W channel from spill reg + w_chan_t slv_w_chan; + logic slv_w_valid, slv_w_ready; + + // B channels input into the arbitration + b_chan_t [NoMstPorts-1:0] mst_b_chans; + logic [NoMstPorts-1:0] mst_b_valids, mst_b_readies; + + // B channel to spill register + b_chan_t slv_b_chan; + logic slv_b_valid, slv_b_ready; + + //-------------------------------------- + // Read Transaction + //-------------------------------------- + // comes from spill register at input + ar_chan_select_t slv_ar_chan_select; + logic slv_ar_valid, slv_ar_ready; 
+ + // AR ID counter + select_t lookup_ar_select; + logic ar_select_occupied, ar_id_cnt_full; + logic ar_push; + + // Register which locks the AR valid signal + logic lock_ar_valid_d, lock_ar_valid_q, load_ar_lock; + logic ar_valid, ar_ready; + + // R channels input into the arbitration + r_chan_t [NoMstPorts-1:0] mst_r_chans; + logic [NoMstPorts-1:0] mst_r_valids, mst_r_readies; + + // R channel to spill register + r_chan_t slv_r_chan; + logic slv_r_valid, slv_r_ready; + + //-------------------------------------- + //-------------------------------------- + // Channel Control + //-------------------------------------- + //-------------------------------------- + + //-------------------------------------- + // AW Channel + //-------------------------------------- + // spill register at the channel input + `ifdef TARGET_VSIM + // Workaround for bug in Questa 2020.2 and 2021.1: Flatten the struct into a logic vector before + // instantiating `spill_register`. + typedef logic [$bits(aw_chan_select_t)-1:0] aw_chan_select_flat_t; + `else + typedef aw_chan_select_t aw_chan_select_flat_t; + `endif + aw_chan_select_flat_t slv_aw_chan_select_in_flat, + slv_aw_chan_select_out_flat; + assign slv_aw_chan_select_in_flat = {slv_req_i.aw, slv_aw_select_i}; + spill_register #( + .T ( aw_chan_select_flat_t ), + .Bypass ( ~SpillAw ) // because module param indicates if we want a spill reg + ) i_aw_spill_reg ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .valid_i ( slv_req_i.aw_valid ), + .ready_o ( slv_resp_o.aw_ready ), + .data_i ( slv_aw_chan_select_in_flat ), + .valid_o ( slv_aw_valid ), + .ready_i ( slv_aw_ready ), + .data_o ( slv_aw_chan_select_out_flat ) + ); + assign slv_aw_chan_select = slv_aw_chan_select_out_flat; + + // Control of the AW handshake + always_comb begin + // AXI Handshakes + slv_aw_ready = 1'b0; + aw_valid = 1'b0; + // `lock_aw_valid`, used to be protocol conform as it is not allowed to deassert + // a valid if there was no corresponding ready. 
As this process has to be able to inject + // an AXI ID into the counter of the AR channel on an ATOP, there could be a case where + // this process waits on `aw_ready` but in the mean time on the AR channel the counter gets + // full. + lock_aw_valid_d = lock_aw_valid_q; + load_aw_lock = 1'b0; + // AW ID counter and W FIFO + aw_push = 1'b0; + // ATOP injection into ar counter + atop_inject = 1'b0; + // we had an arbitration decision, the valid is locked, wait for the transaction + if (lock_aw_valid_q) begin + aw_valid = 1'b1; + // transaction + if (aw_ready) begin + slv_aw_ready = 1'b1; + lock_aw_valid_d = 1'b0; + load_aw_lock = 1'b1; + atop_inject = slv_aw_chan_select.aw_chan.atop[5]; // inject the ATOP if necessary + end + end else begin + // Process can start handling a transaction if its `i_aw_id_counter` and `w_fifo` have + // space in them. Further check if we could inject something on the AR channel. + if (!aw_id_cnt_full && !w_fifo_full && !ar_id_cnt_full) begin + // there is a valid AW vector make the id lookup and go further, if it passes + if (slv_aw_valid && (!aw_select_occupied || + (slv_aw_chan_select.aw_select == lookup_aw_select))) begin + // connect the handshake + aw_valid = 1'b1; + // push arbitration to the W FIFO regardless, do not wait for the AW transaction + aw_push = 1'b1; + // on AW transaction + if (aw_ready) begin + slv_aw_ready = 1'b1; + atop_inject = slv_aw_chan_select.aw_chan.atop[5]; + // no AW transaction this cycle, lock the decision + end else begin + lock_aw_valid_d = 1'b1; + load_aw_lock = 1'b1; + end + end + end + end + end + + // lock the valid signal, as the selection gets pushed into the W FIFO on first assertion, + // prevent further pushing + `FFLARN(lock_aw_valid_q, lock_aw_valid_d, load_aw_lock, '0, clk_i, rst_ni) + + if (UniqueIds) begin : gen_unique_ids_aw + // If the `UniqueIds` parameter is set, each write transaction has an ID that is unique among + // all in-flight write transactions, or all write transactions 
with a given ID target the same + // master port as all write transactions with the same ID, or both. This means that the + // signals that are driven by the ID counters if this parameter is not set can instead be + // derived from existing signals. The ID counters can therefore be omitted. + assign lookup_aw_select = slv_aw_chan_select.aw_select; + assign aw_select_occupied = 1'b0; + assign aw_id_cnt_full = 1'b0; + end else begin : gen_aw_id_counter + axi_demux_id_counters #( + .AxiIdBits ( AxiLookBits ), + .CounterWidth ( IdCounterWidth ), + .mst_port_select_t ( select_t ) + ) i_aw_id_counter ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .lookup_axi_id_i ( slv_aw_chan_select.aw_chan.id[0+:AxiLookBits] ), + .lookup_mst_select_o ( lookup_aw_select ), + .lookup_mst_select_occupied_o ( aw_select_occupied ), + .full_o ( aw_id_cnt_full ), + .inject_axi_id_i ( '0 ), + .inject_i ( 1'b0 ), + .push_axi_id_i ( slv_aw_chan_select.aw_chan.id[0+:AxiLookBits] ), + .push_mst_select_i ( slv_aw_chan_select.aw_select ), + .push_i ( aw_push ), + .pop_axi_id_i ( slv_b_chan.id[0+:AxiLookBits] ), + .pop_i ( slv_b_valid & slv_b_ready ) + ); + // pop from ID counter on outward transaction + end + + // FIFO to save W selection + fifo_v3 #( + .FALL_THROUGH ( FallThrough ), + .DEPTH ( MaxTrans ), + .dtype ( select_t ) + ) i_w_fifo ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_i ( 1'b0 ), + .testmode_i( test_i ), + .full_o ( w_fifo_full ), + .empty_o ( w_fifo_empty ), + .usage_o ( ), + .data_i ( slv_aw_chan_select.aw_select ), + .push_i ( aw_push ), // controlled from proc_aw_chan + .data_o ( w_select ), // where the w beat should go + .pop_i ( w_fifo_pop ) // controlled from proc_w_chan + ); + + //-------------------------------------- + // W Channel + //-------------------------------------- + spill_register #( + .T ( w_chan_t ), + .Bypass ( ~SpillW ) + ) i_w_spill_reg( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .valid_i ( slv_req_i.w_valid ), + .ready_o ( slv_resp_o.w_ready ), + 
.data_i ( slv_req_i.w ), + .valid_o ( slv_w_valid ), + .ready_i ( slv_w_ready ), + .data_o ( slv_w_chan ) + ); + + //-------------------------------------- + // B Channel + //-------------------------------------- + // optional spill register + spill_register #( + .T ( b_chan_t ), + .Bypass ( ~SpillB ) + ) i_b_spill_reg ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .valid_i ( slv_b_valid ), + .ready_o ( slv_b_ready ), + .data_i ( slv_b_chan ), + .valid_o ( slv_resp_o.b_valid ), + .ready_i ( slv_req_i.b_ready ), + .data_o ( slv_resp_o.b ) + ); + + // Arbitration of the different B responses + rr_arb_tree #( + .NumIn ( NoMstPorts ), + .DataType ( b_chan_t ), + .AxiVldRdy( 1'b1 ), + .LockIn ( 1'b1 ) + ) i_b_mux ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_i( 1'b0 ), + .rr_i ( '0 ), + .req_i ( mst_b_valids ), + .gnt_o ( mst_b_readies ), + .data_i ( mst_b_chans ), + .gnt_i ( slv_b_ready ), + .req_o ( slv_b_valid ), + .data_o ( slv_b_chan ), + .idx_o ( ) + ); + + //-------------------------------------- + // AR Channel + //-------------------------------------- + `ifdef TARGET_VSIM + // Workaround for bug in Questa 2020.2 and 2021.1: Flatten the struct into a logic vector before + // instantiating `spill_register`. 
+ typedef logic [$bits(ar_chan_select_t)-1:0] ar_chan_select_flat_t; + `else + typedef ar_chan_select_t ar_chan_select_flat_t; + `endif + ar_chan_select_flat_t slv_ar_chan_select_in_flat, + slv_ar_chan_select_out_flat; + assign slv_ar_chan_select_in_flat = {slv_req_i.ar, slv_ar_select_i}; + spill_register #( + .T ( ar_chan_select_flat_t ), + .Bypass ( ~SpillAr ) + ) i_ar_spill_reg ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .valid_i ( slv_req_i.ar_valid ), + .ready_o ( slv_resp_o.ar_ready ), + .data_i ( slv_ar_chan_select_in_flat ), + .valid_o ( slv_ar_valid ), + .ready_i ( slv_ar_ready ), + .data_o ( slv_ar_chan_select_out_flat ) + ); + assign slv_ar_chan_select = slv_ar_chan_select_out_flat; + + // control of the AR handshake + always_comb begin + // AXI Handshakes + slv_ar_ready = 1'b0; + ar_valid = 1'b0; + // `lock_ar_valid`: Used to be protocol conform as it is not allowed to deassert `ar_valid` + // if there was no corresponding `ar_ready`. There is the possibility that an injection + // of a R response from an `atop` from the AW channel can change the occupied flag of the + // `i_ar_id_counter`, even if it was previously empty. This FF prevents the deassertion. + lock_ar_valid_d = lock_ar_valid_q; + load_ar_lock = 1'b0; + // AR id counter + ar_push = 1'b0; + // The process had an arbitration decision in a previous cycle, the valid is locked, + // wait for the AR transaction. + if (lock_ar_valid_q) begin + ar_valid = 1'b1; + // transaction + if (ar_ready) begin + slv_ar_ready = 1'b1; + ar_push = 1'b1; + lock_ar_valid_d = 1'b0; + load_ar_lock = 1'b1; + end + end else begin + // The process can start handling AR transaction if `i_ar_id_counter` has space. + if (!ar_id_cnt_full) begin + // There is a valid AR, so look the ID up. 
+ if (slv_ar_valid && (!ar_select_occupied || + (slv_ar_chan_select.ar_select == lookup_ar_select))) begin + // connect the AR handshake + ar_valid = 1'b1; + // on transaction + if (ar_ready) begin + slv_ar_ready = 1'b1; + ar_push = 1'b1; + // no transaction this cycle, lock the valid decision! + end else begin + lock_ar_valid_d = 1'b1; + load_ar_lock = 1'b1; + end + end + end + end + end + + // this ff is needed so that ar does not get de-asserted if an atop gets injected + `FFLARN(lock_ar_valid_q, lock_ar_valid_d, load_ar_lock, '0, clk_i, rst_ni) + + if (UniqueIds) begin : gen_unique_ids_ar + // If the `UniqueIds` parameter is set, each read transaction has an ID that is unique among + // all in-flight read transactions, or all read transactions with a given ID target the same + // master port as all read transactions with the same ID, or both. This means that the + // signals that are driven by the ID counters if this parameter is not set can instead be + // derived from existing signals. The ID counters can therefore be omitted. 
+ assign lookup_ar_select = slv_ar_chan_select.ar_select; + assign ar_select_occupied = 1'b0; + assign ar_id_cnt_full = 1'b0; + end else begin : gen_ar_id_counter + axi_demux_id_counters #( + .AxiIdBits ( AxiLookBits ), + .CounterWidth ( IdCounterWidth ), + .mst_port_select_t ( select_t ) + ) i_ar_id_counter ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .lookup_axi_id_i ( slv_ar_chan_select.ar_chan.id[0+:AxiLookBits] ), + .lookup_mst_select_o ( lookup_ar_select ), + .lookup_mst_select_occupied_o ( ar_select_occupied ), + .full_o ( ar_id_cnt_full ), + .inject_axi_id_i ( slv_aw_chan_select.aw_chan.id[0+:AxiLookBits] ), + .inject_i ( atop_inject ), + .push_axi_id_i ( slv_ar_chan_select.ar_chan.id[0+:AxiLookBits] ), + .push_mst_select_i ( slv_ar_chan_select.ar_select ), + .push_i ( ar_push ), + .pop_axi_id_i ( slv_r_chan.id[0+:AxiLookBits] ), + .pop_i ( slv_r_valid & slv_r_ready & slv_r_chan.last ) + ); + end + + //-------------------------------------- + // R Channel + //-------------------------------------- + // optional spill register + spill_register #( + .T ( r_chan_t ), + .Bypass ( ~SpillR ) + ) i_r_spill_reg ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .valid_i ( slv_r_valid ), + .ready_o ( slv_r_ready ), + .data_i ( slv_r_chan ), + .valid_o ( slv_resp_o.r_valid ), + .ready_i ( slv_req_i.r_ready ), + .data_o ( slv_resp_o.r ) + ); + + // Arbitration of the different r responses + rr_arb_tree #( + .NumIn ( NoMstPorts ), + .DataType ( r_chan_t ), + .AxiVldRdy( 1'b1 ), + .LockIn ( 1'b1 ) + ) i_r_mux ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_i( 1'b0 ), + .rr_i ( '0 ), + .req_i ( mst_r_valids ), + .gnt_o ( mst_r_readies ), + .data_i ( mst_r_chans ), + .gnt_i ( slv_r_ready ), + .req_o ( slv_r_valid ), + .data_o ( slv_r_chan ), + .idx_o ( ) + ); + + assign ar_ready = ar_valid & mst_resps_i[slv_ar_chan_select.ar_select].ar_ready; + assign aw_ready = aw_valid & mst_resps_i[slv_aw_chan_select.aw_select].aw_ready; + + // process that defines the individual 
demuxes and assignments for the arbitration + // as mst_reqs_o has to be driven from the same always comb block! + always_comb begin + // default assignments + mst_reqs_o = '0; + slv_w_ready = 1'b0; + w_fifo_pop = 1'b0; + + for (int unsigned i = 0; i < NoMstPorts; i++) begin + // AW channel + mst_reqs_o[i].aw = slv_aw_chan_select.aw_chan; + mst_reqs_o[i].aw_valid = 1'b0; + if (aw_valid && (slv_aw_chan_select.aw_select == i)) begin + mst_reqs_o[i].aw_valid = 1'b1; + end + + // W channel + mst_reqs_o[i].w = slv_w_chan; + mst_reqs_o[i].w_valid = 1'b0; + if (!w_fifo_empty && (w_select == i)) begin + mst_reqs_o[i].w_valid = slv_w_valid; + slv_w_ready = mst_resps_i[i].w_ready; + w_fifo_pop = slv_w_valid & mst_resps_i[i].w_ready & slv_w_chan.last; + end + + // B channel + mst_reqs_o[i].b_ready = mst_b_readies[i]; + + // AR channel + mst_reqs_o[i].ar = slv_ar_chan_select.ar_chan; + mst_reqs_o[i].ar_valid = 1'b0; + if (ar_valid && (slv_ar_chan_select.ar_select == i)) begin + mst_reqs_o[i].ar_valid = 1'b1; + end + + // R channel + mst_reqs_o[i].r_ready = mst_r_readies[i]; + end + end + // unpack the response B and R channels for the arbitration + for (genvar i = 0; i < NoMstPorts; i++) begin : gen_b_channels + assign mst_b_chans[i] = mst_resps_i[i].b; + assign mst_b_valids[i] = mst_resps_i[i].b_valid; + assign mst_r_chans[i] = mst_resps_i[i].r; + assign mst_r_valids[i] = mst_resps_i[i].r_valid; + end + + +// Validate parameters. 
+// pragma translate_off +`ifndef VERILATOR +`ifndef XSIM + initial begin: validate_params + no_mst_ports: assume (NoMstPorts > 0) else + $fatal(1, "The Number of slaves (NoMstPorts) has to be at least 1"); + AXI_ID_BITS: assume (AxiIdWidth >= AxiLookBits) else + $fatal(1, "AxiLookBits has to be equal or smaller than AxiIdWidth."); + end + default disable iff (!rst_ni); + aw_select: assume property( @(posedge clk_i) (slv_req_i.aw_valid |-> + (slv_aw_select_i < NoMstPorts))) else + $fatal(1, "slv_aw_select_i is %d: AW has selected a slave that is not defined.\ + NoMstPorts: %d", slv_aw_select_i, NoMstPorts); + ar_select: assume property( @(posedge clk_i) (slv_req_i.ar_valid |-> + (slv_ar_select_i < NoMstPorts))) else + $fatal(1, "slv_ar_select_i is %d: AR has selected a slave that is not defined.\ + NoMstPorts: %d", slv_ar_select_i, NoMstPorts); + aw_valid_stable: assert property( @(posedge clk_i) (aw_valid && !aw_ready) |=> aw_valid) else + $fatal(1, "aw_valid was deasserted, when aw_ready = 0 in last cycle."); + ar_valid_stable: assert property( @(posedge clk_i) + (ar_valid && !ar_ready) |=> ar_valid) else + $fatal(1, "ar_valid was deasserted, when ar_ready = 0 in last cycle."); + aw_stable: assert property( @(posedge clk_i) (aw_valid && !aw_ready) + |=> $stable(slv_aw_chan_select)) else + $fatal(1, "slv_aw_chan_select unstable with valid set."); + ar_stable: assert property( @(posedge clk_i) (ar_valid && !ar_ready) + |=> $stable(slv_ar_chan_select)) else + $fatal(1, "slv_ar_chan_select unstable with valid set."); + internal_ar_select: assert property( @(posedge clk_i) + (ar_valid |-> slv_ar_chan_select.ar_select < NoMstPorts)) + else $fatal(1, "slv_ar_chan_select.ar_select illegal while ar_valid."); + internal_aw_select: assert property( @(posedge clk_i) + (aw_valid |-> slv_aw_chan_select.aw_select < NoMstPorts)) + else $fatal(1, "slv_aw_chan_select.aw_select illegal while aw_valid."); +`endif +`endif +// pragma translate_on + end +endmodule + +module 
axi_demux_id_counters #( + // the lower bits of the AXI ID that should be considered, results in 2**AxiIdBits counters + parameter int unsigned AxiIdBits = 2, + parameter int unsigned CounterWidth = 4, + parameter type mst_port_select_t = logic +) ( + input clk_i, // Clock + input rst_ni, // Asynchronous reset active low + // lookup + input logic [AxiIdBits-1:0] lookup_axi_id_i, + output mst_port_select_t lookup_mst_select_o, + output logic lookup_mst_select_occupied_o, + // push + output logic full_o, + input logic [AxiIdBits-1:0] push_axi_id_i, + input mst_port_select_t push_mst_select_i, + input logic push_i, + // inject ATOPs in AR channel + input logic [AxiIdBits-1:0] inject_axi_id_i, + input logic inject_i, + // pop + input logic [AxiIdBits-1:0] pop_axi_id_i, + input logic pop_i +); + localparam int unsigned NoCounters = 2**AxiIdBits; + typedef logic [CounterWidth-1:0] cnt_t; + + // registers, each gets loaded when push_en[i] + mst_port_select_t [NoCounters-1:0] mst_select_q; + + // counter signals + logic [NoCounters-1:0] push_en, inject_en, pop_en, occupied, cnt_full; + + //----------------------------------- + // Lookup + //----------------------------------- + assign lookup_mst_select_o = mst_select_q[lookup_axi_id_i]; + assign lookup_mst_select_occupied_o = occupied[lookup_axi_id_i]; + //----------------------------------- + // Push and Pop + //----------------------------------- + assign push_en = (push_i) ? (1 << push_axi_id_i) : '0; + assign inject_en = (inject_i) ? (1 << inject_axi_id_i) : '0; + assign pop_en = (pop_i) ? 
(1 << pop_axi_id_i) : '0; + assign full_o = |cnt_full; + // counters + for (genvar i = 0; i < NoCounters; i++) begin : gen_counters + logic cnt_en, cnt_down, overflow; + cnt_t cnt_delta, in_flight; + always_comb begin + unique case ({push_en[i], inject_en[i], pop_en[i]}) + 3'b001 : begin // pop_i = -1 + cnt_en = 1'b1; + cnt_down = 1'b1; + cnt_delta = cnt_t'(1); + end + 3'b010 : begin // inject_i = +1 + cnt_en = 1'b1; + cnt_down = 1'b0; + cnt_delta = cnt_t'(1); + end + // 3'b011, inject_i & pop_i = 0 --> use default + 3'b100 : begin // push_i = +1 + cnt_en = 1'b1; + cnt_down = 1'b0; + cnt_delta = cnt_t'(1); + end + // 3'b101, push_i & pop_i = 0 --> use default + 3'b110 : begin // push_i & inject_i = +2 + cnt_en = 1'b1; + cnt_down = 1'b0; + cnt_delta = cnt_t'(2); + end + 3'b111 : begin // push_i & inject_i & pop_i = +1 + cnt_en = 1'b1; + cnt_down = 1'b0; + cnt_delta = cnt_t'(1); + end + default : begin // do nothing to the counters + cnt_en = 1'b0; + cnt_down = 1'b0; + cnt_delta = cnt_t'(0); + end + endcase + end + + delta_counter #( + .WIDTH ( CounterWidth ), + .STICKY_OVERFLOW ( 1'b0 ) + ) i_in_flight_cnt ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .clear_i ( 1'b0 ), + .en_i ( cnt_en ), + .load_i ( 1'b0 ), + .down_i ( cnt_down ), + .delta_i ( cnt_delta ), + .d_i ( '0 ), + .q_o ( in_flight ), + .overflow_o ( overflow ) + ); + assign occupied[i] = |in_flight; + assign cnt_full[i] = overflow | (&in_flight); + + // holds the selection signal for this id + `FFLARN(mst_select_q[i], push_mst_select_i, push_en[i], '0, clk_i, rst_ni) + +// pragma translate_off +`ifndef VERILATOR +`ifndef XSIM + // Check that the cycle after a pop on this counter does not report overflow (underflow guard). 
+ cnt_underflow: assert property( + @(posedge clk_i) disable iff (~rst_ni) (pop_en[i] |=> !overflow)) else + $fatal(1, "axi_demux_id_counters > Counter: %0d underflowed.\ + The reason is probably a faulty AXI response.", i); +`endif +`endif +// pragma translate_on + end +endmodule + +// Interface wrapper: converts the AXI_BUS ports to req/resp structs around axi_demux +`include "axi/assign.svh" +`include "axi/typedef.svh" +module axi_demux_intf #( + parameter int unsigned AXI_ID_WIDTH = 32'd0, // Synopsys DC requires default value for params + parameter int unsigned AXI_ADDR_WIDTH = 32'd0, + parameter int unsigned AXI_DATA_WIDTH = 32'd0, + parameter int unsigned AXI_USER_WIDTH = 32'd0, + parameter int unsigned NO_MST_PORTS = 32'd3, + parameter int unsigned MAX_TRANS = 32'd8, + parameter int unsigned AXI_LOOK_BITS = 32'd3, + parameter bit UNIQUE_IDS = 1'b0, + parameter bit FALL_THROUGH = 1'b0, + parameter bit SPILL_AW = 1'b1, + parameter bit SPILL_W = 1'b0, + parameter bit SPILL_B = 1'b0, + parameter bit SPILL_AR = 1'b1, + parameter bit SPILL_R = 1'b0, + // Dependent parameters, DO NOT OVERRIDE! + parameter int unsigned SELECT_WIDTH = (NO_MST_PORTS > 32'd1) ? 
$clog2(NO_MST_PORTS) : 32'd1, + parameter type select_t = logic [SELECT_WIDTH-1:0] // MST port select type +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic test_i, // Testmode enable + input select_t slv_aw_select_i, // has to be stable, when aw_valid + input select_t slv_ar_select_i, // has to be stable, when ar_valid + AXI_BUS.Slave slv, // slave port + AXI_BUS.Master mst [NO_MST_PORTS-1:0] // master ports +); + + typedef logic [AXI_ID_WIDTH-1:0] id_t; + typedef logic [AXI_ADDR_WIDTH-1:0] addr_t; + typedef logic [AXI_DATA_WIDTH-1:0] data_t; + typedef logic [AXI_DATA_WIDTH/8-1:0] strb_t; + typedef logic [AXI_USER_WIDTH-1:0] user_t; + `AXI_TYPEDEF_AW_CHAN_T(aw_chan_t, addr_t, id_t, user_t) + `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t) + `AXI_TYPEDEF_B_CHAN_T(b_chan_t, id_t, user_t) + `AXI_TYPEDEF_AR_CHAN_T(ar_chan_t, addr_t, id_t, user_t) + `AXI_TYPEDEF_R_CHAN_T(r_chan_t, data_t, id_t, user_t) + `AXI_TYPEDEF_REQ_T(req_t, aw_chan_t, w_chan_t, ar_chan_t) + `AXI_TYPEDEF_RESP_T(resp_t, b_chan_t, r_chan_t) + + req_t slv_req; + resp_t slv_resp; + req_t [NO_MST_PORTS-1:0] mst_req; + resp_t [NO_MST_PORTS-1:0] mst_resp; + + `AXI_ASSIGN_TO_REQ(slv_req, slv) + `AXI_ASSIGN_FROM_RESP(slv, slv_resp) + + for (genvar i = 0; i < NO_MST_PORTS; i++) begin : gen_assign_mst_ports + `AXI_ASSIGN_FROM_REQ(mst[i], mst_req[i]) + `AXI_ASSIGN_TO_RESP(mst_resp[i], mst[i]) + end + + axi_demux #( + .AxiIdWidth ( AXI_ID_WIDTH ), // ID Width + .aw_chan_t ( aw_chan_t ), // AW Channel Type + .w_chan_t ( w_chan_t ), // W Channel Type + .b_chan_t ( b_chan_t ), // B Channel Type + .ar_chan_t ( ar_chan_t ), // AR Channel Type + .r_chan_t ( r_chan_t ), // R Channel Type + .req_t ( req_t ), + .resp_t ( resp_t ), + .NoMstPorts ( NO_MST_PORTS ), + .MaxTrans ( MAX_TRANS ), + .AxiLookBits ( AXI_LOOK_BITS ), + .UniqueIds ( UNIQUE_IDS ), + .FallThrough ( FALL_THROUGH ), + .SpillAw ( SPILL_AW ), + .SpillW ( SPILL_W ), + .SpillB ( SPILL_B ), + 
.SpillAr ( SPILL_AR ), + .SpillR ( SPILL_R ) + ) i_axi_demux ( + .clk_i, // Clock + .rst_ni, // Asynchronous reset active low + .test_i, // Testmode enable + // slave port + .slv_req_i ( slv_req ), + .slv_aw_select_i ( slv_aw_select_i ), + .slv_ar_select_i ( slv_ar_select_i ), + .slv_resp_o ( slv_resp ), + // master port + .mst_reqs_o ( mst_req ), + .mst_resps_i ( mst_resp ) + ); +endmodule diff --git a/test/type_param/vendor/pulp-platform/axi/src/axi_err_slv.sv b/test/type_param/vendor/pulp-platform/axi/src/axi_err_slv.sv new file mode 100644 index 00000000..f3c807dc --- /dev/null +++ b/test/type_param/vendor/pulp-platform/axi/src/axi_err_slv.sv @@ -0,0 +1,261 @@ +// Copyright 2019 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Authors: +// - Wolfgang Roenninger +// - Andreas Kurth +// - Matheus Cavalcante + +// AXI Error Slave: This module always responds with an AXI error for transactions that are sent to +// it. This module optionally supports ATOPs if the `ATOPs` parameter is set. + +module axi_err_slv #( + parameter int unsigned AxiIdWidth = 0, // AXI ID Width + parameter type req_t = logic, // AXI 4 request struct, with atop field + parameter type resp_t = logic, // AXI 4 response struct + parameter axi_pkg::resp_t Resp = axi_pkg::RESP_DECERR, // Error generated by this slave. 
+ parameter int unsigned RespWidth = 32'd64, // Data response width, gets zero extended or truncated to r.data. + parameter logic [RespWidth-1:0] RespData = 64'hCA11AB1EBADCAB1E, // Hexvalue for data return value + parameter bit ATOPs = 1'b1, // Activate support for ATOPs. Set to 1 if this slave could ever get an atomic AXI transaction. + parameter int unsigned MaxTrans = 1 // Maximum # of accepted transactions before stalling +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic test_i, // Testmode enable + // slave port + input req_t slv_req_i, + output resp_t slv_resp_o +); + typedef logic [AxiIdWidth-1:0] id_t; + typedef struct packed { + id_t id; + axi_pkg::len_t len; + } r_data_t; + + req_t err_req; + resp_t err_resp; + + if (ATOPs) begin + axi_atop_filter #( + .AxiIdWidth ( AxiIdWidth ), + .AxiMaxWriteTxns ( MaxTrans ), + .req_t ( req_t ), + .resp_t ( resp_t ) + ) i_atop_filter ( + .clk_i, + .rst_ni, + .slv_req_i ( slv_req_i ), + .slv_resp_o ( slv_resp_o ), + .mst_req_o ( err_req ), + .mst_resp_i ( err_resp ) + ); + end else begin + assign err_req = slv_req_i; + assign slv_resp_o = err_resp; + end + + // w fifo + logic w_fifo_full, w_fifo_empty; + logic w_fifo_push, w_fifo_pop; + id_t w_fifo_data; + // b fifo + logic b_fifo_full, b_fifo_empty; + logic b_fifo_push, b_fifo_pop; + id_t b_fifo_data; + // r fifo + r_data_t r_fifo_inp; + logic r_fifo_full, r_fifo_empty; + logic r_fifo_push, r_fifo_pop; + r_data_t r_fifo_data; + // r counter + logic r_cnt_clear, r_cnt_en, r_cnt_load; + axi_pkg::len_t r_current_beat; + // r status + logic r_busy_d, r_busy_q, r_busy_load; + + //-------------------------------------- + // Write Transactions + //-------------------------------------- + // push, when there is room in the fifo + assign w_fifo_push = err_req.aw_valid & ~w_fifo_full; + assign err_resp.aw_ready = ~w_fifo_full; + + fifo_v3 #( + .FALL_THROUGH ( 1'b1 ), + .DEPTH ( MaxTrans ), + .dtype ( id_t ) + ) i_w_fifo ( + 
.clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_i ( 1'b0 ), + .testmode_i ( test_i ), + .full_o ( w_fifo_full ), + .empty_o ( w_fifo_empty ), + .usage_o ( ), + .data_i ( err_req.aw.id ), + .push_i ( w_fifo_push ), + .data_o ( w_fifo_data ), + .pop_i ( w_fifo_pop ) + ); + + always_comb begin : proc_w_channel + err_resp.w_ready = 1'b0; + w_fifo_pop = 1'b0; + b_fifo_push = 1'b0; + if (!w_fifo_empty && !b_fifo_full) begin + // eat the beats + err_resp.w_ready = 1'b1; + // on the last w transaction + if (err_req.w_valid && err_req.w.last) begin + w_fifo_pop = 1'b1; + b_fifo_push = 1'b1; + end + end + end + + fifo_v3 #( + .FALL_THROUGH ( 1'b0 ), + .DEPTH ( unsigned'(2) ), // two placed so that w can eat beats if b is not sent + .dtype ( id_t ) + ) i_b_fifo ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_i ( 1'b0 ), + .testmode_i ( test_i ), + .full_o ( b_fifo_full ), + .empty_o ( b_fifo_empty ), + .usage_o ( ), + .data_i ( w_fifo_data ), + .push_i ( b_fifo_push ), + .data_o ( b_fifo_data ), + .pop_i ( b_fifo_pop ) + ); + + always_comb begin : proc_b_channel + b_fifo_pop = 1'b0; + err_resp.b = '0; + err_resp.b.id = b_fifo_data; + err_resp.b.resp = Resp; + err_resp.b_valid = 1'b0; + if (!b_fifo_empty) begin + err_resp.b_valid = 1'b1; + // b transaction + b_fifo_pop = err_req.b_ready; + end + end + + //-------------------------------------- + // Read Transactions + //-------------------------------------- + // push if there is room in the fifo + assign r_fifo_push = err_req.ar_valid & ~r_fifo_full; + assign err_resp.ar_ready = ~r_fifo_full; + + // fifo data assignment + assign r_fifo_inp.id = err_req.ar.id; + assign r_fifo_inp.len = err_req.ar.len; + + fifo_v3 #( + .FALL_THROUGH ( 1'b0 ), + .DEPTH ( MaxTrans ), + .dtype ( r_data_t ) + ) i_r_fifo ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_i ( 1'b0 ), + .testmode_i( test_i ), + .full_o ( r_fifo_full ), + .empty_o ( r_fifo_empty ), + .usage_o ( ), + .data_i ( r_fifo_inp ), + .push_i ( r_fifo_push ), + .data_o ( 
r_fifo_data ), + .pop_i ( r_fifo_pop ) + ); + + always_comb begin : proc_r_channel + // default assignments + r_busy_d = r_busy_q; + r_busy_load = 1'b0; + // r fifo signals + r_fifo_pop = 1'b0; + // r counter signals + r_cnt_clear = 1'b0; + r_cnt_en = 1'b0; + r_cnt_load = 1'b0; + // r_channel + err_resp.r = '0; + err_resp.r.id = r_fifo_data.id; + err_resp.r.data = RespData; + err_resp.r.resp = Resp; + err_resp.r_valid = 1'b0; + // control + if (r_busy_q) begin + err_resp.r_valid = 1'b1; + err_resp.r.last = (r_current_beat == '0); + // r transaction + if (err_req.r_ready) begin + r_cnt_en = 1'b1; + if (r_current_beat == '0) begin + r_busy_d = 1'b0; + r_busy_load = 1'b1; + r_cnt_clear = 1'b1; + r_fifo_pop = 1'b1; + end + end + end else begin + // when not busy and fifo not empty, start counter err gen + if (!r_fifo_empty) begin + r_busy_d = 1'b1; + r_busy_load = 1'b1; + r_cnt_load = 1'b1; + end + end + end + + always_ff @(posedge clk_i, negedge rst_ni) begin + if (!rst_ni) begin + r_busy_q <= '0; + end else if (r_busy_load) begin + r_busy_q <= r_busy_d; + end + end + + counter #( + .WIDTH ($bits(axi_pkg::len_t)) + ) i_r_counter ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .clear_i ( r_cnt_clear ), + .en_i ( r_cnt_en ), + .load_i ( r_cnt_load ), + .down_i ( 1'b1 ), + .d_i ( r_fifo_data.len ), + .q_o ( r_current_beat ), + .overflow_o( ) + ); + + // pragma translate_off + `ifndef VERILATOR + `ifndef XSIM + initial begin + assert (Resp == axi_pkg::RESP_DECERR || Resp == axi_pkg::RESP_SLVERR) else + $fatal(1, "This module may only generate RESP_DECERR or RESP_SLVERR responses!"); + end + default disable iff (!rst_ni); + if (!ATOPs) begin : gen_assert_atops_unsupported + assume property( @(posedge clk_i) (slv_req_i.aw_valid |-> slv_req_i.aw.atop == '0)) else + $fatal(1, "Got ATOP but not configured to support ATOPs!"); + end + `endif + `endif + // pragma translate_on + +endmodule diff --git a/test/type_param/vendor/pulp-platform/axi/src/axi_id_prepend.sv 
b/test/type_param/vendor/pulp-platform/axi/src/axi_id_prepend.sv new file mode 100644 index 00000000..e9359b90 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/axi/src/axi_id_prepend.sv @@ -0,0 +1,161 @@ +// Copyright 2019 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Authors: +// - Wolfgang Roenninger +// - Andreas Kurth + +// AXI ID Prepend: This module prepends/strips the MSB from the AXI IDs. 
+// Constraints enforced through assertions: ID width of slave and master port + +module axi_id_prepend #( + parameter int unsigned NoBus = 1, // Can take multiple axi busses + parameter int unsigned AxiIdWidthSlvPort = 4, // AXI ID Width of the Slave Ports + parameter int unsigned AxiIdWidthMstPort = 6, // AXI ID Width of the Master Ports + parameter type slv_aw_chan_t = logic, // AW Channel Type for slv port + parameter type slv_w_chan_t = logic, // W Channel Type for slv port + parameter type slv_b_chan_t = logic, // B Channel Type for slv port + parameter type slv_ar_chan_t = logic, // AR Channel Type for slv port + parameter type slv_r_chan_t = logic, // R Channel Type for slv port + parameter type mst_aw_chan_t = logic, // AW Channel Type for mst port + parameter type mst_w_chan_t = logic, // W Channel Type for mst port + parameter type mst_b_chan_t = logic, // B Channel Type for mst port + parameter type mst_ar_chan_t = logic, // AR Channel Type for mst port + parameter type mst_r_chan_t = logic, // R Channel Type for mst port + // DEPENDENT PARAMETER DO NOT OVERWRITE! 
+ parameter int unsigned PreIdWidth = AxiIdWidthMstPort - AxiIdWidthSlvPort +) ( + input logic [PreIdWidth-1:0] pre_id_i, // ID to be prepended + // slave port (input), connect master modules here + // AW channel + input slv_aw_chan_t [NoBus-1:0] slv_aw_chans_i, + input logic [NoBus-1:0] slv_aw_valids_i, + output logic [NoBus-1:0] slv_aw_readies_o, + // W channel + input slv_w_chan_t [NoBus-1:0] slv_w_chans_i, + input logic [NoBus-1:0] slv_w_valids_i, + output logic [NoBus-1:0] slv_w_readies_o, + // B channel + output slv_b_chan_t [NoBus-1:0] slv_b_chans_o, + output logic [NoBus-1:0] slv_b_valids_o, + input logic [NoBus-1:0] slv_b_readies_i, + // AR channel + input slv_ar_chan_t [NoBus-1:0] slv_ar_chans_i, + input logic [NoBus-1:0] slv_ar_valids_i, + output logic [NoBus-1:0] slv_ar_readies_o, + // R channel + output slv_r_chan_t [NoBus-1:0] slv_r_chans_o, + output logic [NoBus-1:0] slv_r_valids_o, + input logic [NoBus-1:0] slv_r_readies_i, + // master ports (output), connect slave modules here + // AW channel + output mst_aw_chan_t [NoBus-1:0] mst_aw_chans_o, + output logic [NoBus-1:0] mst_aw_valids_o, + input logic [NoBus-1:0] mst_aw_readies_i, + // W channel + output mst_w_chan_t [NoBus-1:0] mst_w_chans_o, + output logic [NoBus-1:0] mst_w_valids_o, + input logic [NoBus-1:0] mst_w_readies_i, + // B channel + input mst_b_chan_t [NoBus-1:0] mst_b_chans_i, + input logic [NoBus-1:0] mst_b_valids_i, + output logic [NoBus-1:0] mst_b_readies_o, + // AR channel + output mst_ar_chan_t [NoBus-1:0] mst_ar_chans_o, + output logic [NoBus-1:0] mst_ar_valids_o, + input logic [NoBus-1:0] mst_ar_readies_i, + // R channel + input mst_r_chan_t [NoBus-1:0] mst_r_chans_i, + input logic [NoBus-1:0] mst_r_valids_i, + output logic [NoBus-1:0] mst_r_readies_o +); + + // prepend the ID + for (genvar i = 0; i < NoBus; i++) begin : gen_id_prepend + if (PreIdWidth == 0) begin : gen_no_prepend + assign mst_aw_chans_o[i] = slv_aw_chans_i[i]; + assign mst_ar_chans_o[i] = slv_ar_chans_i[i]; + end 
else begin : gen_prepend + always_comb begin + mst_aw_chans_o[i] = slv_aw_chans_i[i]; + mst_ar_chans_o[i] = slv_ar_chans_i[i]; + mst_aw_chans_o[i].id = {pre_id_i, slv_aw_chans_i[i].id[AxiIdWidthSlvPort-1:0]}; + mst_ar_chans_o[i].id = {pre_id_i, slv_ar_chans_i[i].id[AxiIdWidthSlvPort-1:0]}; + end + end + // The ID is in the highest bits of the struct, so an assignment from a channel with a wide ID + // to a channel with a shorter ID correctly cuts the prepended ID. + assign slv_b_chans_o[i] = mst_b_chans_i[i]; + assign slv_r_chans_o[i] = mst_r_chans_i[i]; + end + + // assign the handshaking's and w channel + assign mst_w_chans_o = slv_w_chans_i; + assign mst_aw_valids_o = slv_aw_valids_i; + assign slv_aw_readies_o = mst_aw_readies_i; + assign mst_w_valids_o = slv_w_valids_i; + assign slv_w_readies_o = mst_w_readies_i; + assign slv_b_valids_o = mst_b_valids_i; + assign mst_b_readies_o = slv_b_readies_i; + assign mst_ar_valids_o = slv_ar_valids_i; + assign slv_ar_readies_o = mst_ar_readies_i; + assign slv_r_valids_o = mst_r_valids_i; + assign mst_r_readies_o = slv_r_readies_i; + +// pragma translate_off +`ifndef VERILATOR + initial begin : p_assert + assert(NoBus > 0) + else $fatal(1, "Input must be at least one element wide."); + assert(PreIdWidth == ($bits(mst_aw_chans_o[0].id) - $bits(slv_aw_chans_i[0].id))) + else $fatal(1, "Prepend ID Width must be: $bits(mst_aw_chans_o.id)-$bits(slv_aw_chans_i.id)"); + assert ($bits(mst_aw_chans_o[0].id) > $bits(slv_aw_chans_i[0].id)) + else $fatal(1, "The master AXI port has to have a wider ID than the slave port."); + end + + aw_id : assert final( + mst_aw_chans_o[0].id[$bits(slv_aw_chans_i[0].id)-1:0] === slv_aw_chans_i[0].id) + else $fatal (1, "Something with the AW channel ID prepending went wrong."); + aw_addr : assert final(mst_aw_chans_o[0].addr === slv_aw_chans_i[0].addr) + else $fatal (1, "Something with the AW channel ID prepending went wrong."); + aw_len : assert final(mst_aw_chans_o[0].len === slv_aw_chans_i[0].len) 
+ else $fatal (1, "Something with the AW channel ID prepending went wrong."); + aw_size : assert final(mst_aw_chans_o[0].size === slv_aw_chans_i[0].size) + else $fatal (1, "Something with the AW channel ID prepending went wrong."); + aw_qos : assert final(mst_aw_chans_o[0].qos === slv_aw_chans_i[0].qos) + else $fatal (1, "Something with the AW channel ID prepending went wrong."); + + b_id : assert final( + mst_b_chans_i[0].id[$bits(slv_b_chans_o[0].id)-1:0] === slv_b_chans_o[0].id) + else $fatal (1, "Something with the B channel ID stripping went wrong."); + b_resp : assert final(mst_b_chans_i[0].resp === slv_b_chans_o[0].resp) + else $fatal (1, "Something with the B channel ID stripping went wrong."); + + ar_id : assert final( + mst_ar_chans_o[0].id[$bits(slv_ar_chans_i[0].id)-1:0] === slv_ar_chans_i[0].id) + else $fatal (1, "Something with the AR channel ID prepending went wrong."); + ar_addr : assert final(mst_ar_chans_o[0].addr === slv_ar_chans_i[0].addr) + else $fatal (1, "Something with the AR channel ID prepending went wrong."); + ar_len : assert final(mst_ar_chans_o[0].len === slv_ar_chans_i[0].len) + else $fatal (1, "Something with the AR channel ID prepending went wrong."); + ar_size : assert final(mst_ar_chans_o[0].size === slv_ar_chans_i[0].size) + else $fatal (1, "Something with the AR channel ID prepending went wrong."); + ar_qos : assert final(mst_ar_chans_o[0].qos === slv_ar_chans_i[0].qos) + else $fatal (1, "Something with the AR channel ID prepending went wrong."); + + r_id : assert final(mst_r_chans_i[0].id[$bits(slv_r_chans_o[0].id)-1:0] === slv_r_chans_o[0].id) + else $fatal (1, "Something with the R channel ID stripping went wrong."); + r_data : assert final(mst_r_chans_i[0].data === slv_r_chans_o[0].data) + else $fatal (1, "Something with the R channel ID stripping went wrong."); + r_resp : assert final(mst_r_chans_i[0].resp === slv_r_chans_o[0].resp) + else $fatal (1, "Something with the R channel ID stripping went wrong."); +`endif +// 
pragma translate_on +endmodule diff --git a/test/type_param/vendor/pulp-platform/axi/src/axi_join.sv b/test/type_param/vendor/pulp-platform/axi/src/axi_join.sv new file mode 100644 index 00000000..f15648eb --- /dev/null +++ b/test/type_param/vendor/pulp-platform/axi/src/axi_join.sv @@ -0,0 +1,37 @@ +// Copyright (c) 2014-2018 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Authors: +// - Fabian Schuiki +// - Andreas Kurth + +`include "axi/assign.svh" + +/// A connector that joins two AXI interfaces. 
+module axi_join_intf ( + AXI_BUS.Slave in, + AXI_BUS.Master out +); + + `AXI_ASSIGN(out, in) + +// pragma translate_off +`ifndef VERILATOR + initial begin + assert(in.AXI_ADDR_WIDTH == out.AXI_ADDR_WIDTH); + assert(in.AXI_DATA_WIDTH == out.AXI_DATA_WIDTH); + assert(in.AXI_ID_WIDTH <= out.AXI_ID_WIDTH ); + assert(in.AXI_USER_WIDTH == out.AXI_USER_WIDTH); + end +`endif +// pragma translate_on + +endmodule diff --git a/test/type_param/vendor/pulp-platform/axi/src/axi_multicut.sv b/test/type_param/vendor/pulp-platform/axi/src/axi_multicut.sv new file mode 100644 index 00000000..8e5dc2f9 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/axi/src/axi_multicut.sv @@ -0,0 +1,237 @@ +// Copyright (c) 2014-2019 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Authors: +// - Wolfgang Roenninger +// - Fabian Schuiki +// - Andreas Kurth +// - Stefan Mach + +// Multiple AXI4 cuts. +// +// These can be used to relax timing pressure on very long AXI busses. +module axi_multicut #( + parameter int unsigned NoCuts = 32'd1, // Number of cuts. 
+ // AXI channel structs + parameter type aw_chan_t = logic, + parameter type w_chan_t = logic, + parameter type b_chan_t = logic, + parameter type ar_chan_t = logic, + parameter type r_chan_t = logic, + // AXI request & response structs + parameter type req_t = logic, + parameter type resp_t = logic +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + // slave port + input req_t slv_req_i, + output resp_t slv_resp_o, + // master port + output req_t mst_req_o, + input resp_t mst_resp_i +); + + if (NoCuts == '0) begin : gen_no_cut + // degenerate case, connect input to output + assign mst_req_o = slv_req_i; + assign slv_resp_o = mst_resp_i; + end else begin : gen_axi_cut + // instantiate all needed cuts + req_t [NoCuts:0] cut_req; + resp_t [NoCuts:0] cut_resp; + + // connect slave to the lowest index + assign cut_req[0] = slv_req_i; + assign slv_resp_o = cut_resp[0]; + + // AXI cuts + for (genvar i = 0; i < NoCuts; i++) begin : gen_axi_cuts + axi_cut #( + .Bypass ( 1'b0 ), + .aw_chan_t ( aw_chan_t ), + .w_chan_t ( w_chan_t ), + .b_chan_t ( b_chan_t ), + .ar_chan_t ( ar_chan_t ), + .r_chan_t ( r_chan_t ), + .req_t ( req_t ), + .resp_t ( resp_t ) + ) i_cut ( + .clk_i, + .rst_ni, + .slv_req_i ( cut_req[i] ), + .slv_resp_o ( cut_resp[i] ), + .mst_req_o ( cut_req[i+1] ), + .mst_resp_i ( cut_resp[i+1] ) + ); + end + + // connect master to the highest index + assign mst_req_o = cut_req[NoCuts]; + assign cut_resp[NoCuts] = mst_resp_i; + end + + // Check the invariants + // pragma translate_off + `ifndef VERILATOR + initial begin + assert(NoCuts >= 0); + end + `endif + // pragma translate_on +endmodule + +`include "axi/assign.svh" +`include "axi/typedef.svh" + +// interface wrapper +module axi_multicut_intf #( + parameter int unsigned ADDR_WIDTH = 0, // The address width. + parameter int unsigned DATA_WIDTH = 0, // The data width. + parameter int unsigned ID_WIDTH = 0, // The ID width. 
+ parameter int unsigned USER_WIDTH = 0, // The user data width. + parameter int unsigned NUM_CUTS = 0 // The number of cuts. +) ( + input logic clk_i, + input logic rst_ni, + AXI_BUS.Slave in, + AXI_BUS.Master out +); + + typedef logic [ID_WIDTH-1:0] id_t; + typedef logic [ADDR_WIDTH-1:0] addr_t; + typedef logic [DATA_WIDTH-1:0] data_t; + typedef logic [DATA_WIDTH/8-1:0] strb_t; + typedef logic [USER_WIDTH-1:0] user_t; + + `AXI_TYPEDEF_AW_CHAN_T(aw_chan_t, addr_t, id_t, user_t) + `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t) + `AXI_TYPEDEF_B_CHAN_T(b_chan_t, id_t, user_t) + `AXI_TYPEDEF_AR_CHAN_T(ar_chan_t, addr_t, id_t, user_t) + `AXI_TYPEDEF_R_CHAN_T(r_chan_t, data_t, id_t, user_t) + `AXI_TYPEDEF_REQ_T(req_t, aw_chan_t, w_chan_t, ar_chan_t) + `AXI_TYPEDEF_RESP_T(resp_t, b_chan_t, r_chan_t) + + req_t slv_req, mst_req; + resp_t slv_resp, mst_resp; + + `AXI_ASSIGN_TO_REQ(slv_req, in) + `AXI_ASSIGN_FROM_RESP(in, slv_resp) + + `AXI_ASSIGN_FROM_REQ(out, mst_req) + `AXI_ASSIGN_TO_RESP(mst_resp, out) + + axi_multicut #( + .NoCuts ( NUM_CUTS ), + .aw_chan_t ( aw_chan_t ), + .w_chan_t ( w_chan_t ), + .b_chan_t ( b_chan_t ), + .ar_chan_t ( ar_chan_t ), + .r_chan_t ( r_chan_t ), + .req_t ( req_t ), + .resp_t ( resp_t ) + ) i_axi_multicut ( + .clk_i, + .rst_ni, + .slv_req_i ( slv_req ), + .slv_resp_o ( slv_resp ), + .mst_req_o ( mst_req ), + .mst_resp_i ( mst_resp ) + ); + + // Check the invariants. 
+ // pragma translate_off + `ifndef VERILATOR + initial begin + assert (ADDR_WIDTH > 0) else $fatal(1, "Wrong addr width parameter"); + assert (DATA_WIDTH > 0) else $fatal(1, "Wrong data width parameter"); + assert (ID_WIDTH > 0) else $fatal(1, "Wrong id width parameter"); + assert (USER_WIDTH > 0) else $fatal(1, "Wrong user width parameter"); + assert (in.AXI_ADDR_WIDTH == ADDR_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (in.AXI_DATA_WIDTH == DATA_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (in.AXI_ID_WIDTH == ID_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (in.AXI_USER_WIDTH == USER_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (out.AXI_ADDR_WIDTH == ADDR_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (out.AXI_DATA_WIDTH == DATA_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (out.AXI_ID_WIDTH == ID_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (out.AXI_USER_WIDTH == USER_WIDTH) else $fatal(1, "Wrong interface definition"); + end + `endif + // pragma translate_on +endmodule + +module axi_lite_multicut_intf #( + // The address width. + parameter int unsigned ADDR_WIDTH = 0, + // The data width. + parameter int unsigned DATA_WIDTH = 0, + // The number of cuts. 
+ parameter int unsigned NUM_CUTS = 0 +) ( + input logic clk_i , + input logic rst_ni , + AXI_LITE.Slave in , + AXI_LITE.Master out +); + + typedef logic [ADDR_WIDTH-1:0] addr_t; + typedef logic [DATA_WIDTH-1:0] data_t; + typedef logic [DATA_WIDTH/8-1:0] strb_t; + + `AXI_LITE_TYPEDEF_AW_CHAN_T(aw_chan_t, addr_t) + `AXI_LITE_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t) + `AXI_LITE_TYPEDEF_B_CHAN_T(b_chan_t) + `AXI_LITE_TYPEDEF_AR_CHAN_T(ar_chan_t, addr_t) + `AXI_LITE_TYPEDEF_R_CHAN_T(r_chan_t, data_t) + `AXI_LITE_TYPEDEF_REQ_T(req_t, aw_chan_t, w_chan_t, ar_chan_t) + `AXI_LITE_TYPEDEF_RESP_T(resp_t, b_chan_t, r_chan_t) + + req_t slv_req, mst_req; + resp_t slv_resp, mst_resp; + + `AXI_LITE_ASSIGN_TO_REQ(slv_req, in) + `AXI_LITE_ASSIGN_FROM_RESP(in, slv_resp) + + `AXI_LITE_ASSIGN_FROM_REQ(out, mst_req) + `AXI_LITE_ASSIGN_TO_RESP(mst_resp, out) + + axi_multicut #( + .NoCuts ( NUM_CUTS ), + .aw_chan_t ( aw_chan_t ), + .w_chan_t ( w_chan_t ), + .b_chan_t ( b_chan_t ), + .ar_chan_t ( ar_chan_t ), + .r_chan_t ( r_chan_t ), + .req_t ( req_t ), + .resp_t ( resp_t ) + ) i_axi_multicut ( + .clk_i, + .rst_ni, + .slv_req_i ( slv_req ), + .slv_resp_o ( slv_resp ), + .mst_req_o ( mst_req ), + .mst_resp_i ( mst_resp ) + ); + + // Check the invariants. 
+ // pragma translate_off + `ifndef VERILATOR + initial begin + assert (ADDR_WIDTH > 0) else $fatal(1, "Wrong addr width parameter"); + assert (DATA_WIDTH > 0) else $fatal(1, "Wrong data width parameter"); + assert (in.AXI_ADDR_WIDTH == ADDR_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (in.AXI_DATA_WIDTH == DATA_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (out.AXI_ADDR_WIDTH == ADDR_WIDTH) else $fatal(1, "Wrong interface definition"); + assert (out.AXI_DATA_WIDTH == DATA_WIDTH) else $fatal(1, "Wrong interface definition"); + end + `endif + // pragma translate_on +endmodule diff --git a/test/type_param/vendor/pulp-platform/axi/src/axi_mux.sv b/test/type_param/vendor/pulp-platform/axi/src/axi_mux.sv new file mode 100644 index 00000000..59ee3ec4 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/axi/src/axi_mux.sv @@ -0,0 +1,522 @@ +// Copyright (c) 2019 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Authors: +// - Wolfgang Roenninger +// - Andreas Kurth + +// AXI Multiplexer: This module multiplexes the AXI4 slave ports down to one master port. +// The AXI IDs from the slave ports get extended with the respective slave port index. +// The extension width can be calculated with `$clog2(NoSlvPorts)`. This means the AXI +// ID for the master port has to be this `$clog2(NoSlvPorts)` wider than the ID for the +// slave ports. 
+// Responses are switched based on these bits. For example, with 4 slave ports +// a response with ID `6'b100110` will be forwarded to slave port 2 (`2'b10`). + +// register macros +`include "common_cells/registers.svh" + +module axi_mux #( + // AXI parameter and channel types + parameter int unsigned SlvAxiIDWidth = 32'd0, // AXI ID width, slave ports + parameter type slv_aw_chan_t = logic, // AW Channel Type, slave ports + parameter type mst_aw_chan_t = logic, // AW Channel Type, master port + parameter type w_chan_t = logic, // W Channel Type, all ports + parameter type slv_b_chan_t = logic, // B Channel Type, slave ports + parameter type mst_b_chan_t = logic, // B Channel Type, master port + parameter type slv_ar_chan_t = logic, // AR Channel Type, slave ports + parameter type mst_ar_chan_t = logic, // AR Channel Type, master port + parameter type slv_r_chan_t = logic, // R Channel Type, slave ports + parameter type mst_r_chan_t = logic, // R Channel Type, master port + parameter type slv_req_t = logic, // Slave port request type + parameter type slv_resp_t = logic, // Slave port response type + parameter type mst_req_t = logic, // Master ports request type + parameter type mst_resp_t = logic, // Master ports response type + parameter int unsigned NoSlvPorts = 32'd0, // Number of slave ports + // Maximum number of outstanding transactions per write + parameter int unsigned MaxWTrans = 32'd8, + // If enabled, this multiplexer is purely combinatorial + parameter bit FallThrough = 1'b0, + // add spill register on write master ports, adds a cycle latency on write channels + parameter bit SpillAw = 1'b1, + parameter bit SpillW = 1'b0, + parameter bit SpillB = 1'b0, + // add spill register on read master ports, adds a cycle latency on read channels + parameter bit SpillAr = 1'b1, + parameter bit SpillR = 1'b0 +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic test_i, // Test Mode enable + // slave ports (AXI 
inputs), connect master modules here + input slv_req_t [NoSlvPorts-1:0] slv_reqs_i, + output slv_resp_t [NoSlvPorts-1:0] slv_resps_o, + // master port (AXI outputs), connect slave modules here + output mst_req_t mst_req_o, + input mst_resp_t mst_resp_i +); + + localparam int unsigned MstIdxBits = $clog2(NoSlvPorts); + localparam int unsigned MstAxiIDWidth = SlvAxiIDWidth + MstIdxBits; + + // pass through if only one slave port + if (NoSlvPorts == 32'h1) begin : gen_no_mux + assign mst_req_o = slv_reqs_i[0]; + assign slv_resps_o[0] = mst_resp_i; + // other non degenerate cases + end else begin : gen_mux + + typedef logic [MstIdxBits-1:0] switch_id_t; + + // AXI channels between the ID prepend unit and the rest of the multiplexer + mst_aw_chan_t [NoSlvPorts-1:0] slv_aw_chans; + logic [NoSlvPorts-1:0] slv_aw_valids, slv_aw_readies; + w_chan_t [NoSlvPorts-1:0] slv_w_chans; + logic [NoSlvPorts-1:0] slv_w_valids, slv_w_readies; + mst_b_chan_t [NoSlvPorts-1:0] slv_b_chans; + logic [NoSlvPorts-1:0] slv_b_valids, slv_b_readies; + mst_ar_chan_t [NoSlvPorts-1:0] slv_ar_chans; + logic [NoSlvPorts-1:0] slv_ar_valids, slv_ar_readies; + mst_r_chan_t [NoSlvPorts-1:0] slv_r_chans; + logic [NoSlvPorts-1:0] slv_r_valids, slv_r_readies; + + // These signals are all ID prepended + // AW channel + mst_aw_chan_t mst_aw_chan; + logic mst_aw_valid, mst_aw_ready; + + // AW master handshake internal, so that we are able to stall, if w_fifo is full + logic aw_valid, aw_ready; + + // FF to lock the AW valid signal, when a new arbitration decision is made the decision + // gets pushed into the W FIFO, when it now stalls prevent subsequent pushing + // This FF removes AW to W dependency + logic lock_aw_valid_d, lock_aw_valid_q; + logic load_aw_lock; + + // signals for the FIFO that holds the last switching decision of the AW channel + logic w_fifo_full, w_fifo_empty; + logic w_fifo_push, w_fifo_pop; + switch_id_t w_fifo_data; + + // W channel spill reg + w_chan_t mst_w_chan; + logic mst_w_valid, 
mst_w_ready; + + // master ID in the b_id + switch_id_t switch_b_id; + + // B channel spill reg + mst_b_chan_t mst_b_chan; + logic mst_b_valid; + + // AR channel for when spill is enabled + mst_ar_chan_t mst_ar_chan; + logic ar_valid, ar_ready; + + // master ID in the r_id + switch_id_t switch_r_id; + + // R channel spill reg + mst_r_chan_t mst_r_chan; + logic mst_r_valid; + + //-------------------------------------- + // ID prepend for all slave ports + //-------------------------------------- + for (genvar i = 0; i < NoSlvPorts; i++) begin : gen_id_prepend + axi_id_prepend #( + .NoBus ( 32'd1 ), // one AXI bus per slave port + .AxiIdWidthSlvPort( SlvAxiIDWidth ), + .AxiIdWidthMstPort( MstAxiIDWidth ), + .slv_aw_chan_t ( slv_aw_chan_t ), + .slv_w_chan_t ( w_chan_t ), + .slv_b_chan_t ( slv_b_chan_t ), + .slv_ar_chan_t ( slv_ar_chan_t ), + .slv_r_chan_t ( slv_r_chan_t ), + .mst_aw_chan_t ( mst_aw_chan_t ), + .mst_w_chan_t ( w_chan_t ), + .mst_b_chan_t ( mst_b_chan_t ), + .mst_ar_chan_t ( mst_ar_chan_t ), + .mst_r_chan_t ( mst_r_chan_t ) + ) i_id_prepend ( + .pre_id_i ( switch_id_t'(i) ), + .slv_aw_chans_i ( slv_reqs_i[i].aw ), + .slv_aw_valids_i ( slv_reqs_i[i].aw_valid ), + .slv_aw_readies_o ( slv_resps_o[i].aw_ready ), + .slv_w_chans_i ( slv_reqs_i[i].w ), + .slv_w_valids_i ( slv_reqs_i[i].w_valid ), + .slv_w_readies_o ( slv_resps_o[i].w_ready ), + .slv_b_chans_o ( slv_resps_o[i].b ), + .slv_b_valids_o ( slv_resps_o[i].b_valid ), + .slv_b_readies_i ( slv_reqs_i[i].b_ready ), + .slv_ar_chans_i ( slv_reqs_i[i].ar ), + .slv_ar_valids_i ( slv_reqs_i[i].ar_valid ), + .slv_ar_readies_o ( slv_resps_o[i].ar_ready ), + .slv_r_chans_o ( slv_resps_o[i].r ), + .slv_r_valids_o ( slv_resps_o[i].r_valid ), + .slv_r_readies_i ( slv_reqs_i[i].r_ready ), + .mst_aw_chans_o ( slv_aw_chans[i] ), + .mst_aw_valids_o ( slv_aw_valids[i] ), + .mst_aw_readies_i ( slv_aw_readies[i] ), + .mst_w_chans_o ( slv_w_chans[i] ), + .mst_w_valids_o ( slv_w_valids[i] ), + .mst_w_readies_i ( 
slv_w_readies[i] ), + .mst_b_chans_i ( slv_b_chans[i] ), + .mst_b_valids_i ( slv_b_valids[i] ), + .mst_b_readies_o ( slv_b_readies[i] ), + .mst_ar_chans_o ( slv_ar_chans[i] ), + .mst_ar_valids_o ( slv_ar_valids[i] ), + .mst_ar_readies_i ( slv_ar_readies[i] ), + .mst_r_chans_i ( slv_r_chans[i] ), + .mst_r_valids_i ( slv_r_valids[i] ), + .mst_r_readies_o ( slv_r_readies[i] ) + ); + end + + //-------------------------------------- + // AW Channel + //-------------------------------------- + rr_arb_tree #( + .NumIn ( NoSlvPorts ), + .DataType ( mst_aw_chan_t ), + .AxiVldRdy( 1'b1 ), + .LockIn ( 1'b1 ) + ) i_aw_arbiter ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_i( 1'b0 ), + .rr_i ( '0 ), + .req_i ( slv_aw_valids ), + .gnt_o ( slv_aw_readies ), + .data_i ( slv_aw_chans ), + .gnt_i ( aw_ready ), + .req_o ( aw_valid ), + .data_o ( mst_aw_chan ), + .idx_o ( ) + ); + + // control of the AW channel + always_comb begin + // default assignments + lock_aw_valid_d = lock_aw_valid_q; + load_aw_lock = 1'b0; + w_fifo_push = 1'b0; + mst_aw_valid = 1'b0; + aw_ready = 1'b0; + // had a downstream stall, be valid and send the AW along + if (lock_aw_valid_q) begin + mst_aw_valid = 1'b1; + // transaction + if (mst_aw_ready) begin + aw_ready = 1'b1; + lock_aw_valid_d = 1'b0; + load_aw_lock = 1'b1; + end + end else begin + if (!w_fifo_full && aw_valid) begin + mst_aw_valid = 1'b1; + w_fifo_push = 1'b1; + if (mst_aw_ready) begin + aw_ready = 1'b1; + end else begin + // go to lock if transaction not in this cycle + lock_aw_valid_d = 1'b1; + load_aw_lock = 1'b1; + end + end + end + end + + `FFLARN(lock_aw_valid_q, lock_aw_valid_d, load_aw_lock, '0, clk_i, rst_ni) + + fifo_v3 #( + .FALL_THROUGH ( FallThrough ), + .DEPTH ( MaxWTrans ), + .dtype ( switch_id_t ) + ) i_w_fifo ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_i ( 1'b0 ), + .testmode_i( test_i ), + .full_o ( w_fifo_full ), + .empty_o ( w_fifo_empty ), + .usage_o ( ), + .data_i ( mst_aw_chan.id[SlvAxiIDWidth+:MstIdxBits] 
), + .push_i ( w_fifo_push ), + .data_o ( w_fifo_data ), + .pop_i ( w_fifo_pop ) + ); + + spill_register #( + .T ( mst_aw_chan_t ), + .Bypass ( ~SpillAw ) // Param indicated that we want a spill reg + ) i_aw_spill_reg ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .valid_i ( mst_aw_valid ), + .ready_o ( mst_aw_ready ), + .data_i ( mst_aw_chan ), + .valid_o ( mst_req_o.aw_valid ), + .ready_i ( mst_resp_i.aw_ready ), + .data_o ( mst_req_o.aw ) + ); + + //-------------------------------------- + // W Channel + //-------------------------------------- + // multiplexer + assign mst_w_chan = slv_w_chans[w_fifo_data]; + always_comb begin + // default assignments + mst_w_valid = 1'b0; + slv_w_readies = '0; + w_fifo_pop = 1'b0; + // control + if (!w_fifo_empty) begin + // connect the handshake + mst_w_valid = slv_w_valids[w_fifo_data]; + slv_w_readies[w_fifo_data] = mst_w_ready; + // pop FIFO on a last transaction + w_fifo_pop = slv_w_valids[w_fifo_data] & mst_w_ready & mst_w_chan.last; + end + end + + spill_register #( + .T ( w_chan_t ), + .Bypass ( ~SpillW ) + ) i_w_spill_reg ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .valid_i ( mst_w_valid ), + .ready_o ( mst_w_ready ), + .data_i ( mst_w_chan ), + .valid_o ( mst_req_o.w_valid ), + .ready_i ( mst_resp_i.w_ready ), + .data_o ( mst_req_o.w ) + ); + + //-------------------------------------- + // B Channel + //-------------------------------------- + // replicate B channels + assign slv_b_chans = {NoSlvPorts{mst_b_chan}}; + // control B channel handshake + assign switch_b_id = mst_b_chan.id[SlvAxiIDWidth+:MstIdxBits]; + assign slv_b_valids = (mst_b_valid) ? 
(1 << switch_b_id) : '0; + + spill_register #( + .T ( mst_b_chan_t ), + .Bypass ( ~SpillB ) + ) i_b_spill_reg ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .valid_i ( mst_resp_i.b_valid ), + .ready_o ( mst_req_o.b_ready ), + .data_i ( mst_resp_i.b ), + .valid_o ( mst_b_valid ), + .ready_i ( slv_b_readies[switch_b_id] ), + .data_o ( mst_b_chan ) + ); + + //-------------------------------------- + // AR Channel + //-------------------------------------- + rr_arb_tree #( + .NumIn ( NoSlvPorts ), + .DataType ( mst_ar_chan_t ), + .AxiVldRdy( 1'b1 ), + .LockIn ( 1'b1 ) + ) i_ar_arbiter ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_i( 1'b0 ), + .rr_i ( '0 ), + .req_i ( slv_ar_valids ), + .gnt_o ( slv_ar_readies ), + .data_i ( slv_ar_chans ), + .gnt_i ( ar_ready ), + .req_o ( ar_valid ), + .data_o ( mst_ar_chan ), + .idx_o ( ) + ); + + spill_register #( + .T ( mst_ar_chan_t ), + .Bypass ( ~SpillAr ) + ) i_ar_spill_reg ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .valid_i ( ar_valid ), + .ready_o ( ar_ready ), + .data_i ( mst_ar_chan ), + .valid_o ( mst_req_o.ar_valid ), + .ready_i ( mst_resp_i.ar_ready ), + .data_o ( mst_req_o.ar ) + ); + + //-------------------------------------- + // R Channel + //-------------------------------------- + // replicate R channels + assign slv_r_chans = {NoSlvPorts{mst_r_chan}}; + // R channel handshake control + assign switch_r_id = mst_r_chan.id[SlvAxiIDWidth+:MstIdxBits]; + assign slv_r_valids = (mst_r_valid) ? 
(1 << switch_r_id) : '0; + + spill_register #( + .T ( mst_r_chan_t ), + .Bypass ( ~SpillR ) + ) i_r_spill_reg ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .valid_i ( mst_resp_i.r_valid ), + .ready_o ( mst_req_o.r_ready ), + .data_i ( mst_resp_i.r ), + .valid_o ( mst_r_valid ), + .ready_i ( slv_r_readies[switch_r_id] ), + .data_o ( mst_r_chan ) + ); + end + +// pragma translate_off +`ifndef VERILATOR + initial begin + assert (SlvAxiIDWidth > 0) else $fatal(1, "AXI ID width of slave ports must be non-zero!"); + assert (NoSlvPorts > 0) else $fatal(1, "Number of slave ports must be non-zero!"); + assert (MaxWTrans > 0) + else $fatal(1, "Maximum number of outstanding writes must be non-zero!"); + assert (MstAxiIDWidth >= SlvAxiIDWidth + $clog2(NoSlvPorts)) + else $fatal(1, "AXI ID width of master ports must be wide enough to identify slave ports!"); + // Assert ID widths (one slave is sufficient since they all have the same type). + assert ($unsigned($bits(slv_reqs_i[0].aw.id)) == SlvAxiIDWidth) + else $fatal(1, "ID width of AW channel of slave ports does not match parameter!"); + assert ($unsigned($bits(slv_reqs_i[0].ar.id)) == SlvAxiIDWidth) + else $fatal(1, "ID width of AR channel of slave ports does not match parameter!"); + assert ($unsigned($bits(slv_resps_o[0].b.id)) == SlvAxiIDWidth) + else $fatal(1, "ID width of B channel of slave ports does not match parameter!"); + assert ($unsigned($bits(slv_resps_o[0].r.id)) == SlvAxiIDWidth) + else $fatal(1, "ID width of R channel of slave ports does not match parameter!"); + assert ($unsigned($bits(mst_req_o.aw.id)) == MstAxiIDWidth) + else $fatal(1, "ID width of AW channel of master port is wrong!"); + assert ($unsigned($bits(mst_req_o.ar.id)) == MstAxiIDWidth) + else $fatal(1, "ID width of AR channel of master port is wrong!"); + assert ($unsigned($bits(mst_resp_i.b.id)) == MstAxiIDWidth) + else $fatal(1, "ID width of B channel of master port is wrong!"); + assert ($unsigned($bits(mst_resp_i.r.id)) == MstAxiIDWidth) + 
else $fatal(1, "ID width of R channel of master port is wrong!"); + end +`endif +// pragma translate_on +endmodule + +// interface wrap +`include "axi/assign.svh" +`include "axi/typedef.svh" +module axi_mux_intf #( + parameter int unsigned SLV_AXI_ID_WIDTH = 32'd0, // Synopsys DC requires default value for params + parameter int unsigned MST_AXI_ID_WIDTH = 32'd0, + parameter int unsigned AXI_ADDR_WIDTH = 32'd0, + parameter int unsigned AXI_DATA_WIDTH = 32'd0, + parameter int unsigned AXI_USER_WIDTH = 32'd0, + parameter int unsigned NO_SLV_PORTS = 32'd0, // Number of slave ports + // Maximum number of outstanding transactions per write + parameter int unsigned MAX_W_TRANS = 32'd8, + // if enabled, this multiplexer is purely combinatorial + parameter bit FALL_THROUGH = 1'b0, + // add spill register on write master ports, adds a cycle latency on write channels + parameter bit SPILL_AW = 1'b1, + parameter bit SPILL_W = 1'b0, + parameter bit SPILL_B = 1'b0, + // add spill register on read master ports, adds a cycle latency on read channels + parameter bit SPILL_AR = 1'b1, + parameter bit SPILL_R = 1'b0 +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic test_i, // Testmode enable + AXI_BUS.Slave slv [NO_SLV_PORTS-1:0], // slave ports + AXI_BUS.Master mst // master port +); + + typedef logic [SLV_AXI_ID_WIDTH-1:0] slv_id_t; + typedef logic [MST_AXI_ID_WIDTH-1:0] mst_id_t; + typedef logic [AXI_ADDR_WIDTH -1:0] addr_t; + typedef logic [AXI_DATA_WIDTH-1:0] data_t; + typedef logic [AXI_DATA_WIDTH/8-1:0] strb_t; + typedef logic [AXI_USER_WIDTH-1:0] user_t; + // channels typedef + `AXI_TYPEDEF_AW_CHAN_T(slv_aw_chan_t, addr_t, slv_id_t, user_t) + `AXI_TYPEDEF_AW_CHAN_T(mst_aw_chan_t, addr_t, mst_id_t, user_t) + + `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t) + + `AXI_TYPEDEF_B_CHAN_T(slv_b_chan_t, slv_id_t, user_t) + `AXI_TYPEDEF_B_CHAN_T(mst_b_chan_t, mst_id_t, user_t) + + `AXI_TYPEDEF_AR_CHAN_T(slv_ar_chan_t, 
addr_t, slv_id_t, user_t) + `AXI_TYPEDEF_AR_CHAN_T(mst_ar_chan_t, addr_t, mst_id_t, user_t) + + `AXI_TYPEDEF_R_CHAN_T(slv_r_chan_t, data_t, slv_id_t, user_t) + `AXI_TYPEDEF_R_CHAN_T(mst_r_chan_t, data_t, mst_id_t, user_t) + + `AXI_TYPEDEF_REQ_T(slv_req_t, slv_aw_chan_t, w_chan_t, slv_ar_chan_t) + `AXI_TYPEDEF_RESP_T(slv_resp_t, slv_b_chan_t, slv_r_chan_t) + + `AXI_TYPEDEF_REQ_T(mst_req_t, mst_aw_chan_t, w_chan_t, mst_ar_chan_t) + `AXI_TYPEDEF_RESP_T(mst_resp_t, mst_b_chan_t, mst_r_chan_t) + + slv_req_t [NO_SLV_PORTS-1:0] slv_reqs; + slv_resp_t [NO_SLV_PORTS-1:0] slv_resps; + mst_req_t mst_req; + mst_resp_t mst_resp; + + for (genvar i = 0; i < NO_SLV_PORTS; i++) begin : gen_assign_slv_ports + `AXI_ASSIGN_TO_REQ(slv_reqs[i], slv[i]) + `AXI_ASSIGN_FROM_RESP(slv[i], slv_resps[i]) + end + + `AXI_ASSIGN_FROM_REQ(mst, mst_req) + `AXI_ASSIGN_TO_RESP(mst_resp, mst) + + axi_mux #( + .SlvAxiIDWidth ( SLV_AXI_ID_WIDTH ), + .slv_aw_chan_t ( slv_aw_chan_t ), // AW Channel Type, slave ports + .mst_aw_chan_t ( mst_aw_chan_t ), // AW Channel Type, master port + .w_chan_t ( w_chan_t ), // W Channel Type, all ports + .slv_b_chan_t ( slv_b_chan_t ), // B Channel Type, slave ports + .mst_b_chan_t ( mst_b_chan_t ), // B Channel Type, master port + .slv_ar_chan_t ( slv_ar_chan_t ), // AR Channel Type, slave ports + .mst_ar_chan_t ( mst_ar_chan_t ), // AR Channel Type, master port + .slv_r_chan_t ( slv_r_chan_t ), // R Channel Type, slave ports + .mst_r_chan_t ( mst_r_chan_t ), // R Channel Type, master port + .slv_req_t ( slv_req_t ), + .slv_resp_t ( slv_resp_t ), + .mst_req_t ( mst_req_t ), + .mst_resp_t ( mst_resp_t ), + .NoSlvPorts ( NO_SLV_PORTS ), // Number of slave ports + .MaxWTrans ( MAX_W_TRANS ), + .FallThrough ( FALL_THROUGH ), + .SpillAw ( SPILL_AW ), + .SpillW ( SPILL_W ), + .SpillB ( SPILL_B ), + .SpillAr ( SPILL_AR ), + .SpillR ( SPILL_R ) + ) i_axi_mux ( + .clk_i ( clk_i ), // Clock + .rst_ni ( rst_ni ), // Asynchronous reset active low + .test_i ( test_i ), // Test Mode 
enable + .slv_reqs_i ( slv_reqs ), + .slv_resps_o ( slv_resps ), + .mst_req_o ( mst_req ), + .mst_resp_i ( mst_resp ) + ); +endmodule diff --git a/test/type_param/vendor/pulp-platform/axi/src/axi_pkg.sv b/test/type_param/vendor/pulp-platform/axi/src/axi_pkg.sv new file mode 100644 index 00000000..92ede558 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/axi/src/axi_pkg.sv @@ -0,0 +1,423 @@ +// Copyright (c) 2014-2020 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Authors: +// - Andreas Kurth +// - Florian Zaruba +// - Wolfgang Roenninger +// - Fabian Schuiki +// - Matheus Cavalcante + +//! AXI Package +/// Contains all necessary type definitions, constants, and generally useful functions. +package axi_pkg; + /// AXI Transaction Burst Type. + typedef logic [1:0] burst_t; + /// AXI Transaction Response Type. + typedef logic [1:0] resp_t; + /// AXI Transaction Cacheability Type. + typedef logic [3:0] cache_t; + /// AXI Transaction Protection Type. + typedef logic [2:0] prot_t; + /// AXI Transaction Quality of Service Type. + typedef logic [3:0] qos_t; + /// AXI Transaction Region Type. + typedef logic [3:0] region_t; + /// AXI Transaction Length Type. + typedef logic [7:0] len_t; + /// AXI Transaction Size Type. + typedef logic [2:0] size_t; + /// AXI5 Atomic Operation Type. 
+ typedef logic [5:0] atop_t; // atomic operations + /// AXI5 Non-Secure Address Identifier. + typedef logic [3:0] nsaid_t; + + /// In a fixed burst: + /// - The address is the same for every transfer in the burst. + /// - The byte lanes that are valid are constant for all beats in the burst. However, within + /// those byte lanes, the actual bytes that have `wstrb` asserted can differ for each beat in + /// the burst. + /// This burst type is used for repeated accesses to the same location such as when loading or + /// emptying a FIFO. + localparam BURST_FIXED = 2'b00; + /// In an incrementing burst, the address for each transfer in the burst is an increment of the + /// address for the previous transfer. The increment value depends on the size of the transfer. + /// For example, the address for each transfer in a burst with a size of 4 bytes is the previous + /// address plus four. + /// This burst type is used for accesses to normal sequential memory. + localparam BURST_INCR = 2'b01; + /// A wrapping burst is similar to an incrementing burst, except that the address wraps around to + /// a lower address if an upper address limit is reached. + /// The following restrictions apply to wrapping bursts: + /// - The start address must be aligned to the size of each transfer. + /// - The length of the burst must be 2, 4, 8, or 16 transfers. + localparam BURST_WRAP = 2'b10; + + /// Normal access success. Indicates that a normal access has been successful. Can also indicate + /// that an exclusive access has failed. + localparam RESP_OKAY = 2'b00; + /// Exclusive access okay. Indicates that either the read or write portion of an exclusive access + /// has been successful. + localparam RESP_EXOKAY = 2'b01; + /// Slave error. Used when the access has reached the slave successfully, but the slave wishes to + /// return an error condition to the originating master. + localparam RESP_SLVERR = 2'b10; + /// Decode error. 
Generated, typically by an interconnect component, to indicate that there is no + /// slave at the transaction address. + localparam RESP_DECERR = 2'b11; + + /// When this bit is asserted, the interconnect, or any component, can delay the transaction + /// reaching its final destination for any number of cycles. + localparam CACHE_BUFFERABLE = 4'b0001; + /// When HIGH, Modifiable indicates that the characteristics of the transaction can be modified. + /// When Modifiable is LOW, the transaction is Non-modifiable. + localparam CACHE_MODIFIABLE = 4'b0010; + /// When this bit is asserted, read allocation of the transaction is recommended but is not + /// mandatory. + localparam CACHE_RD_ALLOC = 4'b0100; + /// When this bit is asserted, write allocation of the transaction is recommended but is not + /// mandatory. + localparam CACHE_WR_ALLOC = 4'b1000; + + /// Maximum number of bytes per burst, as specified by `size` (see Table A3-2). + function automatic shortint unsigned num_bytes(size_t size); + return 1 << size; + endfunction + + /// An overly long address type. + /// It lets us define functions that work generically for shorter addresses. We rely on the + /// synthesizer to optimize the unused bits away. + typedef logic [127:0] largest_addr_t; + + /// Aligned address of burst (see A3-51). + function automatic largest_addr_t aligned_addr(largest_addr_t addr, size_t size); + return (addr >> size) << size; + endfunction + + /// Wrap boundary of a `BURST_WRAP` transfer (see A3-51). + /// This is the lowest address accessed within a wrapping burst. + /// This address is aligned to the size and length of the burst. + /// The length of a `BURST_WRAP` has to be 2, 4, 8, or 16 transfers.
+ function automatic largest_addr_t wrap_boundary (largest_addr_t addr, size_t size, len_t len); + largest_addr_t wrap_addr; + + // pragma translate_off + `ifndef VERILATOR + assume (len == len_t'(4'b1) || len == len_t'(4'b11) || len == len_t'(4'b111) || + len == len_t'(4'b1111)) else + $error("AXI BURST_WRAP with not allowed len of: %0h", len); + `endif + // pragma translate_on + + // In A3-51 the wrap boundary is defined as: + // `Wrap_Boundary = (INT(Start_Address / (Number_Bytes × Burst_Length))) × + // (Number_Bytes × Burst_Length)` + // Whereas the aligned address is defined as: + // `Aligned_Address = (INT(Start_Address / Number_Bytes)) × Number_Bytes` + // This leads to the wrap boundary using the same calculation as the aligned address, difference + // being the additional dependency on the burst length. The addition in the case statement + // is equal to the multiplication with `Burst_Length` as a shift (used by `aligned_addr`) is + // equivalent with multiplication and division by a power of two, which conveniently are the + // only allowed values for `len` of a `BURST_WRAP`. + unique case (len) + 4'b1 : wrap_addr = (addr >> (unsigned'(size) + 1)) << (unsigned'(size) + 1); // multiply `Number_Bytes` by `2` + 4'b11 : wrap_addr = (addr >> (unsigned'(size) + 2)) << (unsigned'(size) + 2); // multiply `Number_Bytes` by `4` + 4'b111 : wrap_addr = (addr >> (unsigned'(size) + 3)) << (unsigned'(size) + 3); // multiply `Number_Bytes` by `8` + 4'b1111 : wrap_addr = (addr >> (unsigned'(size) + 4)) << (unsigned'(size) + 4); // multiply `Number_Bytes` by `16` + default : wrap_addr = '0; + endcase + return wrap_addr; + endfunction + + /// Address of beat (see A3-51). 
+ function automatic largest_addr_t + beat_addr(largest_addr_t addr, size_t size, len_t len, burst_t burst, shortint unsigned i_beat); + largest_addr_t ret_addr = addr; + largest_addr_t wrp_bond = '0; + if (burst == BURST_WRAP) begin + // do not trigger the function if there is no wrapping burst, to prevent assumptions firing + wrp_bond = wrap_boundary(addr, size, len); + end + if (i_beat != 0 && burst != BURST_FIXED) begin + // From A3-51: + // For an INCR burst, and for a WRAP burst for which the address has not wrapped, this + // equation determines the address of any transfer after the first transfer in a burst: + // `Address_N = Aligned_Address + (N – 1) × Number_Bytes` (N counts from 1 to len!) + ret_addr = aligned_addr(addr, size) + i_beat * num_bytes(size); + // From A3-51: + // For a WRAP burst, if Address_N = Wrap_Boundary + (Number_Bytes × Burst_Length), then: + // * Use this equation for the current transfer: + // `Address_N = Wrap_Boundary` + // * Use this equation for any subsequent transfers: + // `Address_N = Start_Address + ((N – 1) × Number_Bytes) – (Number_Bytes × Burst_Length)` + // This means that the address calculation of a `BURST_WRAP` fundamentally works the same + // as for a `BURST_INCR`, the difference is when the calculated address increments + // over the wrap threshold, the address wraps around by subtracting the accessed address + // space from the normal `BURST_INCR` address. The lower wrap boundary is equivalent to + // the wrap trigger condition minus the container size (`num_bytes(size) * (len + 1)`). + if (burst == BURST_WRAP && ret_addr >= wrp_bond + (num_bytes(size) * (len + 1))) begin + ret_addr = ret_addr - (num_bytes(size) * (len + 1)); + end + end + return ret_addr; + endfunction + + /// Index of lowest byte in beat (see A3-51).
+ function automatic shortint unsigned + beat_lower_byte(largest_addr_t addr, size_t size, len_t len, burst_t burst, + shortint unsigned strobe_width, shortint unsigned i_beat); + largest_addr_t _addr = beat_addr(addr, size, len, burst, i_beat); + return _addr - (_addr / strobe_width) * strobe_width; + endfunction + + /// Index of highest byte in beat (see A3-51). + function automatic shortint unsigned + beat_upper_byte(largest_addr_t addr, size_t size, len_t len, burst_t burst, + shortint unsigned strobe_width, shortint unsigned i_beat); + if (i_beat == 0) begin + return aligned_addr(addr, size) + (num_bytes(size) - 1) - (addr / strobe_width) * strobe_width; + end else begin + return beat_lower_byte(addr, size, len, burst, strobe_width, i_beat) + num_bytes(size) - 1; + end + endfunction + + /// Is the bufferable bit set? + function automatic logic bufferable(cache_t cache); + return |(cache & CACHE_BUFFERABLE); + endfunction + + /// Is the modifiable bit set? + function automatic logic modifiable(cache_t cache); + return |(cache & CACHE_MODIFIABLE); + endfunction + + /// Memory Type. + typedef enum logic [3:0] { + DEVICE_NONBUFFERABLE, + DEVICE_BUFFERABLE, + NORMAL_NONCACHEABLE_NONBUFFERABLE, + NORMAL_NONCACHEABLE_BUFFERABLE, + WTHRU_NOALLOCATE, + WTHRU_RALLOCATE, + WTHRU_WALLOCATE, + WTHRU_RWALLOCATE, + WBACK_NOALLOCATE, + WBACK_RALLOCATE, + WBACK_WALLOCATE, + WBACK_RWALLOCATE + } mem_type_t; + + /// Create an `AR_CACHE` field from a `mem_type_t` type. 
+ function automatic logic [3:0] get_arcache(mem_type_t mtype); + unique case (mtype) + DEVICE_NONBUFFERABLE : return 4'b0000; + DEVICE_BUFFERABLE : return 4'b0001; + NORMAL_NONCACHEABLE_NONBUFFERABLE : return 4'b0010; + NORMAL_NONCACHEABLE_BUFFERABLE : return 4'b0011; + WTHRU_NOALLOCATE : return 4'b1010; + WTHRU_RALLOCATE : return 4'b1110; + WTHRU_WALLOCATE : return 4'b1010; + WTHRU_RWALLOCATE : return 4'b1110; + WBACK_NOALLOCATE : return 4'b1011; + WBACK_RALLOCATE : return 4'b1111; + WBACK_WALLOCATE : return 4'b1011; + WBACK_RWALLOCATE : return 4'b1111; + endcase // mtype + endfunction + + /// Create an `AW_CACHE` field from a `mem_type_t` type. + function automatic logic [3:0] get_awcache(mem_type_t mtype); + unique case (mtype) + DEVICE_NONBUFFERABLE : return 4'b0000; + DEVICE_BUFFERABLE : return 4'b0001; + NORMAL_NONCACHEABLE_NONBUFFERABLE : return 4'b0010; + NORMAL_NONCACHEABLE_BUFFERABLE : return 4'b0011; + WTHRU_NOALLOCATE : return 4'b0110; + WTHRU_RALLOCATE : return 4'b0110; + WTHRU_WALLOCATE : return 4'b1110; + WTHRU_RWALLOCATE : return 4'b1110; + WBACK_NOALLOCATE : return 4'b0111; + WBACK_RALLOCATE : return 4'b0111; + WBACK_WALLOCATE : return 4'b1111; + WBACK_RWALLOCATE : return 4'b1111; + endcase // mtype + endfunction + + /// RESP precedence: DECERR > SLVERR > OKAY > EXOKAY. This is not defined in the AXI standard but + /// depends on the implementation. We consistently use the precedence above. Rationale: + /// - EXOKAY means an exclusive access was successful, whereas OKAY means it was not. Thus, if + /// OKAY and EXOKAY are to be merged, OKAY precedes because the exclusive access was not fully + /// successful. + /// - Both DECERR and SLVERR mean (part of) a transaction were unsuccessful, whereas OKAY means an + /// entire transaction was successful. Thus both DECERR and SLVERR precede OKAY. 
+ /// - DECERR means (part of) a transaction could not be routed to a slave component, whereas + /// SLVERR means the transaction reached a slave component but led to an error condition there. + /// Thus DECERR precedes SLVERR because DECERR happens earlier in the handling of a transaction. + function automatic resp_t resp_precedence(resp_t resp_a, resp_t resp_b); + unique case (resp_a) + RESP_OKAY: begin + // Any response except EXOKAY precedes OKAY. + if (resp_b == RESP_EXOKAY) begin + return resp_a; + end else begin + return resp_b; + end + end + RESP_EXOKAY: begin + // Any response precedes EXOKAY. + return resp_b; + end + RESP_SLVERR: begin + // Only DECERR precedes SLVERR. + if (resp_b == RESP_DECERR) begin + return resp_b; + end else begin + return resp_a; + end + end + RESP_DECERR: begin + // No response precedes DECERR. + return resp_a; + end + endcase + endfunction + + // ATOP[5:0] + /// - Sends a single data value with an address. + /// - The target swaps the value at the addressed location with the data value that is supplied in + /// the transaction. + /// - The original data value at the addressed location is returned. + /// - Outbound data size is 1, 2, 4, or 8 bytes. + /// - Inbound data size is the same as the outbound data size. + localparam ATOP_ATOMICSWAP = 6'b110000; + /// - Sends two data values, the compare value and the swap value, to the addressed location. + /// The compare and swap values are of equal size. + /// - The data value at the addressed location is checked against the compare value: + /// - If the values match, the swap value is written to the addressed location. + /// - If the values do not match, the swap value is not written to the addressed location. + /// - The original data value at the addressed location is returned. + /// - Outbound data size is 2, 4, 8, 16, or 32 bytes.
+ /// - Inbound data size is half of the outbound data size because the outbound data contains both + /// compare and swap values, whereas the inbound data has only the original data value. + localparam ATOP_ATOMICCMP = 6'b110001; + // ATOP[5:4] + /// Perform no atomic operation. + localparam ATOP_NONE = 2'b00; + /// - Sends a single data value with an address and the atomic operation to be performed. + /// - The target performs the operation using the sent data and value at the addressed location as + /// operands. + /// - The result is stored in the address location. + /// - A single response is given without data. + /// - Outbound data size is 1, 2, 4, or 8 bytes. + localparam ATOP_ATOMICSTORE = 2'b01; + /// Sends a single data value with an address and the atomic operation to be performed. + /// - The original data value at the addressed location is returned. + /// - The target performs the operation using the sent data and value at the addressed location as + /// operands. + /// - The result is stored in the address location. + /// - Outbound data size is 1, 2, 4, or 8 bytes. + /// - Inbound data size is the same as the outbound data size. + localparam ATOP_ATOMICLOAD = 2'b10; + // ATOP[3] + /// For AtomicStore and AtomicLoad transactions `AWATOP[3]` indicates the endianness that is + /// required for the atomic operation. The value of `AWATOP[3]` applies to arithmetic operations + /// only and is ignored for bitwise logical operations. + /// When deasserted, this bit indicates that the operation is little-endian. + localparam ATOP_LITTLE_END = 1'b0; + /// When asserted, this bit indicates that the operation is big-endian. + localparam ATOP_BIG_END = 1'b1; + // ATOP[2:0] + /// The value in memory is added to the sent data and the result stored in memory. + localparam ATOP_ADD = 3'b000; + /// Every set bit in the sent data clears the corresponding bit of the data in memory. 
+ localparam ATOP_CLR = 3'b001; + /// Bitwise exclusive OR of the sent data and value in memory. + localparam ATOP_EOR = 3'b010; + /// Every set bit in the sent data sets the corresponding bit of the data in memory. + localparam ATOP_SET = 3'b011; + /// The value stored in memory is the maximum of the existing value and sent data. This operation + /// assumes signed data. + localparam ATOP_SMAX = 3'b100; + /// The value stored in memory is the minimum of the existing value and sent data. This operation + /// assumes signed data. + localparam ATOP_SMIN = 3'b101; + /// The value stored in memory is the maximum of the existing value and sent data. This operation + /// assumes unsigned data. + localparam ATOP_UMAX = 3'b110; + /// The value stored in memory is the minimum of the existing value and sent data. This operation + /// assumes unsigned data. + localparam ATOP_UMIN = 3'b111; + // ATOP[5] == 1'b1 indicates that an atomic transaction has a read response + // Usage, e.g.: if (req_i.aw.atop[axi_pkg::ATOP_R_RESP]) begin + localparam ATOP_R_RESP = 32'd5; + + // `xbar_latency_e` and `xbar_cfg_t` are documented in `doc/axi_xbar.md`. + /// Slice on Demux AW channel. + localparam logic [9:0] DemuxAw = (1 << 9); + /// Slice on Demux W channel. + localparam logic [9:0] DemuxW = (1 << 8); + /// Slice on Demux B channel. + localparam logic [9:0] DemuxB = (1 << 7); + /// Slice on Demux AR channel. + localparam logic [9:0] DemuxAr = (1 << 6); + /// Slice on Demux R channel. + localparam logic [9:0] DemuxR = (1 << 5); + /// Slice on Mux AW channel. + localparam logic [9:0] MuxAw = (1 << 4); + /// Slice on Mux W channel. + localparam logic [9:0] MuxW = (1 << 3); + /// Slice on Mux B channel. + localparam logic [9:0] MuxB = (1 << 2); + /// Slice on Mux AR channel. + localparam logic [9:0] MuxAr = (1 << 1); + /// Slice on Mux R channel. + localparam logic [9:0] MuxR = (1 << 0); + /// Latency configuration for `axi_xbar`.
+ typedef enum logic [9:0] { + NO_LATENCY = 10'b000_00_000_00, + CUT_SLV_AX = DemuxAw | DemuxAr, + CUT_MST_AX = MuxAw | MuxAr, + CUT_ALL_AX = DemuxAw | DemuxAr | MuxAw | MuxAr, + CUT_SLV_PORTS = DemuxAw | DemuxW | DemuxB | DemuxAr | DemuxR, + CUT_MST_PORTS = MuxAw | MuxW | MuxB | MuxAr | MuxR, + CUT_ALL_PORTS = 10'b111_11_111_11 + } xbar_latency_e; + + /// Configuration for `axi_xbar`. + typedef struct packed { + int unsigned NoSlvPorts; + int unsigned NoMstPorts; + int unsigned MaxMstTrans; + int unsigned MaxSlvTrans; + bit FallThrough; + xbar_latency_e LatencyMode; + int unsigned AxiIdWidthSlvPorts; + int unsigned AxiIdUsedSlvPorts; + bit UniqueIds; + int unsigned AxiAddrWidth; + int unsigned AxiDataWidth; + int unsigned NoAddrRules; + } xbar_cfg_t; + + /// Commonly used rule types for `axi_xbar` (64-bit addresses). + typedef struct packed { + int unsigned idx; + logic [63:0] start_addr; + logic [63:0] end_addr; + } xbar_rule_64_t; + + /// Commonly used rule types for `axi_xbar` (32-bit addresses). + typedef struct packed { + int unsigned idx; + logic [31:0] start_addr; + logic [31:0] end_addr; + } xbar_rule_32_t; +endpackage diff --git a/test/type_param/vendor/pulp-platform/axi/src/axi_to_axi_lite.sv b/test/type_param/vendor/pulp-platform/axi/src/axi_to_axi_lite.sv new file mode 100644 index 00000000..c75887a6 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/axi/src/axi_to_axi_lite.sv @@ -0,0 +1,323 @@ +// Copyright (c) 2014-2020 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Authors:
+// - Wolfgang Roenninger
+// - Andreas Kurth
+// - Fabian Schuiki
+// - Florian Zaruba
+
+/// An AXI4+ATOP to AXI4-Lite converter with atomic transaction and burst support.
+module axi_to_axi_lite #(
+  parameter int unsigned AxiAddrWidth    = 32'd0, // forwarded to the burst splitter; checked > 0 below
+  parameter int unsigned AxiDataWidth    = 32'd0, // forwarded to the burst splitter; checked > 0 below
+  parameter int unsigned AxiIdWidth      = 32'd0, // forwarded to all three stages; checked > 0 below
+  parameter int unsigned AxiUserWidth    = 32'd0, // forwarded to the burst splitter only
+  parameter int unsigned AxiMaxWriteTxns = 32'd0, // forwarded to all three stages
+  parameter int unsigned AxiMaxReadTxns  = 32'd0, // forwarded to the burst splitter and the ID reflect
+  parameter bit          FallThrough     = 1'b1, // FIFOs in Fall through mode in ID reflect
+  parameter type         full_req_t  = logic,
+  parameter type         full_resp_t = logic,
+  parameter type         lite_req_t  = logic,
+  parameter type         lite_resp_t = logic
+) (
+  input  logic       clk_i,    // Clock
+  input  logic       rst_ni,   // Asynchronous reset active low
+  input  logic       test_i,   // Testmode enable
+  // slave port full AXI4+ATOP
+  input  full_req_t  slv_req_i,
+  output full_resp_t slv_resp_o,
+  // master port AXI4-Lite
+  output lite_req_t  mst_req_o,
+  input  lite_resp_t mst_resp_i
+);
+  // full bus declarations
+  full_req_t  filtered_req,  splitted_req;  // requests leaving the ATOP filter / the burst splitter
+  full_resp_t filtered_resp, splitted_resp; // responses entering the ATOP filter / the burst splitter
+
+  // atomics adapter so that atomics can be resolved
+  axi_atop_filter #(
+    .AxiIdWidth      ( AxiIdWidth      ),
+    .AxiMaxWriteTxns ( AxiMaxWriteTxns ),
+    .req_t           ( full_req_t      ),
+    .resp_t          ( full_resp_t     )
+  ) i_axi_atop_filter(
+    .clk_i      ( clk_i         ),
+    .rst_ni     ( rst_ni        ),
+    .slv_req_i  ( slv_req_i     ),
+    .slv_resp_o ( slv_resp_o    ),
+    .mst_req_o  ( filtered_req  ),
+    .mst_resp_i ( filtered_resp )
+  );
+
+  // burst splitter so that the id reflect module has no burst accessing it
+  axi_burst_splitter #(
+    .MaxReadTxns  ( AxiMaxReadTxns  ),
+    .MaxWriteTxns ( AxiMaxWriteTxns ),
+    .AddrWidth    ( AxiAddrWidth    ),
+    .DataWidth    ( AxiDataWidth    ),
+    .IdWidth      ( AxiIdWidth      ),
+    .UserWidth    ( AxiUserWidth    ),
+    .req_t        ( full_req_t      ),
+    .resp_t       ( full_resp_t     )
+  ) i_axi_burst_splitter (
+    .clk_i      ( clk_i         ),
+    .rst_ni     ( rst_ni        ),
+    .slv_req_i  ( filtered_req  ),
+    .slv_resp_o ( filtered_resp ),
+    .mst_req_o  ( splitted_req  ),
+    .mst_resp_i ( splitted_resp )
+  );
+
+  // The ID reflect module converts the full AXI bus to AXI-Lite by rewiring the signals
+  axi_to_axi_lite_id_reflect #(
+    .AxiIdWidth      ( AxiIdWidth      ),
+    .AxiMaxWriteTxns ( AxiMaxWriteTxns ),
+    .AxiMaxReadTxns  ( AxiMaxReadTxns  ),
+    .FallThrough     ( FallThrough     ),
+    .full_req_t      ( full_req_t      ),
+    .full_resp_t     ( full_resp_t     ),
+    .lite_req_t      ( lite_req_t      ),
+    .lite_resp_t     ( lite_resp_t     )
+  ) i_axi_to_axi_lite_id_reflect (
+    .clk_i      ( clk_i         ),
+    .rst_ni     ( rst_ni        ),
+    .test_i     ( test_i        ),
+    .slv_req_i  ( splitted_req  ),
+    .slv_resp_o ( splitted_resp ),
+    .mst_req_o  ( mst_req_o     ),
+    .mst_resp_i ( mst_resp_i    )
+  );
+
+  // Assertions, check params (the width parameters default to 0, so they must be overridden)
+  // pragma translate_off
+  `ifndef VERILATOR
+  initial begin
+    assume (AxiIdWidth   > 0) else $fatal(1, "AXI ID width has to be > 0");
+    assume (AxiAddrWidth > 0) else $fatal(1, "AXI address width has to be > 0");
+    assume (AxiDataWidth > 0) else $fatal(1, "AXI data width has to be > 0");
+  end
+  `endif
+  // pragma translate_on
+endmodule
+
+// Description: This module does the translation of the full AXI4+ATOP to AXI4-Lite signals.
+//              It reflects the ID of the incoming transaction and crops all signals not used
+//              in AXI4-Lite. It requires that incoming AXI4+ATOP transactions have a
+//              `axi_pkg::len_t` of `'0` and an `axi_pkg::atop_t` of `'0`.
+
+module axi_to_axi_lite_id_reflect #(
+  parameter int unsigned AxiIdWidth      = 32'd0, // width of the IDs stored in the reflect FIFOs
+  parameter int unsigned AxiMaxWriteTxns = 32'd0, // depth of the AW ID FIFO
+  parameter int unsigned AxiMaxReadTxns  = 32'd0, // depth of the AR ID FIFO
+  parameter bit          FallThrough     = 1'b1, // FIFOs in fall through mode
+  parameter type         full_req_t  = logic,
+  parameter type         full_resp_t = logic,
+  parameter type         lite_req_t  = logic,
+  parameter type         lite_resp_t = logic
+) (
+  input  logic       clk_i,    // Clock
+  input  logic       rst_ni,   // Asynchronous reset active low
+  input  logic       test_i,   // Testmode enable
+  // slave port full AXI
+  input  full_req_t  slv_req_i,
+  output full_resp_t slv_resp_o,
+  // master port AXI LITE
+  output lite_req_t  mst_req_o,
+  input  lite_resp_t mst_resp_i
+);
+  typedef logic [AxiIdWidth-1:0] id_t;
+
+  // FIFO status and control signals
+  logic aw_full, aw_empty, aw_push, aw_pop, ar_full, ar_empty, ar_push, ar_pop;
+  id_t  aw_reflect_id, ar_reflect_id; // IDs at the FIFO outputs, returned on B and R
+
+  assign slv_resp_o = '{
+    aw_ready: mst_resp_i.aw_ready & ~aw_full, // no AW accepted while the AW ID FIFO is full
+    w_ready:  mst_resp_i.w_ready,
+    b: '{
+      id:      aw_reflect_id,
+      resp:    mst_resp_i.b.resp,
+      default: '0
+    },
+    b_valid:  mst_resp_i.b_valid & ~aw_empty, // B is only presented while an AW ID is stored
+    ar_ready: mst_resp_i.ar_ready & ~ar_full, // no AR accepted while the AR ID FIFO is full
+    r: '{
+      id:      ar_reflect_id,
+      data:    mst_resp_i.r.data,
+      resp:    mst_resp_i.r.resp,
+      last:    1'b1, // tied high; ar_axi_len below requires AR len == 0
+      default: '0
+    },
+    r_valid:  mst_resp_i.r_valid & ~ar_empty, // R is only presented while an AR ID is stored
+    default:  '0
+  };
+
+  // Write ID reflection
+  assign aw_push = mst_req_o.aw_valid & slv_resp_o.aw_ready; // push the AW ID on the AW handshake
+  assign aw_pop  = slv_resp_o.b_valid & mst_req_o.b_ready;   // pop it again on the B handshake
+  fifo_v3 #(
+    .FALL_THROUGH ( FallThrough     ),
+    .DEPTH        ( AxiMaxWriteTxns ),
+    .dtype        ( id_t            )
+  ) i_aw_id_fifo (
+    .clk_i     ( clk_i           ),
+    .rst_ni    ( rst_ni          ),
+    .flush_i   ( 1'b0            ),
+    .testmode_i( test_i          ),
+    .full_o    ( aw_full         ),
+    .empty_o   ( aw_empty        ),
+    .usage_o   ( /*not used*/    ),
+    .data_i    ( slv_req_i.aw.id ),
+    .push_i    ( aw_push         ),
+    .data_o    ( aw_reflect_id   ),
+    .pop_i     ( aw_pop          )
+  );
+
+  // Read ID reflection
+  assign ar_push = mst_req_o.ar_valid & slv_resp_o.ar_ready; // push the AR ID on the AR handshake
+  assign ar_pop  = slv_resp_o.r_valid & mst_req_o.r_ready;   // pop it again on the R handshake
+  fifo_v3 #(
+    .FALL_THROUGH ( FallThrough    ),
+    .DEPTH        ( AxiMaxReadTxns ),
+    .dtype        ( id_t           )
+  ) i_ar_id_fifo (
+    .clk_i     ( clk_i           ),
+    .rst_ni    ( rst_ni          ),
+    .flush_i   ( 1'b0            ),
+    .testmode_i( test_i          ),
+    .full_o    ( ar_full         ),
+    .empty_o   ( ar_empty        ),
+    .usage_o   ( /*not used*/    ),
+    .data_i    ( slv_req_i.ar.id ),
+    .push_i    ( ar_push         ),
+    .data_o    ( ar_reflect_id   ),
+    .pop_i     ( ar_pop          )
+  );
+
+  assign mst_req_o = '{
+    aw: '{
+      addr: slv_req_i.aw.addr,
+      prot: slv_req_i.aw.prot
+    },
+    aw_valid: slv_req_i.aw_valid & ~aw_full, // hold back AW while the AW ID FIFO is full
+    w: '{
+      data: slv_req_i.w.data,
+      strb: slv_req_i.w.strb
+    },
+    w_valid:  slv_req_i.w_valid,
+    b_ready:  slv_req_i.b_ready & ~aw_empty, // only accept B while an AW ID is stored
+    ar: '{
+      addr: slv_req_i.ar.addr,
+      prot: slv_req_i.ar.prot
+    },
+    ar_valid: slv_req_i.ar_valid & ~ar_full, // hold back AR while the AR ID FIFO is full
+    r_ready:  slv_req_i.r_ready & ~ar_empty, // only accept R while an AR ID is stored
+    default:  '0
+  };
+
+  // Assertions: incoming requests must be single-beat and non-atomic
+  // pragma translate_off
+  `ifndef VERILATOR
+  aw_atop: assume property( @(posedge clk_i) disable iff (~rst_ni)
+      slv_req_i.aw_valid |-> (slv_req_i.aw.atop == '0)) else
+    $fatal(1, "Module does not support atomics. Value observed: %0b", slv_req_i.aw.atop);
+  aw_axi_len: assume property( @(posedge clk_i) disable iff (~rst_ni)
+      slv_req_i.aw_valid |-> (slv_req_i.aw.len == '0)) else
+    $fatal(1, "AW request length has to be zero. Value observed: %0b", slv_req_i.aw.len);
+  w_axi_last: assume property( @(posedge clk_i) disable iff (~rst_ni)
+      slv_req_i.w_valid |-> (slv_req_i.w.last == 1'b1)) else
+    $fatal(1, "W last signal has to be one. Value observed: %0b", slv_req_i.w.last);
+  ar_axi_len: assume property( @(posedge clk_i) disable iff (~rst_ni)
+      slv_req_i.ar_valid |-> (slv_req_i.ar.len == '0)) else
+    $fatal(1, "AR request length has to be zero. Value observed: %0b", slv_req_i.ar.len);
+  `endif
+  // pragma translate_on
+endmodule
+
+// interface wrapper: builds the struct types and instantiates axi_to_axi_lite
+`include "axi/assign.svh"
+`include "axi/typedef.svh"
+module axi_to_axi_lite_intf #(
+  /// AXI bus parameters
+  parameter int unsigned AXI_ADDR_WIDTH     = 32'd0,
+  parameter int unsigned AXI_DATA_WIDTH     = 32'd0,
+  parameter int unsigned AXI_ID_WIDTH       = 32'd0,
+  parameter int unsigned AXI_USER_WIDTH     = 32'd0,
+  /// Maximum number of outstanding writes.
+  parameter int unsigned AXI_MAX_WRITE_TXNS = 32'd1,
+  /// Maximum number of outstanding reads.
+  parameter int unsigned AXI_MAX_READ_TXNS  = 32'd1,
+  parameter bit          FALL_THROUGH       = 1'b1 // forwarded as FallThrough
+) (
+  input logic     clk_i,
+  input logic     rst_ni,
+  input logic     testmode_i, // connected to test_i of axi_to_axi_lite
+  AXI_BUS.Slave   slv,
+  AXI_LITE.Master mst
+);
+  typedef logic [AXI_ADDR_WIDTH-1:0]   addr_t;
+  typedef logic [AXI_DATA_WIDTH-1:0]   data_t;
+  typedef logic [AXI_ID_WIDTH-1:0]     id_t;
+  typedef logic [AXI_DATA_WIDTH/8-1:0] strb_t; // one strobe bit per data byte
+  typedef logic [AXI_USER_WIDTH-1:0]   user_t;
+  // full channels typedefs
+  `AXI_TYPEDEF_AW_CHAN_T(full_aw_chan_t, addr_t, id_t, user_t)
+  `AXI_TYPEDEF_W_CHAN_T(full_w_chan_t, data_t, strb_t, user_t)
+  `AXI_TYPEDEF_B_CHAN_T(full_b_chan_t, id_t, user_t)
+  `AXI_TYPEDEF_AR_CHAN_T(full_ar_chan_t, addr_t, id_t, user_t)
+  `AXI_TYPEDEF_R_CHAN_T(full_r_chan_t, data_t, id_t, user_t)
+  `AXI_TYPEDEF_REQ_T(full_req_t, full_aw_chan_t, full_w_chan_t, full_ar_chan_t)
+  `AXI_TYPEDEF_RESP_T(full_resp_t, full_b_chan_t, full_r_chan_t)
+  // LITE channels typedef
+  `AXI_LITE_TYPEDEF_AW_CHAN_T(lite_aw_chan_t, addr_t)
+  `AXI_LITE_TYPEDEF_W_CHAN_T(lite_w_chan_t, data_t, strb_t)
+  `AXI_LITE_TYPEDEF_B_CHAN_T(lite_b_chan_t)
+  `AXI_LITE_TYPEDEF_AR_CHAN_T(lite_ar_chan_t, addr_t)
+  `AXI_LITE_TYPEDEF_R_CHAN_T (lite_r_chan_t, data_t)
+  `AXI_LITE_TYPEDEF_REQ_T(lite_req_t, lite_aw_chan_t, lite_w_chan_t, lite_ar_chan_t)
+  `AXI_LITE_TYPEDEF_RESP_T(lite_resp_t, lite_b_chan_t, lite_r_chan_t)
+
+  full_req_t  full_req;
+  full_resp_t full_resp;
+  lite_req_t  lite_req;
+  lite_resp_t lite_resp;
+
+  `AXI_ASSIGN_TO_REQ(full_req, slv)
+  `AXI_ASSIGN_FROM_RESP(slv, full_resp)
+
+  `AXI_LITE_ASSIGN_FROM_REQ(mst, lite_req)
+  `AXI_LITE_ASSIGN_TO_RESP(lite_resp, mst)
+
+  axi_to_axi_lite #(
+    .AxiAddrWidth    ( AXI_ADDR_WIDTH     ),
+    .AxiDataWidth    ( AXI_DATA_WIDTH     ),
+    .AxiIdWidth      ( AXI_ID_WIDTH       ),
+    .AxiUserWidth    ( AXI_USER_WIDTH     ),
+    .AxiMaxWriteTxns ( AXI_MAX_WRITE_TXNS ),
+    .AxiMaxReadTxns  ( AXI_MAX_READ_TXNS  ),
+    .FallThrough     ( FALL_THROUGH       ), // FIFOs in Fall through mode in ID reflect
+    .full_req_t      ( full_req_t         ),
+    .full_resp_t     ( full_resp_t        ),
+    .lite_req_t      ( lite_req_t         ),
+    .lite_resp_t     ( lite_resp_t        )
+  ) i_axi_to_axi_lite (
+    .clk_i      ( clk_i      ),
+    .rst_ni     ( rst_ni     ),
+    .test_i     ( testmode_i ),
+    // slave port full AXI4+ATOP
+    .slv_req_i  ( full_req   ),
+    .slv_resp_o ( full_resp  ),
+    // master port AXI4-Lite
+    .mst_req_o  ( lite_req   ),
+    .mst_resp_i ( lite_resp  )
+  );
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/axi/src/axi_xbar.sv b/test/type_param/vendor/pulp-platform/axi/src/axi_xbar.sv
new file mode 100644
index 00000000..d66cd97d
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/axi/src/axi_xbar.sv
@@ -0,0 +1,324 @@
+// Copyright (c) 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Authors:
+// - Wolfgang Roenninger
+// - Andreas Kurth
+// - Florian Zaruba
+
+// axi_xbar: Fully-connected AXI4+ATOP crossbar with an arbitrary number of slave and master ports.
+// See `doc/axi_xbar.md` for the documentation, including the definition of parameters and ports.
+module axi_xbar #(
+  parameter axi_pkg::xbar_cfg_t Cfg = '0,
+  parameter bit  ATOPs              = 1'b1, // forwarded to the decode-error slave
+  parameter type slv_aw_chan_t      = logic,
+  parameter type mst_aw_chan_t      = logic,
+  parameter type w_chan_t           = logic,
+  parameter type slv_b_chan_t       = logic,
+  parameter type mst_b_chan_t       = logic,
+  parameter type slv_ar_chan_t      = logic,
+  parameter type mst_ar_chan_t      = logic,
+  parameter type slv_r_chan_t       = logic,
+  parameter type mst_r_chan_t       = logic,
+  parameter type slv_req_t          = logic,
+  parameter type slv_resp_t         = logic,
+  parameter type mst_req_t          = logic,
+  parameter type mst_resp_t         = logic,
+  parameter type rule_t             = axi_pkg::xbar_rule_64_t
+) (
+  input  logic                                                       clk_i,
+  input  logic                                                       rst_ni,
+  input  logic                                                       test_i,
+  input  slv_req_t  [Cfg.NoSlvPorts-1:0]                             slv_ports_req_i,
+  output slv_resp_t [Cfg.NoSlvPorts-1:0]                             slv_ports_resp_o,
+  output mst_req_t  [Cfg.NoMstPorts-1:0]                             mst_ports_req_o,
+  input  mst_resp_t [Cfg.NoMstPorts-1:0]                             mst_ports_resp_i,
+  input  rule_t     [Cfg.NoAddrRules-1:0]                            addr_map_i,
+  input  logic      [Cfg.NoSlvPorts-1:0]                             en_default_mst_port_i,
+  input  logic      [Cfg.NoSlvPorts-1:0][$clog2(Cfg.NoMstPorts)-1:0] default_mst_port_i
+);
+
+  typedef logic [Cfg.AxiAddrWidth-1:0]           addr_t;
+  // to account for the decoding error slave
+  typedef logic [$clog2(Cfg.NoMstPorts + 1)-1:0] mst_port_idx_t;
+
+  // signals from the axi_demuxes, one index more for decode error
+  slv_req_t  [Cfg.NoSlvPorts-1:0][Cfg.NoMstPorts:0]  slv_reqs;
+  slv_resp_t [Cfg.NoSlvPorts-1:0][Cfg.NoMstPorts:0]  slv_resps;
+
+  // workaround for issue #133 (problem with vsim 10.6c)
+  localparam int unsigned cfg_NoMstPorts = Cfg.NoMstPorts;
+
+  // signals into the axi_muxes, are of type slave as the multiplexer extends the ID
+  slv_req_t  [Cfg.NoMstPorts-1:0][Cfg.NoSlvPorts-1:0] mst_reqs;
+  slv_resp_t [Cfg.NoMstPorts-1:0][Cfg.NoSlvPorts-1:0] mst_resps;
+
+  for (genvar i = 0; i < Cfg.NoSlvPorts; i++) begin : gen_slv_port_demux
+    logic [$clog2(Cfg.NoMstPorts)-1:0] dec_aw,        dec_ar;
+    mst_port_idx_t                     slv_aw_select, slv_ar_select;
+    logic                              dec_aw_valid,  dec_aw_error;
+    logic                              dec_ar_valid,  dec_ar_error;
+
+    addr_decode #(
+      .NoIndices  ( Cfg.NoMstPorts  ),
+      .NoRules    ( Cfg.NoAddrRules ),
+      .addr_t     ( addr_t          ),
+      .rule_t     ( rule_t          )
+    ) i_axi_aw_decode (
+      .addr_i           ( slv_ports_req_i[i].aw.addr ),
+      .addr_map_i       ( addr_map_i                 ),
+      .idx_o            ( dec_aw                     ),
+      .dec_valid_o      ( dec_aw_valid               ),
+      .dec_error_o      ( dec_aw_error               ),
+      .en_default_idx_i ( en_default_mst_port_i[i]   ),
+      .default_idx_i    ( default_mst_port_i[i]      )
+    );
+
+    addr_decode #(
+      .NoIndices  ( Cfg.NoMstPorts  ),
+      .addr_t     ( addr_t          ),
+      .NoRules    ( Cfg.NoAddrRules ),
+      .rule_t     ( rule_t          )
+    ) i_axi_ar_decode (
+      .addr_i           ( slv_ports_req_i[i].ar.addr ),
+      .addr_map_i       ( addr_map_i                 ),
+      .idx_o            ( dec_ar                     ),
+      .dec_valid_o      ( dec_ar_valid               ),
+      .dec_error_o      ( dec_ar_error               ),
+      .en_default_idx_i ( en_default_mst_port_i[i]   ),
+      .default_idx_i    ( default_mst_port_i[i]      )
+    );
+
+    assign slv_aw_select = (dec_aw_error) ? // decode errors select index NoMstPorts (error slave)
+        mst_port_idx_t'(Cfg.NoMstPorts) : mst_port_idx_t'(dec_aw);
+    assign slv_ar_select = (dec_ar_error) ? // decode errors select index NoMstPorts (error slave)
+        mst_port_idx_t'(Cfg.NoMstPorts) : mst_port_idx_t'(dec_ar);
+
+    // make sure that the default slave does not get changed, if there is an unserved Ax
+    // pragma translate_off
+    `ifndef VERILATOR
+    `ifndef XSIM
+    default disable iff (~rst_ni);
+    default_aw_mst_port_en: assert property(
+      @(posedge clk_i) (slv_ports_req_i[i].aw_valid && !slv_ports_resp_o[i].aw_ready)
+          |=> $stable(en_default_mst_port_i[i]))
+        else $fatal (1, $sformatf("It is not allowed to change the default mst port\
+ enable, when there is an unserved Aw beat. Slave Port: %0d", i));
+    default_aw_mst_port: assert property(
+      @(posedge clk_i) (slv_ports_req_i[i].aw_valid && !slv_ports_resp_o[i].aw_ready)
+          |=> $stable(default_mst_port_i[i]))
+        else $fatal (1, $sformatf("It is not allowed to change the default mst port\
+ when there is an unserved Aw beat. Slave Port: %0d", i));
+    default_ar_mst_port_en: assert property(
+      @(posedge clk_i) (slv_ports_req_i[i].ar_valid && !slv_ports_resp_o[i].ar_ready)
+          |=> $stable(en_default_mst_port_i[i]))
+        else $fatal (1, $sformatf("It is not allowed to change the enable, when\
+ there is an unserved Ar beat. Slave Port: %0d", i));
+    default_ar_mst_port: assert property(
+      @(posedge clk_i) (slv_ports_req_i[i].ar_valid && !slv_ports_resp_o[i].ar_ready)
+          |=> $stable(default_mst_port_i[i]))
+        else $fatal (1, $sformatf("It is not allowed to change the default mst port\
+ when there is an unserved Ar beat. Slave Port: %0d", i));
+    `endif
+    `endif
+    // pragma translate_on
+    axi_demux #(
+      .AxiIdWidth     ( Cfg.AxiIdWidthSlvPorts ),  // ID Width
+      .aw_chan_t      ( slv_aw_chan_t          ),  // AW Channel Type
+      .w_chan_t       ( w_chan_t               ),  //  W Channel Type
+      .b_chan_t       ( slv_b_chan_t           ),  //  B Channel Type
+      .ar_chan_t      ( slv_ar_chan_t          ),  // AR Channel Type
+      .r_chan_t       ( slv_r_chan_t           ),  //  R Channel Type
+      .req_t          ( slv_req_t              ),
+      .resp_t         ( slv_resp_t             ),
+      .NoMstPorts     ( Cfg.NoMstPorts + 1     ),  // one extra port for the decode-error slave
+      .MaxTrans       ( Cfg.MaxMstTrans        ),
+      .AxiLookBits    ( Cfg.AxiIdUsedSlvPorts  ),
+      .UniqueIds      ( Cfg.UniqueIds          ),
+      .FallThrough    ( Cfg.FallThrough        ),
+      .SpillAw        ( Cfg.LatencyMode[9]     ),
+      .SpillW         ( Cfg.LatencyMode[8]     ),
+      .SpillB         ( Cfg.LatencyMode[7]     ),
+      .SpillAr        ( Cfg.LatencyMode[6]     ),
+      .SpillR         ( Cfg.LatencyMode[5]     )
+    ) i_axi_demux (
+      .clk_i,   // Clock
+      .rst_ni,  // Asynchronous reset active low
+      .test_i,  // Testmode enable
+      .slv_req_i       ( slv_ports_req_i[i]  ),
+      .slv_aw_select_i ( slv_aw_select       ),
+      .slv_ar_select_i ( slv_ar_select       ),
+      .slv_resp_o      ( slv_ports_resp_o[i] ),
+      .mst_reqs_o      ( slv_reqs[i]         ),
+      .mst_resps_i     ( slv_resps[i]        )
+    );
+
+    axi_err_slv #(
+      .AxiIdWidth  ( Cfg.AxiIdWidthSlvPorts ),
+      .req_t       ( slv_req_t              ),
+      .resp_t      ( slv_resp_t             ),
+      .Resp        ( axi_pkg::RESP_DECERR   ),
+      .ATOPs       ( ATOPs                  ),
+      .MaxTrans    ( 4                      )   // Transactions terminate at this slave, so minimize
+                                                // resource consumption by accepting only a few
+                                                // transactions at a time.
+    ) i_axi_err_slv (
+      .clk_i,   // Clock
+      .rst_ni,  // Asynchronous reset active low
+      .test_i,  // Testmode enable
+      // slave port
+      .slv_req_i  ( slv_reqs[i][Cfg.NoMstPorts]   ),
+      .slv_resp_o ( slv_resps[i][cfg_NoMstPorts]  ) // indexed via the localparam workaround (issue #133)
+    );
+  end
+
+  // cross all channels
+  for (genvar i = 0; i < Cfg.NoSlvPorts; i++) begin : gen_xbar_slv_cross
+    for (genvar j = 0; j < Cfg.NoMstPorts; j++) begin : gen_xbar_mst_cross
+      assign mst_reqs[j][i]  = slv_reqs[i][j];  // demux i, output j -> mux j, input i
+      assign slv_resps[i][j] = mst_resps[j][i]; // and the response path back
+    end
+  end
+
+  for (genvar i = 0; i < Cfg.NoMstPorts; i++) begin : gen_mst_port_mux
+    axi_mux #(
+      .SlvAxiIDWidth ( Cfg.AxiIdWidthSlvPorts ), // ID width of the slave ports
+      .slv_aw_chan_t ( slv_aw_chan_t          ), // AW Channel Type, slave ports
+      .mst_aw_chan_t ( mst_aw_chan_t          ), // AW Channel Type, master port
+      .w_chan_t      ( w_chan_t               ), //  W Channel Type, all ports
+      .slv_b_chan_t  ( slv_b_chan_t           ), //  B Channel Type, slave ports
+      .mst_b_chan_t  ( mst_b_chan_t           ), //  B Channel Type, master port
+      .slv_ar_chan_t ( slv_ar_chan_t          ), // AR Channel Type, slave ports
+      .mst_ar_chan_t ( mst_ar_chan_t          ), // AR Channel Type, master port
+      .slv_r_chan_t  ( slv_r_chan_t           ), //  R Channel Type, slave ports
+      .mst_r_chan_t  ( mst_r_chan_t           ), //  R Channel Type, master port
+      .slv_req_t     ( slv_req_t              ),
+      .slv_resp_t    ( slv_resp_t             ),
+      .mst_req_t     ( mst_req_t              ),
+      .mst_resp_t    ( mst_resp_t             ),
+      .NoSlvPorts    ( Cfg.NoSlvPorts         ), // Number of Masters for the module
+      .MaxWTrans     ( Cfg.MaxSlvTrans        ),
+      .FallThrough   ( Cfg.FallThrough        ),
+      .SpillAw       ( Cfg.LatencyMode[4]     ),
+      .SpillW        ( Cfg.LatencyMode[3]     ),
+      .SpillB        ( Cfg.LatencyMode[2]     ),
+      .SpillAr       ( Cfg.LatencyMode[1]     ),
+      .SpillR        ( Cfg.LatencyMode[0]     )
+    ) i_axi_mux (
+      .clk_i,   // Clock
+      .rst_ni,  // Asynchronous reset active low
+      .test_i,  // Test Mode enable
+      .slv_reqs_i  ( mst_reqs[i]         ),
+      .slv_resps_o ( mst_resps[i]        ),
+      .mst_req_o   ( mst_ports_req_o[i]  ),
+      .mst_resp_i  ( mst_ports_resp_i[i] )
+    );
+  end
+
+  // pragma translate_off
+  `ifndef VERILATOR
+  `ifndef XSIM
+  initial begin : check_params
+    id_slv_req_ports: assert ($bits(slv_ports_req_i[0].aw.id ) == Cfg.AxiIdWidthSlvPorts) else
+      $fatal(1, $sformatf("Slv_req and aw_chan id width not equal."));
+    id_slv_resp_ports: assert ($bits(slv_ports_resp_o[0].r.id) == Cfg.AxiIdWidthSlvPorts) else
+      $fatal(1, $sformatf("Slv_resp and r_chan id width not equal.")); // names the checked R ID
+  end
+  `endif
+  `endif
+  // pragma translate_on
+endmodule
+
+`include "axi/assign.svh"
+`include "axi/typedef.svh"
+
+module axi_xbar_intf #(
+  parameter int unsigned AXI_USER_WIDTH =  0,
+  parameter axi_pkg::xbar_cfg_t Cfg     = '0,
+  parameter type rule_t                 = axi_pkg::xbar_rule_64_t
+) (
+  input  logic                                                    clk_i,
+  input  logic                                                    rst_ni,
+  input  logic                                                    test_i,
+  AXI_BUS.Slave                                                   slv_ports [Cfg.NoSlvPorts-1:0],
+  AXI_BUS.Master                                                  mst_ports [Cfg.NoMstPorts-1:0],
+  input  rule_t [Cfg.NoAddrRules-1:0]                             addr_map_i,
+  input  logic  [Cfg.NoSlvPorts-1:0]                              en_default_mst_port_i,
+  input  logic  [Cfg.NoSlvPorts-1:0][$clog2(Cfg.NoMstPorts)-1:0]  default_mst_port_i
+);
+
+  localparam int unsigned AxiIdWidthMstPorts = Cfg.AxiIdWidthSlvPorts + $clog2(Cfg.NoSlvPorts);
+
+  typedef logic [AxiIdWidthMstPorts     -1:0] id_mst_t;
+  typedef logic [Cfg.AxiIdWidthSlvPorts -1:0] id_slv_t;
+  typedef logic [Cfg.AxiAddrWidth       -1:0] addr_t;
+  typedef logic [Cfg.AxiDataWidth       -1:0] data_t;
+  typedef logic [Cfg.AxiDataWidth/8     -1:0] strb_t;
+  typedef logic [AXI_USER_WIDTH         -1:0] user_t;
+
+  `AXI_TYPEDEF_AW_CHAN_T(mst_aw_chan_t, addr_t, id_mst_t, user_t)
+  `AXI_TYPEDEF_AW_CHAN_T(slv_aw_chan_t, addr_t, id_slv_t, user_t)
+  `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t)
+  `AXI_TYPEDEF_B_CHAN_T(mst_b_chan_t, id_mst_t, user_t)
+  `AXI_TYPEDEF_B_CHAN_T(slv_b_chan_t, id_slv_t, user_t)
+  `AXI_TYPEDEF_AR_CHAN_T(mst_ar_chan_t, addr_t, id_mst_t, user_t)
+  `AXI_TYPEDEF_AR_CHAN_T(slv_ar_chan_t, addr_t, id_slv_t, user_t)
+  `AXI_TYPEDEF_R_CHAN_T(mst_r_chan_t, data_t, id_mst_t, user_t)
+  `AXI_TYPEDEF_R_CHAN_T(slv_r_chan_t, data_t, id_slv_t, user_t)
+  `AXI_TYPEDEF_REQ_T(mst_req_t, mst_aw_chan_t, w_chan_t, mst_ar_chan_t)
+  `AXI_TYPEDEF_REQ_T(slv_req_t, slv_aw_chan_t, w_chan_t, slv_ar_chan_t)
+  `AXI_TYPEDEF_RESP_T(mst_resp_t, mst_b_chan_t, mst_r_chan_t)
+  `AXI_TYPEDEF_RESP_T(slv_resp_t, slv_b_chan_t, slv_r_chan_t)
+
+  mst_req_t   [Cfg.NoMstPorts-1:0]  mst_reqs;
+  mst_resp_t  [Cfg.NoMstPorts-1:0]  mst_resps;
+  slv_req_t   [Cfg.NoSlvPorts-1:0]  slv_reqs;
+  slv_resp_t  [Cfg.NoSlvPorts-1:0]  slv_resps;
+
+  for (genvar i = 0; i < Cfg.NoMstPorts; i++) begin : gen_assign_mst
+    `AXI_ASSIGN_FROM_REQ(mst_ports[i], mst_reqs[i])
+    `AXI_ASSIGN_TO_RESP(mst_resps[i], mst_ports[i])
+  end
+
+  for (genvar i = 0; i < Cfg.NoSlvPorts; i++) begin : gen_assign_slv
+    `AXI_ASSIGN_TO_REQ(slv_reqs[i], slv_ports[i])
+    `AXI_ASSIGN_FROM_RESP(slv_ports[i], slv_resps[i])
+  end
+
+  axi_xbar #(
+    .Cfg  (Cfg),
+    .slv_aw_chan_t  ( slv_aw_chan_t ),
+    .mst_aw_chan_t  ( mst_aw_chan_t ),
+    .w_chan_t       ( w_chan_t      ),
+    .slv_b_chan_t   ( slv_b_chan_t  ),
+    .mst_b_chan_t   ( mst_b_chan_t  ),
+    .slv_ar_chan_t  ( slv_ar_chan_t ),
+    .mst_ar_chan_t  ( mst_ar_chan_t ),
+    .slv_r_chan_t   ( slv_r_chan_t  ),
+    .mst_r_chan_t   ( mst_r_chan_t  ),
+    .slv_req_t      ( slv_req_t     ),
+    .slv_resp_t     ( slv_resp_t    ),
+    .mst_req_t      ( mst_req_t     ),
+    .mst_resp_t     ( mst_resp_t    ),
+    .rule_t         ( rule_t        )
+  ) i_xbar (
+    .clk_i,
+    .rst_ni,
+    .test_i,
+    .slv_ports_req_i  (slv_reqs ),
+    .slv_ports_resp_o (slv_resps),
+    .mst_ports_req_o  (mst_reqs ),
+    .mst_ports_resp_i (mst_resps),
+    .addr_map_i,
+    .en_default_mst_port_i,
+    .default_mst_port_i
+  );
+
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/common_cells/include/common_cells/registers.svh
b/test/type_param/vendor/pulp-platform/common_cells/include/common_cells/registers.svh
new file mode 100644
index 00000000..b64f31a0
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/include/common_cells/registers.svh
@@ -0,0 +1,221 @@
+// Copyright 2018, 2021 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Stefan Mach
+// Description: Common register defines for RTL designs. Clock, reset, load, clear and data
+//              arguments are parenthesized wherever they are read, so expressions may be passed.
+
+`ifndef COMMON_CELLS_REGISTERS_SVH_
+`define COMMON_CELLS_REGISTERS_SVH_
+
+// Abridged Summary of available FF macros:
+// `FF:      asynchronous active-low reset
+// `FFAR:    asynchronous active-high reset
+// `FFARN:   [deprecated] asynchronous active-low reset
+// `FFSR:    synchronous active-high reset
+// `FFSRN:   synchronous active-low reset
+// `FFNR:    without reset
+// `FFL:     load-enable and asynchronous active-low reset
+// `FFLAR:   load-enable and asynchronous active-high reset
+// `FFLARN:  [deprecated] load-enable and asynchronous active-low reset
+// `FFLARNC: load-enable and asynchronous active-low reset and synchronous active-high clear
+// `FFLSR:   load-enable and synchronous active-high reset
+// `FFLSRN:  load-enable and synchronous active-low reset
+// `FFLNR:   load-enable without reset
+
+`ifdef VERILATOR
+`define NO_SYNOPSYS_FF 1
+`endif
+
+`define REG_DFLT_CLK clk_i
+`define REG_DFLT_RST rst_ni
+
+// Flip-Flop with asynchronous active-low reset
+// __q: Q output of FF
+// __d: D input of FF
+// __reset_value: value assigned upon reset
+// (__clk: clock input)
+// (__arst_n: asynchronous reset, active-low)
+`define FF(__q, __d, __reset_value, __clk = `REG_DFLT_CLK, __arst_n = `REG_DFLT_RST) \
+  always_ff @(posedge (__clk) or negedge (__arst_n)) begin                           \
+    if (!(__arst_n)) begin                                                           \
+      __q <= (__reset_value);                                                        \
+    end else begin                                                                   \
+      __q <= (__d);                                                                  \
+    end                                                                              \
+  end
+
+// Flip-Flop with asynchronous active-high reset
+// __q: Q output of FF
+// __d: D input of FF
+// __reset_value: value assigned upon reset
+// __clk: clock input
+// __arst: asynchronous reset, active-high
+`define FFAR(__q, __d, __reset_value, __clk, __arst)     \
+  always_ff @(posedge (__clk) or posedge (__arst)) begin \
+    if (__arst) begin                                    \
+      __q <= (__reset_value);                            \
+    end else begin                                       \
+      __q <= (__d);                                      \
+    end                                                  \
+  end
+
+// DEPRECATED - use `FF instead
+// Flip-Flop with asynchronous active-low reset
+// __q: Q output of FF
+// __d: D input of FF
+// __reset_value: value assigned upon reset
+// __clk: clock input
+// __arst_n: asynchronous reset, active-low
+`define FFARN(__q, __d, __reset_value, __clk, __arst_n) \
+  `FF(__q, __d, __reset_value, __clk, __arst_n)
+
+// Flip-Flop with synchronous active-high reset
+// __q: Q output of FF
+// __d: D input of FF
+// __reset_value: value assigned upon reset
+// __clk: clock input
+// __reset_clk: reset input, active-high
+`define FFSR(__q, __d, __reset_value, __clk, __reset_clk) \
+  `ifndef NO_SYNOPSYS_FF                                  \
+  /``* synopsys sync_set_reset `"__reset_clk`" *``/       \
+  `endif                                                  \
+  always_ff @(posedge (__clk)) begin                      \
+    __q <= (__reset_clk) ? (__reset_value) : (__d);       \
+  end
+
+// Flip-Flop with synchronous active-low reset
+// __q: Q output of FF
+// __d: D input of FF
+// __reset_value: value assigned upon reset
+// __clk: clock input
+// __reset_n_clk: reset input, active-low
+`define FFSRN(__q, __d, __reset_value, __clk, __reset_n_clk) \
+  `ifndef NO_SYNOPSYS_FF                                     \
+  /``* synopsys sync_set_reset `"__reset_n_clk`" *``/        \
+  `endif                                                     \
+  always_ff @(posedge (__clk)) begin                         \
+    __q <= (!(__reset_n_clk)) ? (__reset_value) : (__d);     \
+  end
+
+// Always-enable Flip-Flop without reset
+// __q: Q output of FF
+// __d: D input of FF
+// __clk: clock input
+`define FFNR(__q, __d, __clk)        \
+  always_ff @(posedge (__clk)) begin \
+    __q <= (__d);                    \
+  end
+
+// Flip-Flop with load-enable and asynchronous active-low reset (implicit clock and reset)
+// __q: Q output of FF
+// __d: D input of FF
+// __load: load d value into FF
+// __reset_value: value assigned upon reset
+// (__clk: clock input)
+// (__arst_n: asynchronous reset, active-low)
+`define FFL(__q, __d, __load, __reset_value, __clk = `REG_DFLT_CLK, __arst_n = `REG_DFLT_RST) \
+  always_ff @(posedge (__clk) or negedge (__arst_n)) begin                                    \
+    if (!(__arst_n)) begin                                                                    \
+      __q <= (__reset_value);                                                                 \
+    end else begin                                                                            \
+      __q <= (__load) ? (__d) : (__q);                                                        \
+    end                                                                                       \
+  end
+
+// Flip-Flop with load-enable and asynchronous active-high reset
+// __q: Q output of FF
+// __d: D input of FF
+// __load: load d value into FF
+// __reset_value: value assigned upon reset
+// __clk: clock input
+// __arst: asynchronous reset, active-high
+`define FFLAR(__q, __d, __load, __reset_value, __clk, __arst) \
+  always_ff @(posedge (__clk) or posedge (__arst)) begin      \
+    if (__arst) begin                                         \
+      __q <= (__reset_value);                                 \
+    end else begin                                            \
+      __q <= (__load) ? (__d) : (__q);                        \
+    end                                                       \
+  end
+
+// DEPRECATED - use `FFL instead
+// Flip-Flop with load-enable and asynchronous active-low reset
+// __q: Q output of FF
+// __d: D input of FF
+// __load: load d value into FF
+// __reset_value: value assigned upon reset
+// __clk: clock input
+// __arst_n: asynchronous reset, active-low
+`define FFLARN(__q, __d, __load, __reset_value, __clk, __arst_n) \
+  `FFL(__q, __d, __load, __reset_value, __clk, __arst_n)
+
+// Flip-Flop with load-enable and synchronous active-high reset
+// __q: Q output of FF
+// __d: D input of FF
+// __load: load d value into FF
+// __reset_value: value assigned upon reset
+// __clk: clock input
+// __reset_clk: reset input, active-high
+`define FFLSR(__q, __d, __load, __reset_value, __clk, __reset_clk)       \
+  `ifndef NO_SYNOPSYS_FF                                                 \
+  /``* synopsys sync_set_reset `"__reset_clk`" *``/                      \
+  `endif                                                                 \
+  always_ff @(posedge (__clk)) begin                                     \
+    __q <= (__reset_clk) ? (__reset_value) : ((__load) ? (__d) : (__q)); \
+  end
+
+// Flip-Flop with load-enable and synchronous active-low reset
+// __q: Q output of FF
+// __d: D input of FF
+// __load: load d value into FF
+// __reset_value: value assigned upon reset
+// __clk: clock input
+// __reset_n_clk: reset input, active-low
+`define FFLSRN(__q, __d, __load, __reset_value, __clk, __reset_n_clk)         \
+  `ifndef NO_SYNOPSYS_FF                                                      \
+  /``* synopsys sync_set_reset `"__reset_n_clk`" *``/                         \
+  `endif                                                                      \
+  always_ff @(posedge (__clk)) begin                                          \
+    __q <= (!(__reset_n_clk)) ? (__reset_value) : ((__load) ? (__d) : (__q)); \
+  end
+
+// Flip-Flop with load-enable and asynchronous active-low reset and synchronous clear
+// __q: Q output of FF
+// __d: D input of FF
+// __load: load d value into FF
+// __clear: assign reset value into FF
+// __reset_value: value assigned upon reset
+// __clk: clock input
+// __arst_n: asynchronous reset, active-low
+`define FFLARNC(__q, __d, __load, __clear, __reset_value, __clk, __arst_n) \
+  `ifndef NO_SYNOPSYS_FF                                                   \
+  /``* synopsys sync_set_reset `"__clear`" *``/                            \
+  `endif                                                                   \
+  always_ff @(posedge (__clk) or negedge (__arst_n)) begin                 \
+    if (!(__arst_n)) begin                                                 \
+      __q <= (__reset_value);                                              \
+    end else begin                                                         \
+      __q <= (__clear) ? (__reset_value) : (__load) ? (__d) : (__q);       \
+    end                                                                    \
+  end
+
+// Load-enable Flip-Flop without reset
+// __q: Q output of FF
+// __d: D input of FF
+// __load: load d value into FF
+// __clk: clock input
+`define FFLNR(__q, __d, __load, __clk) \
+  always_ff @(posedge (__clk)) begin   \
+    __q <= (__load) ? (__d) : (__q);   \
+  end
+
+`endif
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/addr_decode.sv b/test/type_param/vendor/pulp-platform/common_cells/src/addr_decode.sv
new file mode 100644
index 00000000..90a43a0d
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/addr_decode.sv
@@ -0,0 +1,161 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Wolfgang Roenninger
+
+/// Address Decoder: Maps the input address combinatorially to an index.
+/// The address map `addr_map_i` is a packed array of rule_t structs.
+/// The ranges of any two rules may overlap. If so, the rule at the higher (more significant)
+/// position in `addr_map_i` prevails.
+///
+/// There can be an arbitrary number of address rules. There can be multiple
+/// ranges defined for the same index. The start address has to be less than the end address.
+///
+/// There is the possibility to add a default mapping:
+/// `en_default_idx_i`: Driving this port to `1'b1` maps all input addresses
+/// for which no rule in `addr_map_i` exists to the default index specified by
+/// `default_idx_i`. In this case, `dec_error_o` is always `1'b0`.
+///
+/// Assertions: The module checks every time there is a change in the address mapping
+/// if the resulting map is valid. It fatals if `start_addr` is not lower than `end_addr`
+/// or if a mapping targets an index that is outside the number of allowed indices.
+/// It issues warnings if the address regions of any two mappings overlap.
+module addr_decode #(
+  /// Highest index which can happen in a rule.
+  parameter int unsigned NoIndices = 32'd0,
+  /// Total number of rules.
+  parameter int unsigned NoRules   = 32'd0,
+  /// Address type inside the rules and to decode.
+  parameter type         addr_t    = logic,
+  /// Rule packed struct type.
+  /// The address decoder expects three fields in `rule_t`:
+  ///
+  /// typedef struct packed {
+  ///   int unsigned idx;
+  ///   addr_t       start_addr;
+  ///   addr_t       end_addr;
+  /// } rule_t;
+  ///
+  ///  - `idx`:        index of the rule, has to be < `NoIndices`
+  ///  - `start_addr`: start address of the range the rule describes, value is included in range
+  ///  - `end_addr`:   end address of the range the rule describes, value is NOT included in range
+  parameter type         rule_t    = logic,
+  /// Dependent parameter, do **not** overwrite!
+  ///
+  /// Width of the `idx_o` output port.
+  parameter int unsigned IdxWidth  = cf_math_pkg::idx_width(NoIndices),
+  /// Dependent parameter, do **not** overwrite!
+  ///
+  /// Type of the `idx_o` output port.
+  parameter type         idx_t     = logic [IdxWidth-1:0]
+) (
+  /// Address to decode.
+  input  addr_t               addr_i,
+  /// Address map: rule with the highest array position wins on collision
+  input  rule_t [NoRules-1:0] addr_map_i,
+  /// Decoded index.
+  output idx_t                idx_o,
+  /// Decode is valid.
+  output logic                dec_valid_o,
+  /// Decode is not valid, no matching rule found.
+  output logic                dec_error_o,
+  /// Enable default port mapping.
+  ///
+  /// When not used, tie to `0`.
+  input  logic                en_default_idx_i,
+  /// Default port index.
+  ///
+  /// When `en_default_idx_i` is `1`, this will be the index when no rule matches.
+  ///
+  /// When not used, tie to `0`.
+  input  idx_t                default_idx_i
+);
+
+  logic [NoRules-1:0] matched_rules; // purely for address map debugging
+
+  always_comb begin
+    // default assignments (used when no rule matches)
+    matched_rules = '0;
+    dec_valid_o   = 1'b0;
+    dec_error_o   = (en_default_idx_i) ? 1'b0 : 1'b1;
+    idx_o         = (en_default_idx_i) ? default_idx_i : '0;
+
+    // match the rules; iterating upwards lets a later (higher) matching rule overwrite an earlier one
+    for (int unsigned i = 0; i < NoRules; i++) begin
+      if ((addr_i >= addr_map_i[i].start_addr) && (addr_i < addr_map_i[i].end_addr)) begin
+        matched_rules[i] = 1'b1;
+        dec_valid_o      = 1'b1;
+        dec_error_o      = 1'b0;
+        idx_o            = idx_t'(addr_map_i[i].idx);
+      end
+    end
+  end
+
+  // Assumptions and assertions
+  `ifndef VERILATOR
+  `ifndef XSIM
+  // pragma translate_off
+  initial begin : proc_check_parameters
+    assume ($bits(addr_i) == $bits(addr_map_i[0].start_addr)) else
+      $warning($sformatf("Input address has %d bits and address map has %d bits.",
+        $bits(addr_i), $bits(addr_map_i[0].start_addr)));
+    assume (NoRules > 0) else
+      $fatal(1, $sformatf("At least one rule needed"));
+    assume (NoIndices > 0) else
+      $fatal(1, $sformatf("At least one index needed"));
+  end
+
+  assert final ($onehot0(matched_rules)) else
+    $warning("More than one bit set in the one-hot signal, matched_rules");
+
+  // The following assumptions check the validity of the address map.
+  // The assumptions get generated for each distinct pair of rules.
+  // Each assumption is present two times, as they rely on one rule being
+  // effectively ordered. Only one of the rules with the same function is
+  // active at a time for a given pair.
+  // check_start:   Enforces a smaller start than end address.
+  // check_idx:     Enforces a valid index in the rule.
+  // check_overlap: Warns if there are overlapping address regions.
+  always @(addr_map_i) #0 begin : proc_check_addr_map
+    if (!$isunknown(addr_map_i)) begin
+      for (int unsigned i = 0; i < NoRules; i++) begin
+        check_start : assume (addr_map_i[i].start_addr < addr_map_i[i].end_addr) else
+          $fatal(1, $sformatf("This rule has a higher start than end address!!!\n\
+ Violating rule %d.\n\
+ Rule> IDX: %h START: %h END: %h\n\
+ #####################################################",
+              i ,addr_map_i[i].idx, addr_map_i[i].start_addr, addr_map_i[i].end_addr));
+        // check the SLV ids
+        check_idx : assume (addr_map_i[i].idx < NoIndices) else
+            $fatal(1, $sformatf("This rule has a IDX that is not allowed!!!\n\
+ Violating rule %d.\n\
+ Rule> IDX: %h START: %h END: %h\n\
+ Rule> MAX_IDX: %h\n\
+ #####################################################",
+            i, addr_map_i[i].idx, addr_map_i[i].start_addr, addr_map_i[i].end_addr,
+            (NoIndices-1)));
+        for (int unsigned j = i + 1; j < NoRules; j++) begin
+          // overlap check
+          check_overlap : assume (!((addr_map_i[j].start_addr < addr_map_i[i].end_addr) &&
+                                    (addr_map_i[j].end_addr > addr_map_i[i].start_addr))) else
+            $warning($sformatf("Overlapping address region found!!!\n\
+ Rule %d: IDX: %h START: %h END: %h\n\
+ Rule %d: IDX: %h START: %h END: %h\n\
+ #####################################################",
+            i, addr_map_i[i].idx, addr_map_i[i].start_addr, addr_map_i[i].end_addr,
+            j, addr_map_i[j].idx, addr_map_i[j].start_addr, addr_map_i[j].end_addr));
+        end
+      end
+    end
+  end
+  // pragma translate_on
+  `endif
+  `endif
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/cdc_2phase.sv b/test/type_param/vendor/pulp-platform/common_cells/src/cdc_2phase.sv
new file mode 100644
index 00000000..8e770abf
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/cdc_2phase.sv
@@ -0,0 +1,175 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Fabian Schuiki + +/// A two-phase clock domain crossing. +/// +/// CONSTRAINT: Requires max_delay of min_period(src_clk_i, dst_clk_i) through +/// the paths async_req, async_ack, async_data. +/* verilator lint_off DECLFILENAME */ +module cdc_2phase #( + parameter type T = logic +)( + input logic src_rst_ni, + input logic src_clk_i, + input T src_data_i, + input logic src_valid_i, + output logic src_ready_o, + + input logic dst_rst_ni, + input logic dst_clk_i, + output T dst_data_o, + output logic dst_valid_o, + input logic dst_ready_i +); + + // Asynchronous handshake signals. + (* dont_touch = "true" *) logic async_req; + (* dont_touch = "true" *) logic async_ack; + (* dont_touch = "true" *) T async_data; + + // The sender in the source domain. + cdc_2phase_src #(.T(T)) i_src ( + .rst_ni ( src_rst_ni ), + .clk_i ( src_clk_i ), + .data_i ( src_data_i ), + .valid_i ( src_valid_i ), + .ready_o ( src_ready_o ), + .async_req_o ( async_req ), + .async_ack_i ( async_ack ), + .async_data_o ( async_data ) + ); + + // The receiver in the destination domain. 
+ cdc_2phase_dst #(.T(T)) i_dst ( + .rst_ni ( dst_rst_ni ), + .clk_i ( dst_clk_i ), + .data_o ( dst_data_o ), + .valid_o ( dst_valid_o ), + .ready_i ( dst_ready_i ), + .async_req_i ( async_req ), + .async_ack_o ( async_ack ), + .async_data_i ( async_data ) + ); + +endmodule + + +/// Half of the two-phase clock domain crossing located in the source domain. +module cdc_2phase_src #( + parameter type T = logic +)( + input logic rst_ni, + input logic clk_i, + input T data_i, + input logic valid_i, + output logic ready_o, + output logic async_req_o, + input logic async_ack_i, + output T async_data_o +); + + (* dont_touch = "true" *) + logic req_src_q, ack_src_q, ack_q; + (* dont_touch = "true" *) + T data_src_q; + + // The req_src and data_src registers change when a new data item is accepted. + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + req_src_q <= 0; + data_src_q <= '0; + end else if (valid_i && ready_o) begin + req_src_q <= ~req_src_q; + data_src_q <= data_i; + end + end + + // The ack_src and ack registers act as synchronization stages. + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + ack_src_q <= 0; + ack_q <= 0; + end else begin + ack_src_q <= async_ack_i; + ack_q <= ack_src_q; + end + end + + // Output assignments. + assign ready_o = (req_src_q == ack_q); + assign async_req_o = req_src_q; + assign async_data_o = data_src_q; + +endmodule + + +/// Half of the two-phase clock domain crossing located in the destination +/// domain. +module cdc_2phase_dst #( + parameter type T = logic +)( + input logic rst_ni, + input logic clk_i, + output T data_o, + output logic valid_o, + input logic ready_i, + input logic async_req_i, + output logic async_ack_o, + input T async_data_i +); + + (* dont_touch = "true" *) + (* async_reg = "true" *) + logic req_dst_q, req_q0, req_q1, ack_dst_q; + (* dont_touch = "true" *) + T data_dst_q; + + // The ack_dst register changes when a new data item is accepted. 
+ always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + ack_dst_q <= 0; + end else if (valid_o && ready_i) begin + ack_dst_q <= ~ack_dst_q; + end + end + + // The data_dst register changes when a new data item is presented. This is + // indicated by the async_req line changing levels. + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + data_dst_q <= '0; + end else if (req_q0 != req_q1 && !valid_o) begin + data_dst_q <= async_data_i; + end + end + + // The req_dst and req registers act as synchronization stages. + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + req_dst_q <= 0; + req_q0 <= 0; + req_q1 <= 0; + end else begin + req_dst_q <= async_req_i; + req_q0 <= req_dst_q; + req_q1 <= req_q0; + end + end + + // Output assignments. + assign valid_o = (ack_dst_q != req_q1); + assign data_o = data_dst_q; + assign async_ack_o = ack_dst_q; + +endmodule +/* verilator lint_on DECLFILENAME */ diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/cf_math_pkg.sv b/test/type_param/vendor/pulp-platform/common_cells/src/cf_math_pkg.sv new file mode 100644 index 00000000..9f35a44e --- /dev/null +++ b/test/type_param/vendor/pulp-platform/common_cells/src/cf_math_pkg.sv @@ -0,0 +1,61 @@ +// Copyright 2016 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +/// cf_math_pkg: Constant Function Implementations of Mathematical Functions for HDL Elaboration +/// +/// This package contains a collection of mathematical functions that are commonly used when defining +/// the value of constants in HDL code. These functions are implemented as Verilog constants +/// functions. Introduced in Verilog 2001 (IEEE Std 1364-2001), a constant function (§ 10.3.5) is a +/// function whose value can be evaluated at compile time or during elaboration. A constant function +/// must be called with arguments that are constants. +package cf_math_pkg; + + /// Ceiled Division of Two Natural Numbers + /// + /// Returns the quotient of two natural numbers, rounded towards plus infinity. + function automatic integer ceil_div (input longint dividend, input longint divisor); + automatic longint remainder; + + // pragma translate_off + `ifndef VERILATOR + if (dividend < 0) begin + $fatal(1, "Dividend %0d is not a natural number!", dividend); + end + + if (divisor < 0) begin + $fatal(1, "Divisor %0d is not a natural number!", divisor); + end + + if (divisor == 0) begin + $fatal(1, "Division by zero!"); + end + `endif + // pragma translate_on + + remainder = dividend; + for (ceil_div = 0; remainder > 0; ceil_div++) begin + remainder = remainder - divisor; + end + endfunction + + /// Index width required to be able to represent up to `num_idx` indices as a binary + /// encoded signal. + /// Ensures that the minimum width if an index signal is `1`, regardless of parametrization. + /// + /// Sample usage in type definition: + /// As parameter: + /// `parameter type idx_t = logic[cf_math_pkg::idx_width(NumIdx)-1:0]` + /// As typedef: + /// `typedef logic [cf_math_pkg::idx_width(NumIdx)-1:0] idx_t` + function automatic integer unsigned idx_width (input integer unsigned num_idx); + return (num_idx > 32'd1) ? 
unsigned'($clog2(num_idx)) : 32'd1; + endfunction + +endpackage diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/counter.sv b/test/type_param/vendor/pulp-platform/common_cells/src/counter.sv new file mode 100644 index 00000000..43392e4b --- /dev/null +++ b/test/type_param/vendor/pulp-platform/common_cells/src/counter.sv @@ -0,0 +1,43 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +// Author: Florian Zaruba +// Description: Generic up/down counter + +module counter #( + parameter int unsigned WIDTH = 4, + parameter bit STICKY_OVERFLOW = 1'b0 +)( + input logic clk_i, + input logic rst_ni, + input logic clear_i, // synchronous clear + input logic en_i, // enable the counter + input logic load_i, // load a new value + input logic down_i, // downcount, default is up + input logic [WIDTH-1:0] d_i, + output logic [WIDTH-1:0] q_o, + output logic overflow_o +); + delta_counter #( + .WIDTH (WIDTH), + .STICKY_OVERFLOW (STICKY_OVERFLOW) + ) i_counter ( + .clk_i, + .rst_ni, + .clear_i, + .en_i, + .load_i, + .down_i, + .delta_i({{WIDTH-1{1'b0}}, 1'b1}), + .d_i, + .q_o, + .overflow_o + ); +endmodule diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/delta_counter.sv b/test/type_param/vendor/pulp-platform/common_cells/src/delta_counter.sv new file mode 100644 index 00000000..90b5cffa --- /dev/null +++ b/test/type_param/vendor/pulp-platform/common_cells/src/delta_counter.sv @@ -0,0 +1,74 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +// Up/down counter with variable delta + +module delta_counter #( + parameter int unsigned WIDTH = 4, + parameter bit STICKY_OVERFLOW = 1'b0 +)( + input logic clk_i, + input logic rst_ni, + input logic clear_i, // synchronous clear + input logic en_i, // enable the counter + input logic load_i, // load a new value + input logic down_i, // downcount, default is up + input logic [WIDTH-1:0] delta_i, + input logic [WIDTH-1:0] d_i, + output logic [WIDTH-1:0] q_o, + output logic overflow_o +); + logic [WIDTH:0] counter_q, counter_d; + if (STICKY_OVERFLOW) begin : gen_sticky_overflow + logic overflow_d, overflow_q; + always_ff @(posedge clk_i or negedge rst_ni) overflow_q <= ~rst_ni ? 1'b0 : overflow_d; + always_comb begin + overflow_d = overflow_q; + if (clear_i || load_i) begin + overflow_d = 1'b0; + end else if (!overflow_q && en_i) begin + if (down_i) begin + overflow_d = delta_i > counter_q[WIDTH-1:0]; + end else begin + overflow_d = counter_q[WIDTH-1:0] > ({WIDTH{1'b1}} - delta_i); + end + end + end + assign overflow_o = overflow_q; + end else begin : gen_transient_overflow + // counter overflowed if the MSB is set + assign overflow_o = counter_q[WIDTH]; + end + assign q_o = counter_q[WIDTH-1:0]; + + always_comb begin + counter_d = counter_q; + + if (clear_i) begin + counter_d = '0; + end else if (load_i) begin + counter_d = {1'b0, d_i}; + end else if (en_i) begin + if (down_i) begin + counter_d = counter_q - delta_i; + end else begin + counter_d = counter_q + delta_i; + end + end + end + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + counter_q <= '0; + end else begin + counter_q <= counter_d; + end + end +endmodule diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/deprecated/fifo_v1.sv b/test/type_param/vendor/pulp-platform/common_cells/src/deprecated/fifo_v1.sv new file mode 100644 index 00000000..31295e80 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/common_cells/src/deprecated/fifo_v1.sv @@ -0,0 +1,57 
@@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Author: Florian Zaruba + +/* verilator lint_off DECLFILENAME */ +module fifo #( + parameter bit FALL_THROUGH = 1'b0, // fifo is in fall-through mode + parameter int unsigned DATA_WIDTH = 32, // default data width if the fifo is of type logic + parameter int unsigned DEPTH = 8, // depth can be arbitrary from 0 to 2**32 + parameter int unsigned THRESHOLD = 1, // fill count until when to assert threshold_o + parameter type dtype = logic [DATA_WIDTH-1:0] +)( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, // flush the queue + input logic testmode_i, // test_mode to bypass clock gating + // status flags + output logic full_o, // queue is full + output logic empty_o, // queue is empty + output logic threshold_o, // the FIFO is above the specified threshold + // as long as the queue is not full we can push new data + input dtype data_i, // data to push into the queue + input logic push_i, // data is valid and can be pushed to the queue + // as long as the queue is not empty we can pop new elements + output dtype data_o, // output data + input logic pop_i // pop head from queue +); + fifo_v2 #( + .FALL_THROUGH ( FALL_THROUGH ), + .DATA_WIDTH ( DATA_WIDTH ), + .DEPTH ( DEPTH ), + .ALM_FULL_TH ( THRESHOLD ), + .dtype ( dtype ) + ) impl ( + 
.clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_i ( flush_i ), + .testmode_i ( testmode_i ), + .full_o ( full_o ), + .empty_o ( empty_o ), + .alm_full_o ( threshold_o ), + .alm_empty_o ( ), + .data_i ( data_i ), + .push_i ( push_i ), + .data_o ( data_o ), + .pop_i ( pop_i ) + ); +endmodule +/* verilator lint_on DECLFILENAME */ diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/deprecated/fifo_v2.sv b/test/type_param/vendor/pulp-platform/common_cells/src/deprecated/fifo_v2.sv new file mode 100644 index 00000000..9c87ed96 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/common_cells/src/deprecated/fifo_v2.sv @@ -0,0 +1,79 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Author: Florian Zaruba + +module fifo_v2 #( + parameter bit FALL_THROUGH = 1'b0, // fifo is in fall-through mode + parameter int unsigned DATA_WIDTH = 32, // default data width if the fifo is of type logic + parameter int unsigned DEPTH = 8, // depth can be arbitrary from 0 to 2**32 + parameter int unsigned ALM_EMPTY_TH = 1, // almost empty threshold (when to assert alm_empty_o) + parameter int unsigned ALM_FULL_TH = 1, // almost full threshold (when to assert alm_full_o) + parameter type dtype = logic [DATA_WIDTH-1:0], + // DO NOT OVERWRITE THIS PARAMETER + parameter int unsigned ADDR_DEPTH = (DEPTH > 1) ? 
$clog2(DEPTH) : 1 +)( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, // flush the queue + input logic testmode_i, // test_mode to bypass clock gating + // status flags + output logic full_o, // queue is full + output logic empty_o, // queue is empty + output logic alm_full_o, // FIFO fillstate >= the specified threshold + output logic alm_empty_o, // FIFO fillstate <= the specified threshold + // as long as the queue is not full we can push new data + input dtype data_i, // data to push into the queue + input logic push_i, // data is valid and can be pushed to the queue + // as long as the queue is not empty we can pop new elements + output dtype data_o, // output data + input logic pop_i // pop head from queue +); + + logic [ADDR_DEPTH-1:0] usage; + + // generate threshold parameters + if (DEPTH == 0) begin + assign alm_full_o = 1'b0; // that signal does not make any sense in a FIFO of depth 0 + assign alm_empty_o = 1'b0; // that signal does not make any sense in a FIFO of depth 0 + end else begin + assign alm_full_o = (usage >= ALM_FULL_TH[ADDR_DEPTH-1:0]); + assign alm_empty_o = (usage <= ALM_EMPTY_TH[ADDR_DEPTH-1:0]); + end + + fifo_v3 #( + .FALL_THROUGH ( FALL_THROUGH ), + .DATA_WIDTH ( DATA_WIDTH ), + .DEPTH ( DEPTH ), + .dtype ( dtype ) + ) i_fifo_v3 ( + .clk_i, + .rst_ni, + .flush_i, + .testmode_i, + .full_o, + .empty_o, + .usage_o (usage), + .data_i, + .push_i, + .data_o, + .pop_i + ); + + // pragma translate_off + `ifndef VERILATOR + initial begin + assert (ALM_FULL_TH <= DEPTH) else $error("ALM_FULL_TH can't be larger than the DEPTH."); + assert (ALM_EMPTY_TH <= DEPTH) else $error("ALM_EMPTY_TH can't be larger than the DEPTH."); + end + `endif + // pragma translate_on + +endmodule // fifo_v2 diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/exp_backoff.sv b/test/type_param/vendor/pulp-platform/common_cells/src/exp_backoff.sv new file mode 100644 index 00000000..91dccb07 --- 
/dev/null +++ b/test/type_param/vendor/pulp-platform/common_cells/src/exp_backoff.sv @@ -0,0 +1,98 @@ +// Copyright 2019 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Michael Schaffner , ETH Zurich +// Date: 10.04.2019 +// Description: exponential backoff counter with randomization. +// +// For each failed trial (set_i pulsed), this unit exponentially increases the +// (average) backoff time by masking an LFSR with a shifted mask in order to +// create the backoff counter initial value. +// +// The shift register mask and the counter value are both reset to '0 in case of +// a successful trial (clr_i). 
+// + +module exp_backoff #( + /// Seed for 16bit LFSR + parameter int unsigned Seed = 'hffff, + /// 2**MaxExp-1 determines the maximum range from which random wait counts are drawn + parameter int unsigned MaxExp = 16 +) ( + input logic clk_i, + input logic rst_ni, + /// Sets the backoff counter (pulse) -> use when trial did not succeed + input logic set_i, + /// Clears the backoff counter (pulse) -> use when trial succeeded + input logic clr_i, + /// Indicates whether the backoff counter is equal to zero and a new trial can be launched + output logic is_zero_o +); + + // leave this constant + localparam int unsigned WIDTH = 16; + + logic [WIDTH-1:0] lfsr_d, lfsr_q, cnt_d, cnt_q, mask_d, mask_q; + logic lfsr; + + // generate random wait counts + // note: we use a flipped lfsr here to + // avoid strange correlation effects between + // the (left-shifted) mask and the lfsr + assign lfsr = lfsr_q[15-15] ^ + lfsr_q[15-13] ^ + lfsr_q[15-12] ^ + lfsr_q[15-10]; + + assign lfsr_d = (set_i) ? {lfsr, lfsr_q[$high(lfsr_q):1]} : + lfsr_q; + + // mask the wait counts with exponentially increasing mask (shift reg) + assign mask_d = (clr_i) ? '0 : + (set_i) ? {{(WIDTH-MaxExp){1'b0}},mask_q[MaxExp-2:0], 1'b1} : + mask_q; + + assign cnt_d = (clr_i) ? '0 : + (set_i) ? (mask_q & lfsr_q) : + (!is_zero_o) ? 
cnt_q - 1'b1 : '0; + + assign is_zero_o = (cnt_q=='0); + + always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs + if (!rst_ni) begin + lfsr_q <= WIDTH'(Seed); + mask_q <= '0; + cnt_q <= '0; + end else begin + lfsr_q <= lfsr_d; + mask_q <= mask_d; + cnt_q <= cnt_d; + end + end + +/////////////////////////////////////////////////////// +// assertions +/////////////////////////////////////////////////////// + +//pragma translate_off +`ifndef VERILATOR + initial begin + // assert wrong parameterizations + assert (MaxExp>0) + else $fatal(1,"MaxExp must be greater than 0"); + assert (MaxExp<=16) + else $fatal(1,"MaxExp cannot be greater than 16"); + assert (Seed>0) + else $fatal(1,"Zero seed is not allowed for LFSR"); + end +`endif +//pragma translate_on + +endmodule // exp_backoff diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/fifo_v3.sv b/test/type_param/vendor/pulp-platform/common_cells/src/fifo_v3.sv new file mode 100644 index 00000000..11b77e02 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/common_cells/src/fifo_v3.sv @@ -0,0 +1,191 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +// Author: Florian Zaruba + +module fifo_v3 #( + parameter bit FALL_THROUGH = 1'b0, // fifo is in fall-through mode + parameter int unsigned DATA_WIDTH = 32, // default data width if the fifo is of type logic + parameter int unsigned DEPTH = 8, // depth can be arbitrary from 0 to 2**32 + parameter type dtype = logic [DATA_WIDTH-1:0], + // DO NOT OVERWRITE THIS PARAMETER + parameter int unsigned ADDR_DEPTH = (DEPTH > 1) ? $clog2(DEPTH) : 1 +)( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, // flush the queue + input logic testmode_i, // test_mode to bypass clock gating + // status flags + output logic full_o, // queue is full + output logic empty_o, // queue is empty + output logic [ADDR_DEPTH-1:0] usage_o, // fill pointer + // as long as the queue is not full we can push new data + input dtype data_i, // data to push into the queue + input logic push_i, // data is valid and can be pushed to the queue + // as long as the queue is not empty we can pop new elements + output dtype data_o, // output data + input logic pop_i // pop head from queue +); + // local parameter + // FIFO depth - handle the case of pass-through, synthesizer will do constant propagation + localparam int unsigned FifoDepth = (DEPTH > 0) ? 
DEPTH : 1; + // clock gating control + logic gate_clock; + // pointer to the read and write section of the queue + logic [ADDR_DEPTH - 1:0] read_pointer_n, read_pointer_q, write_pointer_n, write_pointer_q; + // keep a counter to keep track of the current queue status + // this integer will be truncated by the synthesis tool + logic [ADDR_DEPTH:0] status_cnt_n, status_cnt_q; + // actual memory + dtype [FifoDepth - 1:0] mem_n, mem_q; + + // fifo ram signals for fpga target + logic fifo_ram_we; + logic [ADDR_DEPTH-1:0] fifo_ram_read_address; + logic [ADDR_DEPTH-1:0] fifo_ram_write_address; + logic [$bits(dtype)-1:0] fifo_ram_wdata; + logic [$bits(dtype)-1:0] fifo_ram_rdata; + + assign usage_o = status_cnt_q[ADDR_DEPTH-1:0]; + + if (DEPTH == 0) begin : gen_pass_through + assign empty_o = ~push_i; + assign full_o = ~pop_i; + end else begin : gen_fifo + assign full_o = (status_cnt_q == FifoDepth[ADDR_DEPTH:0]); + assign empty_o = (status_cnt_q == 0) & ~(FALL_THROUGH & push_i); + end + // status flags + + // read and write queue logic + always_comb begin : read_write_comb + // default assignment + read_pointer_n = read_pointer_q; + write_pointer_n = write_pointer_q; + status_cnt_n = status_cnt_q; + if (ariane_pkg::FPGA_EN) begin + fifo_ram_we = '0; + fifo_ram_read_address = read_pointer_q; + fifo_ram_write_address = '0; + fifo_ram_wdata = '0; + data_o = (DEPTH == 0) ? data_i : fifo_ram_rdata; + end else begin + data_o = (DEPTH == 0) ? 
data_i : mem_q[read_pointer_q]; + mem_n = mem_q; + gate_clock = 1'b1; + end + + // push a new element to the queue + if (push_i && ~full_o) begin + if (ariane_pkg::FPGA_EN) begin + fifo_ram_we = 1'b1; + fifo_ram_write_address = write_pointer_q; + fifo_ram_wdata = data_i; + end else begin + // push the data onto the queue + mem_n[write_pointer_q] = data_i; + // un-gate the clock, we want to write something + gate_clock = 1'b0; + end + + // increment the write counter + if (write_pointer_q == FifoDepth[ADDR_DEPTH-1:0] - 1) + write_pointer_n = '0; + else + write_pointer_n = write_pointer_q + 1; + // increment the overall counter + status_cnt_n = status_cnt_q + 1; + end + + if (pop_i && ~empty_o) begin + // read from the queue is a default assignment + // but increment the read pointer... + if (read_pointer_n == FifoDepth[ADDR_DEPTH-1:0] - 1) + read_pointer_n = '0; + else + read_pointer_n = read_pointer_q + 1; + // ... and decrement the overall count + status_cnt_n = status_cnt_q - 1; + end + + // keep the count pointer stable if we push and pop at the same time + if (push_i && pop_i && ~full_o && ~empty_o) + status_cnt_n = status_cnt_q; + + // FIFO is in pass through mode -> do not change the pointers + if (FALL_THROUGH && (status_cnt_q == 0) && push_i) begin + data_o = data_i; + if (pop_i) begin + status_cnt_n = status_cnt_q; + read_pointer_n = read_pointer_q; + write_pointer_n = write_pointer_q; + end + end + end + + // sequential process + always_ff @(posedge clk_i or negedge rst_ni) begin + if(~rst_ni) begin + read_pointer_q <= '0; + write_pointer_q <= '0; + status_cnt_q <= '0; + end else begin + if (flush_i) begin + read_pointer_q <= '0; + write_pointer_q <= '0; + status_cnt_q <= '0; + end else begin + read_pointer_q <= read_pointer_n; + write_pointer_q <= write_pointer_n; + status_cnt_q <= status_cnt_n; + end + end + end + + if (ariane_pkg::FPGA_EN) begin : gen_fpga_queue + AsyncDpRam #( + .ADDR_WIDTH (ADDR_DEPTH), + .DATA_DEPTH (DEPTH), + .DATA_WIDTH 
($bits(dtype)) + ) fifo_ram ( + .Clk_CI ( clk_i ), + .WrEn_SI ( fifo_ram_we ), + .RdAddr_DI ( fifo_ram_read_address ), + .WrAddr_DI ( fifo_ram_write_address ), + .WrData_DI ( fifo_ram_wdata ), + .RdData_DO ( fifo_ram_rdata ) + ); + end else begin : gen_asic_queue + always_ff @(posedge clk_i or negedge rst_ni) begin + if(~rst_ni) begin + mem_q <= '0; + end else if (!gate_clock) begin + mem_q <= mem_n; + end + end + end + +// pragma translate_off +`ifndef VERILATOR + initial begin + assert (DEPTH > 0) else $error("DEPTH must be greater than 0."); + end + + full_write : assert property( + @(posedge clk_i) disable iff (~rst_ni) (full_o |-> ~push_i)) + else $fatal (1, "Trying to push new data although the FIFO is full."); + + empty_read : assert property( + @(posedge clk_i) disable iff (~rst_ni) (empty_o |-> ~pop_i)) + else $fatal (1, "Trying to pop data although the FIFO is empty."); +`endif +// pragma translate_on + +endmodule // fifo_v3 diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/lfsr.sv b/test/type_param/vendor/pulp-platform/common_cells/src/lfsr.sv new file mode 100644 index 00000000..aae2e2df --- /dev/null +++ b/test/type_param/vendor/pulp-platform/common_cells/src/lfsr.sv @@ -0,0 +1,315 @@ +// Copyright 2019 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+//
+// Author: Michael Schaffner , ETH Zurich
+// Date: 26.04.2019
+//
+// Description: This is a parametric LFSR with precomputed coefficients for
+// LFSR lengths from 4 to 64bit.
+
+// Additional block cipher layers can be instantiated to non-linearly transform
+// the pseudo-random LFSR sequence at the output, and hence break the shifting
+// patterns. The additional cipher layers can only be used for an LFSR width
+// of 64bit, since the block cipher has been designed for that block length.
+
+module lfsr #(
+  parameter int unsigned LfsrWidth = 64, // [4,64]
+  parameter int unsigned OutWidth = 8, // [1,LfsrWidth]
+  parameter logic [LfsrWidth-1:0] RstVal = '1, // [1,2^LfsrWidth-1]
+  // 0: disabled, the present cipher uses 31, but just a few layers (1-3) are enough
+  // to break linear shifting patterns
+  parameter int unsigned CipherLayers = 0,
+  parameter bit CipherReg = 1'b1 // additional output reg after cipher
+) (
+  input logic clk_i,
+  input logic rst_ni,
+  input logic en_i,
+  output logic [OutWidth-1:0] out_o
+);
+
+// Galois LFSR feedback masks, indexed by LfsrWidth (entries 4 to 64)
+// Automatically generated with get_lfsr_masks.py
+// Masks are from https://users.ece.cmu.edu/~koopman/lfsr/
+localparam logic [63:0] Masks [4:64] = '{64'hC,
+  64'h1E,
+  64'h39,
+  64'h7E,
+  64'hFA,
+  64'h1FD,
+  64'h3FC,
+  64'h64B,
+  64'hD8F,
+  64'h1296,
+  64'h2496,
+  64'h4357,
+  64'h8679,
+  64'h1030E,
+  64'h206CD,
+  64'h403FE,
+  64'h807B8,
+  64'h1004B2,
+  64'h2006A8,
+  64'h4004B2,
+  64'h800B87,
+  64'h10004F3,
+  64'h200072D,
+  64'h40006AE,
+  64'h80009E3,
+  64'h10000583,
+  64'h20000C92,
+  64'h400005B6,
+  64'h80000EA6,
+  64'h1000007A3,
+  64'h200000ABF,
+  64'h400000842,
+  64'h80000123E,
+  64'h100000074E,
+  64'h2000000AE9,
+  64'h400000086A,
+  64'h8000001213,
+  64'h1000000077E,
+  64'h2000000123B,
+  64'h40000000877,
+  64'h8000000108D,
+  64'h100000000AE9,
+  64'h200000000E9F,
+  64'h4000000008A6,
+  64'h80000000191E,
+  64'h100000000090E,
+  64'h2000000000FB3,
+  64'h4000000000D7D,
+  64'h80000000016A5,
+  64'h10000000000B4B,
+  64'h200000000010AF,
+  64'h40000000000DDE,
+  64'h8000000000181A,
+  64'h100000000000B65,
+  64'h20000000000102D,
+  64'h400000000000CD5,
+  64'h8000000000024C1,
+  64'h1000000000000EF6,
+  64'h2000000000001363,
+  64'h4000000000000FCD,
+  64'h80000000000019E2};
+
+// this S-box and permutation P has been taken from the Present Cipher,
+// a super lightweight block cipher. use the cipher layers to add additional
+// non-linearity to the LFSR output. note one layer does not fully correspond
+// to the present cipher round, since the key and rekeying function is not applied here.
+//
+// See also:
+// "PRESENT: An Ultra-Lightweight Block Cipher", A. Bogdanov et al., Ches 2007
+// http://www.lightweightcrypto.org/present/present_ches2007.pdf
+
+// this is the sbox from the present cipher
+localparam logic[15:0][3:0] Sbox4 = {4'h2, 4'h1, 4'h7, 4'h4,
+  4'h8, 4'hF, 4'hE, 4'h3,
+  4'hD, 4'hA, 4'h0, 4'h9,
+  4'hB, 4'h6, 4'h5, 4'hC };
+
+// these are the permutation indices of the present cipher
+localparam logic[63:0][5:0] Perm = {6'd63, 6'd47, 6'd31, 6'd15, 6'd62, 6'd46, 6'd30, 6'd14,
+  6'd61, 6'd45, 6'd29, 6'd13, 6'd60, 6'd44, 6'd28, 6'd12,
+  6'd59, 6'd43, 6'd27, 6'd11, 6'd58, 6'd42, 6'd26, 6'd10,
+  6'd57, 6'd41, 6'd25, 6'd09, 6'd56, 6'd40, 6'd24, 6'd08,
+  6'd55, 6'd39, 6'd23, 6'd07, 6'd54, 6'd38, 6'd22, 6'd06,
+  6'd53, 6'd37, 6'd21, 6'd05, 6'd52, 6'd36, 6'd20, 6'd04,
+  6'd51, 6'd35, 6'd19, 6'd03, 6'd50, 6'd34, 6'd18, 6'd02,
+  6'd49, 6'd33, 6'd17, 6'd01, 6'd48, 6'd32, 6'd16, 6'd00};
+
+// Substitution layer: replaces each of the 16 nibbles of `in` by its Sbox4 entry.
+function automatic logic [63:0] sbox4_layer(logic [63:0] in);
+  logic [63:0] out;
+  //for (logic [4:0] j = '0; j<16; j++) out[j*4 +: 4] = sbox4[in[j*4 +: 4]];
+  // this simulates much faster than the loop
+  out[0*4 +: 4] = Sbox4[in[0*4 +: 4]];
+  out[1*4 +: 4] = Sbox4[in[1*4 +: 4]];
+  out[2*4 +: 4] = Sbox4[in[2*4 +: 4]];
+  out[3*4 +: 4] = Sbox4[in[3*4 +: 4]];
+
+  out[4*4 +: 4] = Sbox4[in[4*4 +: 4]];
+  out[5*4 +: 4] = Sbox4[in[5*4 +: 4]];
+  out[6*4 +: 4] = Sbox4[in[6*4 +: 4]];
+  out[7*4 +: 4] = Sbox4[in[7*4 +: 4]];
+
+  out[8*4 +: 4] = Sbox4[in[8*4 +: 4]];
+  out[9*4 +: 4] = Sbox4[in[9*4 +: 4]];
+  out[10*4 +: 4] = Sbox4[in[10*4 +: 4]];
+  out[11*4 +: 4] = Sbox4[in[11*4 +: 4]];
+
+  out[12*4 +: 4] = Sbox4[in[12*4 +: 4]];
+  out[13*4 +: 4] = Sbox4[in[13*4 +: 4]];
+  out[14*4 +: 4] = Sbox4[in[14*4 +: 4]];
+  out[15*4 +: 4] = Sbox4[in[15*4 +: 4]];
+  return out;
+endfunction : sbox4_layer
+
+function automatic logic [63:0] perm_layer(logic [63:0] in); // moves input bit j to output bit Perm[j]
+  logic [63:0] out;
+  // for (logic [7:0] j = '0; j<64; j++) out[perm[j]] = in[j];
+  // this simulates much faster than the loop
+  out[Perm[0]] = in[0];
+  out[Perm[1]] = in[1];
+  out[Perm[2]] = in[2];
+  out[Perm[3]] = in[3];
+  out[Perm[4]] = in[4];
+  out[Perm[5]] = in[5];
+  out[Perm[6]] = in[6];
+  out[Perm[7]] = in[7];
+  out[Perm[8]] = in[8];
+  out[Perm[9]] = in[9];
+
+  out[Perm[10]] = in[10];
+  out[Perm[11]] = in[11];
+  out[Perm[12]] = in[12];
+  out[Perm[13]] = in[13];
+  out[Perm[14]] = in[14];
+  out[Perm[15]] = in[15];
+  out[Perm[16]] = in[16];
+  out[Perm[17]] = in[17];
+  out[Perm[18]] = in[18];
+  out[Perm[19]] = in[19];
+
+  out[Perm[20]] = in[20];
+  out[Perm[21]] = in[21];
+  out[Perm[22]] = in[22];
+  out[Perm[23]] = in[23];
+  out[Perm[24]] = in[24];
+  out[Perm[25]] = in[25];
+  out[Perm[26]] = in[26];
+  out[Perm[27]] = in[27];
+  out[Perm[28]] = in[28];
+  out[Perm[29]] = in[29];
+
+  out[Perm[30]] = in[30];
+  out[Perm[31]] = in[31];
+  out[Perm[32]] = in[32];
+  out[Perm[33]] = in[33];
+  out[Perm[34]] = in[34];
+  out[Perm[35]] = in[35];
+  out[Perm[36]] = in[36];
+  out[Perm[37]] = in[37];
+  out[Perm[38]] = in[38];
+  out[Perm[39]] = in[39];
+
+  out[Perm[40]] = in[40];
+  out[Perm[41]] = in[41];
+  out[Perm[42]] = in[42];
+  out[Perm[43]] = in[43];
+  out[Perm[44]] = in[44];
+  out[Perm[45]] = in[45];
+  out[Perm[46]] = in[46];
+  out[Perm[47]] = in[47];
+  out[Perm[48]] = in[48];
+  out[Perm[49]] = in[49];
+
+  out[Perm[50]] = in[50];
+  out[Perm[51]] = in[51];
+  out[Perm[52]] = in[52];
+  out[Perm[53]] = in[53];
+  out[Perm[54]] = in[54];
+  out[Perm[55]] = in[55];
+  out[Perm[56]] = in[56];
+  out[Perm[57]] = in[57];
+  out[Perm[58]] = in[58];
+  out[Perm[59]] = in[59];
+
+  out[Perm[60]] = in[60];
+  out[Perm[61]] = in[61];
+  out[Perm[62]] = in[62];
+  out[Perm[63]] = in[63];
+  return out;
+endfunction : perm_layer
+
+////////////////////////////////////////////////////////////////////////
+// lfsr
+////////////////////////////////////////////////////////////////////////
+
+logic [LfsrWidth-1:0] lfsr_d, lfsr_q;
+assign lfsr_d = // when en_i is set: shift right and XOR in the mask if the bit shifted out (lfsr_q[0]) is 1; otherwise hold
+  (en_i) ? (lfsr_q>>1) ^ ({LfsrWidth{lfsr_q[0]}} & Masks[LfsrWidth][LfsrWidth-1:0]) : lfsr_q;
+
+always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+  //$display("%b %h", en_i, lfsr_d);
+  if (!rst_ni) begin
+    lfsr_q <= LfsrWidth'(RstVal);
+  end else begin
+    lfsr_q <= lfsr_d;
+  end
+end
+
+////////////////////////////////////////////////////////////////////////
+// block cipher layers
+////////////////////////////////////////////////////////////////////////
+
+if (CipherLayers > unsigned'(0)) begin : g_cipher_layers
+  logic [63:0] ciph_layer;
+  localparam int unsigned NumRepl = ((64+LfsrWidth)/LfsrWidth); // copies of lfsr_q replicated before the cast to 64 bit
+
+  always_comb begin : p_ciph_layer
+    automatic logic [63:0] tmp;
+    tmp = 64'({NumRepl{lfsr_q}});
+    for(int unsigned k = 0; k < CipherLayers; k++) begin
+      tmp = perm_layer(sbox4_layer(tmp)); // one layer = S-box substitution followed by bit permutation
+    end
+    ciph_layer = tmp;
+  end
+
+  // additional output reg after cipher
+  if (CipherReg) begin : g_cipher_reg
+    logic [OutWidth-1:0] out_d, out_q;
+
+    assign out_d = (en_i) ? ciph_layer[OutWidth-1:0] : out_q; // the output register only updates while en_i is set
+    assign out_o = out_q[OutWidth-1:0];
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+      if (!rst_ni) begin
+        out_q <= '0;
+      end else begin
+        out_q <= out_d;
+      end
+    end
+  // no outreg
+  end else begin : g_no_out_reg
+    assign out_o = ciph_layer[OutWidth-1:0];
+  end
+
+// no block cipher
+end else begin : g_no_cipher_layers
+  assign out_o = lfsr_q[OutWidth-1:0];
+end
+
+////////////////////////////////////////////////////////////////////////
+// assertions
+////////////////////////////////////////////////////////////////////////
+
+// pragma translate_off
+initial begin
+  // these are the LUT limits
+  assert(OutWidth <= LfsrWidth) else
+    $fatal(1,"OutWidth must be smaller equal the LfsrWidth.");
+  assert(RstVal > unsigned'(0)) else
+    $fatal(1,"RstVal must be nonzero.");
+  assert((LfsrWidth >= $low(Masks)) && (LfsrWidth <= $high(Masks))) else
+    $fatal(1,"Unsupported LfsrWidth.");
+  assert(Masks[LfsrWidth][LfsrWidth-1]) else
+    $fatal(1, "LFSR mask is not correct. The MSB must be 1." );
+  assert((CipherLayers > 0) && (LfsrWidth == 64) || (CipherLayers == 0)) else
+    $fatal(1, "Use additional cipher layers only in conjunction with an LFSR width of 64 bit." );
+end
+
+`ifndef VERILATOR
+  all_zero: assert property (
+    @(posedge clk_i) disable iff (!rst_ni) en_i |-> lfsr_d)
+    else $fatal(1,"Lfsr must not be all-zero.");
+`endif
+// pragma translate_on
+
+endmodule // lfsr
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/lfsr_16bit.sv b/test/type_param/vendor/pulp-platform/common_cells/src/lfsr_16bit.sv
new file mode 100644
index 00000000..3fc93c77
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/lfsr_16bit.sv
@@ -0,0 +1,68 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.
You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Florian Zaruba, ETH Zurich
+// Date: 5.11.2018
+// Description: 16-bit LFSR
+
+// --------------
+// 16-bit LFSR
+// --------------
+//
+// Description: 16-bit shift register with inverted-XOR feedback from bits 15, 12, 5 and 1.
+// The low $clog2(WIDTH) state bits are output in binary (refill_way_bin) and one-hot (refill_way_oh).
+module lfsr_16bit #(
+  parameter logic [15:0] SEED = 16'b0, // reset value of the shift register
+  parameter int unsigned WIDTH = 16 // width of the one-hot output, at most 16
+)(
+  input logic clk_i,
+  input logic rst_ni,
+  input logic en_i,
+  output logic [WIDTH-1:0] refill_way_oh,
+  output logic [$clog2(WIDTH)-1:0] refill_way_bin
+);
+
+  localparam int unsigned LogWidth = $clog2(WIDTH);
+
+  logic [15:0] shift_d, shift_q;
+
+
+  always_comb begin
+    // feedback bit: inverted XOR of the tap bits
+    automatic logic shift_in;
+    shift_in = !(shift_q[15] ^ shift_q[12] ^ shift_q[5] ^ shift_q[1]);
+
+    shift_d = shift_q; // hold the state by default
+
+    if (en_i)
+      shift_d = {shift_q[14:0], shift_in}; // shift left by one, feedback enters at bit 0
+
+    // output assignment
+    refill_way_oh = 'b0;
+    refill_way_oh[shift_q[LogWidth-1:0]] = 1'b1;
+    refill_way_bin = shift_q[LogWidth-1:0]; // explicit slice instead of implicit truncation
+  end
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : proc_
+    if(~rst_ni) begin
+      shift_q <= SEED;
+    end else begin
+      shift_q <= shift_d;
+    end
+  end
+
+  //pragma translate_off
+  initial begin
+    assert (WIDTH <= 16)
+      else $fatal(1, "WIDTH must be at most 16 because of the 16-bit LFSR");
+  end
+  //pragma translate_on
+
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/lfsr_8bit.sv b/test/type_param/vendor/pulp-platform/common_cells/src/lfsr_8bit.sv
new file mode 100644
index 00000000..60fdf19f
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/lfsr_8bit.sv
@@ -0,0 +1,61 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Igor Loi - University of Bologna
+// Author: Florian Zaruba, ETH Zurich
+// Date: 12.11.2017
+// Description: 8-bit LFSR
+
+/// 8 bit Linear Feedback Shift register with inverted-XOR feedback from bits 7, 3, 2 and 1.
+module lfsr_8bit #(
+  parameter logic [7:0] SEED = 8'b0, // reset value of the shift register
+  parameter int unsigned WIDTH = 8 // width of the one-hot output, at most 8
+) (
+  input logic clk_i,
+  input logic rst_ni,
+  input logic en_i,
+  output logic [ WIDTH-1:0] refill_way_oh,
+  output logic [$clog2(WIDTH)-1:0] refill_way_bin
+);
+
+  localparam int unsigned LogWidth = $clog2(WIDTH);
+
+  logic [7:0] shift_d, shift_q;
+
+  always_comb begin
+    // feedback bit: inverted XOR of the tap bits
+    automatic logic shift_in;
+    shift_in = !(shift_q[7] ^ shift_q[3] ^ shift_q[2] ^ shift_q[1]);
+
+    shift_d = shift_q; // hold the state by default
+
+    if (en_i) shift_d = {shift_q[6:0], shift_in}; // shift left by one, feedback enters at bit 0
+
+    // output assignment
+    refill_way_oh = 'b0;
+    refill_way_oh[shift_q[LogWidth - 1:0]] = 1'b1;
+    refill_way_bin = shift_q[LogWidth - 1:0]; // explicit slice instead of implicit truncation
+  end
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : proc_
+    if (~rst_ni) begin
+      shift_q <= SEED;
+    end else begin
+      shift_q <= shift_d;
+    end
+  end
+
+  //pragma translate_off
+  initial begin
+    assert (WIDTH <= 8) else $fatal(1, "WIDTH must be at most 8 because of the 8-bit LFSR");
+  end
+  //pragma translate_on
+
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/lzc.sv b/test/type_param/vendor/pulp-platform/common_cells/src/lzc.sv
new file mode 100644
index 00000000..424eb2ef
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/lzc.sv
@@ -0,0 +1,112 @@
+// Copyright (c) 2018 - 2019 ETH Zurich, University of Bologna
+// All rights reserved.
+//
+// This code is under development and not yet released to the public.
+// Until it is released, the code is under the copyright of ETH Zurich and
+// the University of Bologna, and may contain confidential and/or unpublished
+// work. Any reuse/redistribution is strictly forbidden without written
+// permission from ETH Zurich.
+//
+// Bug fixes and contributions will eventually be released under the
+// SolderPad open hardware license in the context of the PULP platform
+// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
+// University of Bologna.
+
+/// A trailing zero counter / leading zero counter.
+/// Set MODE to 0 for trailing zero counter => cnt_o is the number of trailing zeros (from the LSB)
+/// Set MODE to 1 for leading zero counter => cnt_o is the number of leading zeros (from the MSB)
+/// If the input does not contain a one (all bits are zero), `empty_o` is asserted. Additionally
+/// `cnt_o` contains the maximum number of zeros - 1. For example:
+/// in_i = 000_0000, empty_o = 1, cnt_o = 6 (mode = 0)
+/// in_i = 000_0001, empty_o = 0, cnt_o = 0 (mode = 0)
+/// in_i = 000_1000, empty_o = 0, cnt_o = 3 (mode = 0)
+/// NOTE(review): earlier text mentioned a separate, faster implementation for Verilator;
+/// no Verilator-specific code path is present in this version of the module.
+module lzc #(
+  /// The width of the input vector.
+  parameter int unsigned WIDTH = 2,
+  /// Mode selection: 0 -> trailing zero, 1 -> leading zero
+  parameter bit MODE = 1'b0,
+  /// Dependent parameter. Do **not** change!
+  ///
+  /// Width of the output signal with the zero count.
+  parameter int unsigned CNT_WIDTH = cf_math_pkg::idx_width(WIDTH)
+) (
+  /// Input vector to be counted.
+  input logic [WIDTH-1:0] in_i,
+  /// Count of the leading / trailing zeros.
+  output logic [CNT_WIDTH-1:0] cnt_o,
+  /// Counter is empty: Asserted if all bits in in_i are zero.
+  output logic empty_o
+);
+
+  if (WIDTH == 1) begin : gen_degenerate_lzc
+    // single input bit: both outputs are simply the inverted input
+    assign cnt_o[0] = !in_i[0];
+    assign empty_o = !in_i[0];
+
+  end else begin : gen_lzc
+
+    localparam int unsigned NumLevels = $clog2(WIDTH); // depth of the binary reduction tree
+
+    // pragma translate_off
+    initial begin
+      assert(WIDTH > 0) else $fatal(1, "input must be at least one bit wide");
+    end
+    // pragma translate_on
+
+    logic [WIDTH-1:0][NumLevels-1:0] index_lut; // index_lut[j] holds the constant j
+    logic [2**NumLevels-1:0] sel_nodes; // per tree node: OR of the input bits below it
+    logic [2**NumLevels-1:0][NumLevels-1:0] index_nodes; // per tree node: index chosen below it
+
+    logic [WIDTH-1:0] in_tmp;
+
+    // reverse vector if required
+    always_comb begin : flip_vector
+      for (int unsigned i = 0; i < WIDTH; i++) begin
+        in_tmp[i] = (MODE) ? in_i[WIDTH-1-i] : in_i[i];
+      end
+    end
+
+    for (genvar j = 0; unsigned'(j) < WIDTH; j++) begin : g_index_lut
+      assign index_lut[j] = (NumLevels)'(unsigned'(j));
+    end
+
+    for (genvar level = 0; unsigned'(level) < NumLevels; level++) begin : g_levels
+      if (unsigned'(level) == NumLevels - 1) begin : g_last_level
+        for (genvar k = 0; k < 2 ** level; k++) begin : g_level
+          // if two successive indices are still in the vector...
+          if (unsigned'(k) * 2 < WIDTH - 1) begin : g_reduce
+            assign sel_nodes[2 ** level - 1 + k] = in_tmp[k * 2] | in_tmp[k * 2 + 1];
+            assign index_nodes[2 ** level - 1 + k] = (in_tmp[k * 2] == 1'b1)
+              ? index_lut[k * 2] :
+                index_lut[k * 2 + 1];
+          end
+          // if only the first index is still in the vector...
+          if (unsigned'(k) * 2 == WIDTH - 1) begin : g_base
+            assign sel_nodes[2 ** level - 1 + k] = in_tmp[k * 2];
+            assign index_nodes[2 ** level - 1 + k] = index_lut[k * 2];
+          end
+          // if index is out of range
+          if (unsigned'(k) * 2 > WIDTH - 1) begin : g_out_of_range
+            assign sel_nodes[2 ** level - 1 + k] = 1'b0;
+            assign index_nodes[2 ** level - 1 + k] = '0;
+          end
+        end
+      end else begin : g_not_last_level
+        for (genvar l = 0; l < 2 ** level; l++) begin : g_level
+          assign sel_nodes[2 ** level - 1 + l] =
+              sel_nodes[2 ** (level + 1) - 1 + l * 2] | sel_nodes[2 ** (level + 1) - 1 + l * 2 + 1];
+          assign index_nodes[2 ** level - 1 + l] = (sel_nodes[2 ** (level + 1) - 1 + l * 2] == 1'b1)
+            ? index_nodes[2 ** (level + 1) - 1 + l * 2] :
+              index_nodes[2 ** (level + 1) - 1 + l * 2 + 1];
+        end
+      end
+    end
+
+    assign cnt_o = NumLevels > unsigned'(0) ? index_nodes[0] : {($clog2(WIDTH)) {1'b0}}; // the result is taken from the tree root (node 0)
+    assign empty_o = NumLevels > unsigned'(0) ? ~sel_nodes[0] : ~(|in_i);
+
+  end : gen_lzc
+
+endmodule : lzc
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/popcount.sv b/test/type_param/vendor/pulp-platform/common_cells/src/popcount.sv
new file mode 100644
index 00000000..72b9b71f
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/popcount.sv
@@ -0,0 +1,60 @@
+// Copyright (C) 2013-2018 ETH Zurich, University of Bologna
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Author: Manuel Eggimann
+
+// Description: This module calculates the hamming weight (number of ones) in
+// its input vector using a balanced binary adder tree. Recursive instantiation
+// is used to build the tree. Any unsigned INPUT_WIDTH larger or equal 2 is
+// legal. The module pads the signal internally to the next power of two. The
+// output result width is ceil(log2(INPUT_WIDTH))+1.
+// NOTE(review): a branch for INPUT_WIDTH == 1 exists below although the text above only lists widths >= 2; confirm it is supported.
+module popcount #(
+  parameter int unsigned INPUT_WIDTH = 256,
+  localparam int unsigned PopcountWidth = $clog2(INPUT_WIDTH)+1
+) (
+  input logic [INPUT_WIDTH-1:0] data_i,
+  output logic [PopcountWidth-1:0] popcount_o
+);
+
+  localparam int unsigned PaddedWidth = 1 << $clog2(INPUT_WIDTH); // INPUT_WIDTH rounded up to a power of two
+
+  logic [PaddedWidth-1:0] padded_input;
+  logic [PopcountWidth-2:0] left_child_result, right_child_result; // one bit narrower than popcount_o
+
+  //Zero pad the input to next power of two
+  always_comb begin
+    padded_input = '0;
+    padded_input[INPUT_WIDTH-1:0] = data_i;
+  end
+
+  //Recursive instantiation to build binary adder tree
+  if (INPUT_WIDTH == 1) begin : single_node
+    assign left_child_result = 1'b0;
+    assign right_child_result = padded_input[0];
+  end else if (INPUT_WIDTH == 2) begin : leaf_node
+    assign left_child_result = padded_input[1];
+    assign right_child_result = padded_input[0];
+  end else begin : non_leaf_node
+    popcount #(.INPUT_WIDTH(PaddedWidth / 2))
+      left_child(
+        .data_i(padded_input[PaddedWidth-1:PaddedWidth/2]),
+        .popcount_o(left_child_result));
+    // each child counts one half of the padded input
+    popcount #(.INPUT_WIDTH(PaddedWidth / 2))
+      right_child(
+        .data_i(padded_input[PaddedWidth/2-1:0]),
+        .popcount_o(right_child_result));
+  end
+
+  //Output assignment
+  assign popcount_o = left_child_result + right_child_result;
+
+endmodule : popcount
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/rr_arb_tree.sv b/test/type_param/vendor/pulp-platform/common_cells/src/rr_arb_tree.sv
new file mode 100644
index 00000000..90301c82
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/rr_arb_tree.sv
@@ -0,0
+1,348 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Michael Schaffner , ETH Zurich
+// Wolfgang Roenninger , ETH Zurich
+// Date: 02.04.2019
+// Description: logarithmic arbitration tree with round robin arbitration scheme.
+
+/// The rr_arb_tree employs non-starving round robin-arbitration - i.e., the priorities
+/// rotate each cycle.
+///
+/// ## Fair vs. unfair Arbitration
+///
+/// This refers to fair throughput distribution when not all inputs have active requests.
+/// This module has an internal state `rr_q` which defines the highest priority input. (When
+/// `ExtPrio` is `1'b1` this state is provided from the outside.) The arbitration tree will
+/// choose the input with the same index as currently defined by the state if it has an active
+/// request. Otherwise a *random* other active input is selected. The parameter `FairArb` is used
+/// to distinguish between two methods of calculating the next state.
+/// * `1'b0`: The next state is calculated by advancing the current state by one. This leads to the
+/// state being calculated without the context of the active request. Leading to an
+/// unfair throughput distribution if not all inputs have active requests.
+/// * `1'b1`: The next state jumps to the next unserved request with higher index.
+/// This is achieved by using two trailing-zero-counters (`lzc`). The upper has the masked
+/// `req_i` signal with all indices which will have a higher priority in the next state.
+/// The trailing zero count defines the input index with the next highest priority after
+/// the current one is served. When the upper is empty the lower `lzc` provides the
+/// wrapped index if there are outstanding requests with lower or same priority.
+/// The implications of throughput fairness on the module timing are:
+/// * The trailing zero counter (`lzc`) has a loglog relation of input to output timing. This means
+/// that in this module the input to register path scales with Log(Log(`NumIn`)).
+/// * The `rr_arb_tree` data multiplexing scales with Log(`NumIn`). This means that the input to output
+/// timing path of this module also scales with Log(`NumIn`).
+/// This implies that in this module the input to output path is always longer than the input to
+/// register path. As the output data usually also terminates in a register the parameter `FairArb`
+/// only has implications on the area. When it is `1'b0` a static plus one adder is instantiated.
+/// If it is `1'b1` two `lzc`, a masking logic stage and a two input multiplexer are instantiated.
+/// However these are small in respect of the data multiplexers needed, as the width of the `req_i`
+/// signal is usually less than `DataWidth`.
+module rr_arb_tree #(
+  /// Number of inputs to be arbitrated.
+  parameter int unsigned NumIn = 64,
+  /// Data width of the payload in bits. Not needed if `DataType` is overwritten.
+  parameter int unsigned DataWidth = 32,
+  /// Data type of the payload, can be overwritten with custom type. Only use of `DataWidth`.
+  parameter type DataType = logic [DataWidth-1:0],
+  /// The `ExtPrio` option allows to override the internal round robin counter via the
+  /// `rr_i` signal. This can be useful in case multiple arbiters need to have
+  /// rotating priorities that are operating in lock-step. If static priority arbitration
+  /// is needed, just connect `rr_i` to '0.
+  ///
+  /// Set to 1'b1 to enable.
+  parameter bit ExtPrio = 1'b0,
+  /// If `AxiVldRdy` is set, the req/gnt signals are compliant with the AXI style vld/rdy
+  /// handshake. Namely, upstream vld (req) must not depend on rdy (gnt), as it can be deasserted
+  /// again even though vld is asserted. Enabling `AxiVldRdy` leads to a reduction of arbiter
+  /// delay and area.
+  ///
+  /// Set to `1'b1` to treat req/gnt as vld/rdy.
+  parameter bit AxiVldRdy = 1'b0,
+  /// The `LockIn` option prevents the arbiter from changing the arbitration
+  /// decision when the arbiter is disabled. I.e., the index of the first request
+  /// that wins the arbitration will be locked in case the destination is not
+  /// able to grant the request in the same cycle.
+  ///
+  /// Set to `1'b1` to enable.
+  parameter bit LockIn = 1'b0,
+  /// When set, ensures that throughput gets distributed evenly between all inputs.
+  ///
+  /// Set to `1'b0` to disable.
+  parameter bit FairArb = 1'b1,
+  /// Dependent parameter, do **not** overwrite.
+  /// Width of the arbitration priority signal and the arbitrated index.
+  parameter int unsigned IdxWidth = (NumIn > 32'd1) ? unsigned'($clog2(NumIn)) : 32'd1,
+  /// Dependent parameter, do **not** overwrite.
+  /// Type for defining the arbitration priority and arbitrated index signal.
+  parameter type idx_t = logic [IdxWidth-1:0]
+) (
+  /// Clock, positive edge triggered.
+  input logic clk_i,
+  /// Asynchronous reset, active low.
+  input logic rst_ni,
+  /// Clears the arbiter state. Only used if `ExtPrio` is `1'b0` or `LockIn` is `1'b1`.
+  input logic flush_i,
+  /// External round-robin priority. Only used if `ExtPrio` is `1'b1.`
+  input idx_t rr_i,
+  /// Input requests arbitration.
+  input logic [NumIn-1:0] req_i,
+  /* verilator lint_off UNOPTFLAT */
+  /// Input request is granted.
+  output logic [NumIn-1:0] gnt_o,
+  /* verilator lint_on UNOPTFLAT */
+  /// Input data for arbitration.
+  input DataType [NumIn-1:0] data_i,
+  /// Output request is valid.
+  output logic req_o,
+  /// Output request is granted.
+  input logic gnt_i,
+  /// Output data.
+  output DataType data_o,
+  /// Index from which input the data came from.
+  output idx_t idx_o
+);
+
+  // pragma translate_off
+  `ifndef VERILATOR
+  `ifndef XSIM
+  // Default SVA reset
+  default disable iff (!rst_ni || flush_i);
+  `endif
+  `endif
+  // pragma translate_on
+
+  // just pass through in this corner case
+  if (NumIn == unsigned'(1)) begin : gen_pass_through
+    assign req_o = req_i[0];
+    assign gnt_o[0] = gnt_i;
+    assign data_o = data_i[0];
+    assign idx_o = '0;
+  // non-degenerate cases
+  end else begin : gen_arbiter
+    localparam int unsigned NumLevels = unsigned'($clog2(NumIn));
+
+    /* verilator lint_off UNOPTFLAT */
+    idx_t [2**NumLevels-2:0] index_nodes; // used to propagate the indices
+    DataType [2**NumLevels-2:0] data_nodes; // used to propagate the data
+    logic [2**NumLevels-2:0] gnt_nodes; // used to propagate the grant to masters
+    logic [2**NumLevels-2:0] req_nodes; // used to propagate the requests to slave
+    /* lint_off */
+    idx_t rr_q;
+    logic [NumIn-1:0] req_d;
+
+    // the final arbitration decision can be taken from the root of the tree
+    assign req_o = req_nodes[0];
+    assign data_o = data_nodes[0];
+    assign idx_o = index_nodes[0];
+
+    if (ExtPrio) begin : gen_ext_rr
+      assign rr_q = rr_i;
+      assign req_d = req_i;
+    end else begin : gen_int_rr
+      idx_t rr_d;
+
+      // lock arbiter decision in case we got at least one req and no acknowledge
+      if (LockIn) begin : gen_lock
+        logic lock_d, lock_q;
+        logic [NumIn-1:0] req_q;
+
+        assign lock_d = req_o & ~gnt_i; // lock while a request is pending but not granted
+        assign req_d = (lock_q) ? req_q : req_i; // while locked, arbitrate on the registered requests
+
+        always_ff @(posedge clk_i or negedge rst_ni) begin : p_lock_reg
+          if (!rst_ni) begin
+            lock_q <= '0;
+          end else begin
+            if (flush_i) begin
+              lock_q <= '0;
+            end else begin
+              lock_q <= lock_d;
+            end
+          end
+        end
+
+        // pragma translate_off
+        `ifndef VERILATOR
+          lock: assert property(
+            @(posedge clk_i) LockIn |-> req_o &&
+            (!gnt_i && !flush_i) |=> idx_o == $past(idx_o)) else
+            $fatal (1, "Lock implies same arbiter decision in next cycle if output is not \
+ ready.");
+
+          logic [NumIn-1:0] req_tmp;
+          assign req_tmp = req_q & req_i;
+          lock_req: assume property(
+            @(posedge clk_i) LockIn |-> lock_d |=> req_tmp == req_q) else
+            $fatal (1, "It is disallowed to deassert unserved request signals when LockIn is \
+ enabled.");
+        `endif
+        // pragma translate_on
+
+        always_ff @(posedge clk_i or negedge rst_ni) begin : p_req_regs
+          if (!rst_ni) begin
+            req_q <= '0;
+          end else begin
+            if (flush_i) begin
+              req_q <= '0;
+            end else begin
+              req_q <= req_d;
+            end
+          end
+        end
+      end else begin : gen_no_lock
+        assign req_d = req_i;
+      end
+
+      if (FairArb) begin : gen_fair_arb
+        logic [NumIn-1:0] upper_mask, lower_mask;
+        idx_t upper_idx, lower_idx, next_idx;
+        logic upper_empty, lower_empty;
+
+        for (genvar i = 0; i < NumIn; i++) begin : gen_mask
+          assign upper_mask[i] = (i > rr_q) ? req_d[i] : 1'b0; // requests with an index above rr_q
+          assign lower_mask[i] = (i <= rr_q) ? req_d[i] : 1'b0; // requests with an index up to and including rr_q
+        end
+
+        lzc #(
+          .WIDTH ( NumIn ),
+          .MODE ( 1'b0 )
+        ) i_lzc_upper (
+          .in_i ( upper_mask ),
+          .cnt_o ( upper_idx ),
+          .empty_o ( upper_empty )
+        );
+
+        lzc #(
+          .WIDTH ( NumIn ),
+          .MODE ( 1'b0 )
+        ) i_lzc_lower (
+          .in_i ( lower_mask ),
+          .cnt_o ( lower_idx ),
+          .empty_o ( /*unused*/ )
+        );
+
+        assign next_idx = upper_empty ? lower_idx : upper_idx; // prefer a request above rr_q, otherwise wrap around
+        assign rr_d = (gnt_i && req_o) ? next_idx : rr_q; // the state only advances when a request is granted
+
+      end else begin : gen_unfair_arb
+        assign rr_d = (gnt_i && req_o) ? ((rr_q == idx_t'(NumIn-1)) ? '0 : rr_q + 1'b1) : rr_q;
+      end
+
+      // this holds the highest priority
+      always_ff @(posedge clk_i or negedge rst_ni) begin : p_rr_regs
+        if (!rst_ni) begin
+          rr_q <= '0;
+        end else begin
+          if (flush_i) begin
+            rr_q <= '0;
+          end else begin
+            rr_q <= rr_d;
+          end
+        end
+      end
+    end
+
+    assign gnt_nodes[0] = gnt_i; // the grant enters the tree at the root
+
+    // arbiter tree
+    for (genvar level = 0; unsigned'(level) < NumLevels; level++) begin : gen_levels
+      for (genvar l = 0; l < 2**level; l++) begin : gen_level
+        // local select signal
+        logic sel;
+        // index calcs
+        localparam int unsigned Idx0 = 2**level-1+l;// current node
+        localparam int unsigned Idx1 = 2**(level+1)-1+l*2; // first child node, the second child is Idx1+1
+        //////////////////////////////////////////////////////////////
+        // uppermost level where data is fed in from the inputs
+        if (unsigned'(level) == NumLevels-1) begin : gen_first_level
+          // if two successive indices are still in the vector...
+          if (unsigned'(l) * 2 < NumIn-1) begin : gen_reduce
+            assign req_nodes[Idx0] = req_d[l*2] | req_d[l*2+1];
+
+            // arbitration: round robin
+            assign sel = ~req_d[l*2] | req_d[l*2+1] & rr_q[NumLevels-1-level];
+
+            assign index_nodes[Idx0] = idx_t'(sel);
+            assign data_nodes[Idx0] = (sel) ? data_i[l*2+1] : data_i[l*2];
+            assign gnt_o[l*2] = gnt_nodes[Idx0] & (AxiVldRdy | req_d[l*2]) & ~sel;
+            assign gnt_o[l*2+1] = gnt_nodes[Idx0] & (AxiVldRdy | req_d[l*2+1]) & sel;
+          end
+          // if only the first index is still in the vector...
+          if (unsigned'(l) * 2 == NumIn-1) begin : gen_first
+            assign req_nodes[Idx0] = req_d[l*2];
+            assign index_nodes[Idx0] = '0;// always zero in this case
+            assign data_nodes[Idx0] = data_i[l*2];
+            assign gnt_o[l*2] = gnt_nodes[Idx0] & (AxiVldRdy | req_d[l*2]);
+          end
+          // if index is out of range, fill up with zeros (will get pruned)
+          if (unsigned'(l) * 2 > NumIn-1) begin : gen_out_of_range
+            assign req_nodes[Idx0] = 1'b0;
+            assign index_nodes[Idx0] = idx_t'('0);
+            assign data_nodes[Idx0] = DataType'('0);
+          end
+        //////////////////////////////////////////////////////////////
+        // general case for other levels within the tree
+        end else begin : gen_other_levels
+          assign req_nodes[Idx0] = req_nodes[Idx1] | req_nodes[Idx1+1];
+
+          // arbitration: round robin
+          assign sel = ~req_nodes[Idx1] | req_nodes[Idx1+1] & rr_q[NumLevels-1-level];
+
+          assign index_nodes[Idx0] = (sel) ?
+            idx_t'({1'b1, index_nodes[Idx1+1][NumLevels-unsigned'(level)-2:0]}) :
+            idx_t'({1'b0, index_nodes[Idx1][NumLevels-unsigned'(level)-2:0]});
+
+          assign data_nodes[Idx0] = (sel) ? data_nodes[Idx1+1] : data_nodes[Idx1];
+          assign gnt_nodes[Idx1] = gnt_nodes[Idx0] & ~sel;
+          assign gnt_nodes[Idx1+1] = gnt_nodes[Idx0] & sel;
+        end
+        //////////////////////////////////////////////////////////////
+      end
+    end
+
+    // pragma translate_off
+    `ifndef VERILATOR
+    `ifndef XSIM
+    initial begin : p_assert
+      assert(NumIn)
+        else $fatal(1, "Input must be at least one element wide.");
+      assert(!(LockIn && ExtPrio))
+        else $fatal(1,"Cannot use LockIn feature together with external ExtPrio.");
+    end
+
+    hot_one : assert property(
+      @(posedge clk_i) $onehot0(gnt_o))
+        else $fatal (1, "Grant signal must be hot1 or zero.");
+
+    gnt0 : assert property(
+      @(posedge clk_i) |gnt_o |-> gnt_i)
+        else $fatal (1, "Grant out implies grant in.");
+
+    gnt1 : assert property(
+      @(posedge clk_i) req_o |-> gnt_i |-> |gnt_o)
+        else $fatal (1, "Req out and grant in implies grant out.");
+
+    gnt_idx : assert property(
+      @(posedge clk_i) req_o |-> gnt_i |-> gnt_o[idx_o])
+        else $fatal (1, "Idx_o / gnt_o do not match.");
+
+    req0 : assert property(
+      @(posedge clk_i) |req_i |-> req_o)
+        else $fatal (1, "Req in implies req out.");
+
+    req1 : assert property(
+      @(posedge clk_i) req_o |-> |req_i)
+        else $fatal (1, "Req out implies req in.");
+    `endif
+    `endif
+    // pragma translate_on
+  end
+
+endmodule : rr_arb_tree
diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/rstgen.sv b/test/type_param/vendor/pulp-platform/common_cells/src/rstgen.sv
new file mode 100644
index 00000000..a7dccc63
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/common_cells/src/rstgen.sv
@@ -0,0 +1,30 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Davide Rossi + +module rstgen ( + input logic clk_i, + input logic rst_ni, + input logic test_mode_i, + output logic rst_no, + output logic init_no +); + + rstgen_bypass i_rstgen_bypass ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .rst_test_mode_ni ( rst_ni ), + .test_mode_i ( test_mode_i ), + .rst_no ( rst_no ), + .init_no ( init_no ) + ); + +endmodule diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/rstgen_bypass.sv b/test/type_param/vendor/pulp-platform/common_cells/src/rstgen_bypass.sv new file mode 100644 index 00000000..c51ee835 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/common_cells/src/rstgen_bypass.sv @@ -0,0 +1,57 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Florian Zaruba +// Description: This module is a reset synchronizer with a dedicated reset bypass pin for testmode reset. +// Pro Tip: The wise Dr. Schaffner recommends at least 4 registers! 
+ +module rstgen_bypass #( + parameter int unsigned NumRegs = 4 +) ( + input logic clk_i, + input logic rst_ni, + input logic rst_test_mode_ni, + input logic test_mode_i, + output logic rst_no, + output logic init_no +); + + // internal reset + logic rst_n; + + logic [NumRegs-1:0] synch_regs_q; + // bypass mode + always_comb begin + if (test_mode_i == 1'b0) begin + rst_n = rst_ni; + rst_no = synch_regs_q[NumRegs-1]; + init_no = synch_regs_q[NumRegs-1]; + end else begin + rst_n = rst_test_mode_ni; + rst_no = rst_test_mode_ni; + init_no = 1'b1; + end + end + + always @(posedge clk_i or negedge rst_n) begin + if (~rst_n) begin + synch_regs_q <= 0; + end else begin + synch_regs_q <= {synch_regs_q[NumRegs-2:0], 1'b1}; + end + end + // pragma translate_off + `ifndef VERILATOR + initial begin : p_assertions + if (NumRegs < 1) $fatal(1, "At least one register is required."); + end + `endif + // pragma translate_on +endmodule diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/shift_reg.sv b/test/type_param/vendor/pulp-platform/common_cells/src/shift_reg.sv new file mode 100644 index 00000000..7193fbcd --- /dev/null +++ b/test/type_param/vendor/pulp-platform/common_cells/src/shift_reg.sv @@ -0,0 +1,53 @@ + +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// Author: +// +// Description: Simple shift register for arbitrary depth and types + +module shift_reg #( + parameter type dtype = logic, + parameter int unsigned Depth = 1 +)( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input dtype d_i, + output dtype d_o +); + + // register of depth 0 is a wire + if (Depth == 0) begin : gen_pass_through + assign d_o = d_i; + // register of depth 1 is a simple register + end else if (Depth == 1) begin : gen_register + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + d_o <= '0; + end else begin + d_o <= d_i; + end + end + // if depth is greater than 1 it becomes a shift register + end else if (Depth > 1) begin : gen_shift_reg + dtype [Depth-1:0] reg_d, reg_q; + assign d_o = reg_q[Depth-1]; + assign reg_d = {reg_q[Depth-2:0], d_i}; + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + reg_q <= '0; + end else begin + reg_q <= reg_d; + end + end + end + +endmodule diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/spill_register.sv b/test/type_param/vendor/pulp-platform/common_cells/src/spill_register.sv new file mode 100644 index 00000000..80ff37f1 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/common_cells/src/spill_register.sv @@ -0,0 +1,46 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// Fabian Schuiki + + +/// Wrapper around the flushable spill register to maintain back-ward +/// compatibility. +module spill_register #( + parameter type T = logic, + parameter bit Bypass = 1'b0 // make this spill register transparent +) ( + input logic clk_i , + input logic rst_ni , + input logic valid_i , + output logic ready_o , + input T data_i , + output logic valid_o , + input logic ready_i , + output T data_o +); + + spill_register_flushable #( + .T(T), + .Bypass(Bypass) + ) spill_register_flushable_i ( + .clk_i, + .rst_ni, + .valid_i, + .flush_i(1'b0), + .ready_o, + .data_i, + .valid_o, + .ready_i, + .data_o + ); + +endmodule diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/spill_register_flushable.sv b/test/type_param/vendor/pulp-platform/common_cells/src/spill_register_flushable.sv new file mode 100644 index 00000000..c03ad274 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/common_cells/src/spill_register_flushable.sv @@ -0,0 +1,105 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Fabian Schuiki + + +/// A register with handshakes that completely cuts any combinational paths +/// between the input and output. This spill register can be flushed. 
+module spill_register_flushable #( + parameter type T = logic, + parameter bit Bypass = 1'b0 // make this spill register transparent +) ( + input logic clk_i , + input logic rst_ni , + input logic valid_i , + input logic flush_i , + output logic ready_o , + input T data_i , + output logic valid_o , + input logic ready_i , + output T data_o +); + + if (Bypass) begin : gen_bypass + assign valid_o = valid_i; + assign ready_o = ready_i; + assign data_o = data_i; + end else begin : gen_spill_reg + // The A register. + T a_data_q; + logic a_full_q; + logic a_fill, a_drain; + + always_ff @(posedge clk_i or negedge rst_ni) begin : ps_a_data + if (!rst_ni) + a_data_q <= '0; + else if (a_fill) + a_data_q <= data_i; + end + + always_ff @(posedge clk_i or negedge rst_ni) begin : ps_a_full + if (!rst_ni) + a_full_q <= 0; + else if (a_fill || a_drain) + a_full_q <= a_fill; + end + + // The B register. + T b_data_q; + logic b_full_q; + logic b_fill, b_drain; + + always_ff @(posedge clk_i or negedge rst_ni) begin : ps_b_data + if (!rst_ni) + b_data_q <= '0; + else if (b_fill) + b_data_q <= a_data_q; + end + + always_ff @(posedge clk_i or negedge rst_ni) begin : ps_b_full + if (!rst_ni) + b_full_q <= 0; + else if (b_fill || b_drain) + b_full_q <= b_fill; + end + + // Fill the A register when the A or B register is empty. Drain the A register + // whenever it is full and being filled, or if a flush is requested. + assign a_fill = valid_i && ready_o && (!flush_i); + assign a_drain = (a_full_q && !b_full_q) || flush_i; + + // Fill the B register whenever the A register is drained, but the downstream + // circuit is not ready. Drain the B register whenever it is full and the + // downstream circuit is ready, or if a flush is requested. + assign b_fill = a_drain && (!ready_i) && (!flush_i); + assign b_drain = (b_full_q && ready_i) || flush_i; + + // We can accept input as long as register B is not full. 
+ // Note: flush_i and valid_i must not be high at the same time, + // otherwise an invalid handshake may occur + assign ready_o = !a_full_q || !b_full_q; + + // The unit provides output as long as one of the registers is filled. + assign valid_o = a_full_q | b_full_q; + + // We empty the spill register before the slice register. + assign data_o = b_full_q ? b_data_q : a_data_q; + + // pragma translate_off + `ifndef VERILATOR + flush_valid : assert property ( + @(posedge clk_i) disable iff (~rst_ni) (flush_i |-> ~valid_i)) else + $warning("Trying to flush and feed the spill register simultaneously. You will lose data!"); + `endif + // pragma translate_on + end +endmodule diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/stream_arbiter.sv b/test/type_param/vendor/pulp-platform/common_cells/src/stream_arbiter.sv new file mode 100644 index 00000000..c8ca2a87 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/common_cells/src/stream_arbiter.sv @@ -0,0 +1,49 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Stream arbiter: Arbitrates a parametrizable number of input streams (i.e., valid-ready +// handshaking with dependency rules as in AXI4) to a single output stream. Once `oup_valid_o` is +// asserted, `oup_data_o` remains invariant until the output handshake has occurred. 
The +// arbitration scheme is round-robin with "look ahead", see the `rrarbiter` for details. + +module stream_arbiter #( + parameter type DATA_T = logic, // Vivado requires a default value for type parameters. + parameter integer N_INP = -1, // Synopsys DC requires a default value for parameters. + parameter ARBITER = "rr" // "rr" or "prio" +) ( + input logic clk_i, + input logic rst_ni, + + input DATA_T [N_INP-1:0] inp_data_i, + input logic [N_INP-1:0] inp_valid_i, + output logic [N_INP-1:0] inp_ready_o, + + output DATA_T oup_data_o, + output logic oup_valid_o, + input logic oup_ready_i +); + + stream_arbiter_flushable #( + .DATA_T (DATA_T), + .N_INP (N_INP), + .ARBITER (ARBITER) + ) i_arb ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (1'b0), + .inp_data_i (inp_data_i), + .inp_valid_i (inp_valid_i), + .inp_ready_o (inp_ready_o), + .oup_data_o (oup_data_o), + .oup_valid_o (oup_valid_o), + .oup_ready_i (oup_ready_i) + ); + +endmodule diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/stream_arbiter_flushable.sv b/test/type_param/vendor/pulp-platform/common_cells/src/stream_arbiter_flushable.sv new file mode 100644 index 00000000..32946e68 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/common_cells/src/stream_arbiter_flushable.sv @@ -0,0 +1,82 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +// Stream arbiter: Arbitrates a parametrizable number of input streams (i.e., valid-ready +// handshaking with dependency rules as in AXI4) to a single output stream. Once `oup_valid_o` is +// asserted, `oup_data_o` remains invariant until the output handshake has occurred. The +// arbitration scheme is fair round-robin tree, see `rr_arb_tree` for details. + +module stream_arbiter_flushable #( + parameter type DATA_T = logic, // Vivado requires a default value for type parameters. + parameter integer N_INP = -1, // Synopsys DC requires a default value for parameters. + parameter ARBITER = "rr" // "rr" or "prio" +) ( + input logic clk_i, + input logic rst_ni, + input logic flush_i, + + input DATA_T [N_INP-1:0] inp_data_i, + input logic [N_INP-1:0] inp_valid_i, + output logic [N_INP-1:0] inp_ready_o, + + output DATA_T oup_data_o, + output logic oup_valid_o, + input logic oup_ready_i +); + + if (ARBITER == "rr") begin : gen_rr_arb + rr_arb_tree #( + .NumIn (N_INP), + .DataType (DATA_T), + .ExtPrio (1'b0), + .AxiVldRdy (1'b1), + .LockIn (1'b1) + ) i_arbiter ( + .clk_i, + .rst_ni, + .flush_i, + .rr_i ('0), + .req_i (inp_valid_i), + .gnt_o (inp_ready_o), + .data_i (inp_data_i), + .gnt_i (oup_ready_i), + .req_o (oup_valid_o), + .data_o (oup_data_o), + .idx_o () + ); + + end else if (ARBITER == "prio") begin : gen_prio_arb + rr_arb_tree #( + .NumIn (N_INP), + .DataType (DATA_T), + .ExtPrio (1'b1), + .AxiVldRdy (1'b1), + .LockIn (1'b1) + ) i_arbiter ( + .clk_i, + .rst_ni, + .flush_i, + .rr_i ('0), + .req_i (inp_valid_i), + .gnt_o (inp_ready_o), + .data_i (inp_data_i), + .gnt_i (oup_ready_i), + .req_o (oup_valid_o), + .data_o (oup_data_o), + .idx_o () + ); + + end else begin : gen_arb_error + // pragma translate_off + $fatal(1, "Invalid value for parameter 'ARBITER'!"); + // pragma translate_on + end + +endmodule diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/stream_delay.sv b/test/type_param/vendor/pulp-platform/common_cells/src/stream_delay.sv new 
file mode 100644 index 00000000..5051b6c2 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/common_cells/src/stream_delay.sv @@ -0,0 +1,132 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Author: Florian Zaruba, zarubaf@iis.ee.ethz.ch +// Description: Delay (or randomize) AXI-like handshaking + +module stream_delay #( + parameter bit StallRandom = 0, + parameter int FixedDelay = 1, + parameter type payload_t = logic +)( + input logic clk_i, + input logic rst_ni, + + input payload_t payload_i, + output logic ready_o, + input logic valid_i, + + output payload_t payload_o, + input logic ready_i, + output logic valid_o +); + + if (FixedDelay == 0 && !StallRandom) begin : gen_pass_through + assign ready_o = ready_i; + assign valid_o = valid_i; + assign payload_o = payload_i; + end else begin : gen_delay + + localparam int unsigned CounterBits = 4; + + typedef enum logic [1:0] { + Idle, Valid, Ready + } state_e; + + state_e state_d, state_q; + + logic load; + logic [3:0] count_out; + logic en; + + logic [CounterBits-1:0] counter_load; + + assign payload_o = payload_i; + + always_comb begin + state_d = state_q; + valid_o = 1'b0; + ready_o = 1'b0; + load = 1'b0; + en = 1'b0; + + unique case (state_q) + Idle: begin + if (valid_i) begin + load = 1'b1; + state_d = Valid; + // Just one cycle delay + if (FixedDelay == 1 || (StallRandom && 
counter_load == 1)) begin + state_d = Ready; + end + + if (StallRandom && counter_load == 0) begin + valid_o = 1'b1; + ready_o = ready_i; + if (ready_i) state_d = Idle; + else state_d = Ready; + end + end + end + Valid: begin + en = 1'b1; + if (count_out == 0) begin + state_d = Ready; + end + end + + Ready: begin + valid_o = 1'b1; + ready_o = ready_i; + if (ready_i) state_d = Idle; + end + default : /* default */; + endcase + + end + + if (StallRandom) begin : gen_random_stall + lfsr_16bit #( + .WIDTH ( 16 ) + ) i_lfsr_16bit ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .en_i ( load ), + .refill_way_oh ( ), + .refill_way_bin ( counter_load ) + ); + end else begin : gen_fixed_delay + assign counter_load = FixedDelay; + end + + counter #( + .WIDTH ( CounterBits ) + ) i_counter ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .clear_i ( 1'b0 ), + .en_i ( en ), + .load_i ( load ), + .down_i ( 1'b1 ), + .d_i ( counter_load ), + .q_o ( count_out ), + .overflow_o ( ) + ); + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + state_q <= Idle; + end else begin + state_q <= state_d; + end + end + end + +endmodule diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/stream_demux.sv b/test/type_param/vendor/pulp-platform/common_cells/src/stream_demux.sv new file mode 100644 index 00000000..69ad3099 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/common_cells/src/stream_demux.sv @@ -0,0 +1,36 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +/// Connects the input stream (valid-ready) handshake to one of `N_OUP` output stream handshakes. +/// +/// This module has no data ports because stream data does not need to be demultiplexed: the data of +/// the input stream can just be applied at all output streams. +module stream_demux #( + /// Number of connected outputs. + parameter int unsigned N_OUP = 32'd1, + /// Dependent parameters, DO NOT OVERRIDE! + parameter int unsigned LOG_N_OUP = (N_OUP > 32'd1) ? unsigned'($clog2(N_OUP)) : 1'b1 +) ( + input logic inp_valid_i, + output logic inp_ready_o, + + input logic [LOG_N_OUP-1:0] oup_sel_i, + + output logic [N_OUP-1:0] oup_valid_o, + input logic [N_OUP-1:0] oup_ready_i +); + + always_comb begin + oup_valid_o = '0; + oup_valid_o[oup_sel_i] = inp_valid_i; + end + assign inp_ready_o = oup_ready_i[oup_sel_i]; + +endmodule diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/stream_mux.sv b/test/type_param/vendor/pulp-platform/common_cells/src/stream_mux.sv new file mode 100644 index 00000000..34607d91 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/common_cells/src/stream_mux.sv @@ -0,0 +1,46 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +/// Stream multiplexer: connects the output to one of `N_INP` data streams with valid-ready +/// handshaking. + +module stream_mux #( + parameter type DATA_T = logic, // Vivado requires a default value for type parameters. + parameter integer N_INP = 0, // Synopsys DC requires a default value for value parameters. + /// Dependent parameters, DO NOT OVERRIDE! + parameter integer LOG_N_INP = $clog2(N_INP) +) ( + input DATA_T [N_INP-1:0] inp_data_i, + input logic [N_INP-1:0] inp_valid_i, + output logic [N_INP-1:0] inp_ready_o, + + input logic [LOG_N_INP-1:0] inp_sel_i, + + output DATA_T oup_data_o, + output logic oup_valid_o, + input logic oup_ready_i +); + + always_comb begin + inp_ready_o = '0; + inp_ready_o[inp_sel_i] = oup_ready_i; + end + assign oup_data_o = inp_data_i[inp_sel_i]; + assign oup_valid_o = inp_valid_i[inp_sel_i]; + +// pragma translate_off +`ifndef VERILATOR + initial begin: p_assertions + assert (N_INP >= 1) else $fatal (1, "The number of inputs must be at least 1!"); + end +`endif +// pragma translate_on + +endmodule diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/stream_register.sv b/test/type_param/vendor/pulp-platform/common_cells/src/stream_register.sv new file mode 100644 index 00000000..f529d6a2 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/common_cells/src/stream_register.sv @@ -0,0 +1,57 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. 
You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +/// Register with a simple stream-like ready/valid handshake. +/// This register does not cut combinatorial paths on all control signals; if you need a complete +/// cut, use the `spill_register`. +module stream_register #( + parameter type T = logic // Vivado requires a default value for type parameters. +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous active-low reset + input logic clr_i, // Synchronous clear + input logic testmode_i, // Test mode to bypass clock gating + // Input port + input logic valid_i, + output logic ready_o, + input T data_i, + // Output port + output logic valid_o, + input logic ready_i, + output T data_o +); + + logic fifo_empty, + fifo_full; + + fifo_v2 #( + .FALL_THROUGH (1'b0), + .DATA_WIDTH ($bits(T)), + .DEPTH (1), + .dtype (T) + ) i_fifo ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (clr_i), + .testmode_i (testmode_i), + .full_o (fifo_full), + .empty_o (fifo_empty), + .alm_full_o ( ), + .alm_empty_o ( ), + .data_i (data_i), + .push_i (valid_i & ~fifo_full), + .data_o (data_o), + .pop_i (ready_i & ~fifo_empty) + ); + + assign ready_o = ~fifo_full; + assign valid_o = ~fifo_empty; + +endmodule diff --git a/test/type_param/vendor/pulp-platform/common_cells/src/unread.sv b/test/type_param/vendor/pulp-platform/common_cells/src/unread.sv new file mode 100644 index 00000000..80e73562 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/common_cells/src/unread.sv @@ -0,0 +1,21 @@ +// Copyright 2018 ETH Zurich and University of Bologna. 
+// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 29.10.2018 +// Description: Dummy circuit to mitigate Open Pin warnings + +/* verilator lint_off UNUSED */ +module unread ( + input logic d_i +); + +endmodule +/* verilator lint_on UNUSED */ diff --git a/test/type_param/vendor/pulp-platform/fpga-support/rtl/AsyncDpRam.sv b/test/type_param/vendor/pulp-platform/fpga-support/rtl/AsyncDpRam.sv new file mode 100644 index 00000000..ac04b9ba --- /dev/null +++ b/test/type_param/vendor/pulp-platform/fpga-support/rtl/AsyncDpRam.sv @@ -0,0 +1,62 @@ +// Copyright 2022 Thales Research and Technology +// +// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 +// You may obtain a copy of the License at https://solderpad.org/licenses +// +// Inferable, Asynchronous Dual-Port RAM, there are a write port and a read port +// +// +// This module is designed to work with both Xilinx and Microchip FPGA tools by following the respective +// guidelines: +// - Xilinx UG901 Vivado Design Suite User Guide: Synthesis +// - Inferring Microchip PolarFire RAM Blocks +// +// Intel FPGA (Altera) doesn't seem to support asynchronous RAM +// +// Current Maintainers:: Sébastien Jacq - sjthales on github.com + + +module AsyncDpRam +#( + parameter ADDR_WIDTH = 10, + parameter DATA_DEPTH = 1024, // usually 2**ADDR_WIDTH, but can be lower + parameter DATA_WIDTH = 32 +)( + input logic Clk_CI, + + // Write port + input logic WrEn_SI, + input logic [ADDR_WIDTH-1:0] WrAddr_DI, + input logic [DATA_WIDTH-1:0] WrData_DI, + + // Read port + input logic [ADDR_WIDTH-1:0] RdAddr_DI, + output logic [DATA_WIDTH-1:0] RdData_DO +); + + logic [DATA_WIDTH-1:0] mem [DATA_DEPTH-1:0]= '{default:0}; + + // WRITE + always_ff @(posedge Clk_CI) + begin + if (WrEn_SI) begin + mem[WrAddr_DI] <= WrData_DI; + end + end + + // READ + assign RdData_DO = mem[RdAddr_DI]; + + //////////////////////////// + // assertions + //////////////////////////// + + // pragma translate_off + assert property + (@(posedge Clk_CI) (longint'(2)**longint'(ADDR_WIDTH) >= longint'(DATA_DEPTH))) + else $error("depth out of bounds"); + // pragma translate_on + +endmodule diff --git a/test/type_param/vendor/pulp-platform/fpga-support/rtl/AsyncThreePortRam.sv b/test/type_param/vendor/pulp-platform/fpga-support/rtl/AsyncThreePortRam.sv new file mode 100644 index 00000000..ee6fd1a0 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/fpga-support/rtl/AsyncThreePortRam.sv @@ -0,0 +1,66 @@ +// Copyright 2023 Thales Research and Technology +// +// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +// you may not use this 
file except in compliance with the License. +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 +// You may obtain a copy of the License at https://solderpad.org/licenses +// +// Inferable, Asynchronous Three-Ports RAM, there are a write port and two read ports +// +// +// This module is designed to work with both Xilinx and Microchip FPGA tools by following the respective +// guidelines: +// - Xilinx UG901 Vivado Design Suite User Guide: Synthesis +// - Inferring Microchip PolarFire RAM Blocks +// +// Intel FPGA (Altera) doesn't seem to support asynchronous RAM +// +// Current Maintainers:: Sébastien Jacq - sjthales on github.com + + +module AsyncThreePortRam +#( + parameter ADDR_WIDTH = 10, + parameter DATA_DEPTH = 1024, // usually 2**ADDR_WIDTH, but can be lower + parameter DATA_WIDTH = 32 +)( + input logic Clk_CI, + + // Write port + input logic WrEn_SI, + input logic [ADDR_WIDTH-1:0] WrAddr_DI, + input logic [DATA_WIDTH-1:0] WrData_DI, + + // Read ports + input logic [ADDR_WIDTH-1:0] RdAddr_DI_0, + input logic [ADDR_WIDTH-1:0] RdAddr_DI_1, + + output logic [DATA_WIDTH-1:0] RdData_DO_0, + output logic [DATA_WIDTH-1:0] RdData_DO_1 +); + + logic [DATA_WIDTH-1:0] mem [DATA_DEPTH-1:0]= '{default:0}; + + // WRITE + always_ff @(posedge Clk_CI) + begin + if (WrEn_SI) begin + mem[WrAddr_DI] <= WrData_DI; + end + end + + // READ + assign RdData_DO_0 = mem[RdAddr_DI_0]; + assign RdData_DO_1 = mem[RdAddr_DI_1]; + + //////////////////////////// + // assertions + //////////////////////////// + + // pragma translate_off + assert property + (@(posedge Clk_CI) (longint'(2)**longint'(ADDR_WIDTH) >= longint'(DATA_DEPTH))) + else $error("depth out of bounds"); + // pragma translate_on + +endmodule diff --git a/test/type_param/vendor/pulp-platform/fpga-support/rtl/SyncDpRam.sv b/test/type_param/vendor/pulp-platform/fpga-support/rtl/SyncDpRam.sv new file mode 100644 index 00000000..e3efb12d --- /dev/null +++ b/test/type_param/vendor/pulp-platform/fpga-support/rtl/SyncDpRam.sv @@ 
-0,0 +1,182 @@ +// Copyright 2014 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +/** + * Inferable, Synchronous Dual-Port RAM + * + * This module is designed to work with both Xilinx and Altera tools by following the respective + * guidelines: + * - Xilinx UG901 Vivado Design Suite User Guide: Synthesis (p. 106) + * - Altera Quartus II Handbook Volume 1: Design and Synthesis (p. 768) + * + * Current Maintainers: + * - Michael Schaffner + */ + +// this automatically switches the behavioral description +// pragma translate_off +`define SIMULATION +// pragma translate_on + +module SyncDpRam +#( + parameter ADDR_WIDTH = 10, + parameter DATA_DEPTH = 1024, // usually 2**ADDR_WIDTH, but can be lower + parameter DATA_WIDTH = 32, + parameter OUT_REGS = 0, + parameter SIM_INIT = 0 // for simulation only, will not be synthesized + // 0: no init, 1: zero init, 2: random init + // note: on verilator, 2 is not supported. define the VERILATOR macro to work around. 
+)( + input logic Clk_CI, + input logic Rst_RBI, + // port A + input logic CSelA_SI, + input logic WrEnA_SI, + input logic [DATA_WIDTH-1:0] WrDataA_DI, + input logic [ADDR_WIDTH-1:0] AddrA_DI, + output logic [DATA_WIDTH-1:0] RdDataA_DO, + // port B + input logic CSelB_SI, + input logic WrEnB_SI, + input logic [DATA_WIDTH-1:0] WrDataB_DI, + input logic [ADDR_WIDTH-1:0] AddrB_DI, + output logic [DATA_WIDTH-1:0] RdDataB_DO +); + + //////////////////////////// + // signals, localparams + //////////////////////////// + + logic [DATA_WIDTH-1:0] RdDataA_DN; + logic [DATA_WIDTH-1:0] RdDataA_DP; + logic [DATA_WIDTH-1:0] RdDataB_DN; + logic [DATA_WIDTH-1:0] RdDataB_DP; + logic [DATA_WIDTH-1:0] Mem_DP [DATA_DEPTH-1:0]; + + //////////////////////////// + // XILINX/ALTERA implementation + //////////////////////////// + + `ifdef SIMULATION + always_ff @(posedge Clk_CI) + begin + automatic logic [DATA_WIDTH-1:0] val; + if(Rst_RBI == 1'b0 && SIM_INIT>0) begin + for(int k=0; k0) begin : g_outreg + always_ff @(posedge Clk_CI or negedge Rst_RBI) begin + if(Rst_RBI == 1'b0) + begin + RdDataA_DP <= 0; + RdDataB_DP <= 0; + end + else + begin + RdDataA_DP <= RdDataA_DN; + RdDataB_DP <= RdDataB_DN; + end + end + end + endgenerate // g_outreg + + // output reg bypass + generate + if (OUT_REGS==0) begin : g_oureg_byp + assign RdDataA_DP = RdDataA_DN; + assign RdDataB_DP = RdDataB_DN; + end + endgenerate// g_oureg_byp + + assign RdDataA_DO = RdDataA_DP; + assign RdDataB_DO = RdDataB_DP; + + //////////////////////////// + // assertions + //////////////////////////// + + // pragma translate_off + assert property + (@(posedge Clk_CI) (longint'(2)**longint'(ADDR_WIDTH) >= longint'(DATA_DEPTH))) + else $error("depth out of bounds"); + assert property + (@(posedge Clk_CI) (CSelA_SI & CSelB_SI & WrEnA_SI & WrEnB_SI) |-> (AddrA_DI != AddrB_DI)) + else $error("A and B write to the same address"); + // pragma translate_on + +endmodule // SyncDpRam diff --git 
a/test/type_param/vendor/pulp-platform/tech_cells_generic/src/deprecated/cluster_clk_cells.sv b/test/type_param/vendor/pulp-platform/tech_cells_generic/src/deprecated/cluster_clk_cells.sv new file mode 100644 index 00000000..bc7ed5c7 --- /dev/null +++ b/test/type_param/vendor/pulp-platform/tech_cells_generic/src/deprecated/cluster_clk_cells.sv @@ -0,0 +1,94 @@ +// Copyright 2019 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+
+module cluster_clock_and2 ( // wrapper kept under src/deprecated: forwards every port to tc_clk_and2
+  input logic clk0_i,
+  input logic clk1_i,
+  output logic clk_o
+);
+
+  tc_clk_and2 i_tc_clk_and2 (
+    .clk0_i, // implicit .name connection: binds the port to the same-named signal
+    .clk1_i,
+    .clk_o
+  );
+
+endmodule
+
+module cluster_clock_buffer ( // wrapper: forwards every port to tc_clk_buffer
+  input logic clk_i,
+  output logic clk_o
+);
+
+  tc_clk_buffer i_tc_clk_buffer (
+    .clk_i,
+    .clk_o
+  );
+
+endmodule
+
+// Description: Wrapper around tc_clk_gating, an integrated clock-gating cell (ICG)
+module cluster_clock_gating (
+  input logic clk_i,
+  input logic en_i,
+  input logic test_en_i,
+  output logic clk_o
+);
+
+  tc_clk_gating i_tc_clk_gating ( // no parameter override, so tc_clk_gating's defaults apply
+    .clk_i,
+    .en_i,
+    .test_en_i,
+    .clk_o
+  );
+
+endmodule
+
+module cluster_clock_inverter ( // wrapper: forwards every port to tc_clk_inverter
+  input logic clk_i,
+  output logic clk_o
+);
+
+  tc_clk_inverter i_tc_clk_inverter (
+    .clk_i,
+    .clk_o
+  );
+
+endmodule
+
+module cluster_clock_mux2 ( // wrapper: forwards every port to tc_clk_mux2
+  input logic clk0_i,
+  input logic clk1_i,
+  input logic clk_sel_i,
+  output logic clk_o
+);
+
+  tc_clk_mux2 i_tc_clk_mux2 (
+    .clk0_i,
+    .clk1_i,
+    .clk_sel_i,
+    .clk_o
+  );
+
+endmodule
+
+module cluster_clock_xor2 ( // wrapper: forwards every port to tc_clk_xor2
+  input logic clk0_i,
+  input logic clk1_i,
+  output logic clk_o
+);
+
+  tc_clk_xor2 i_tc_clk_xor2 (
+    .clk0_i,
+    .clk1_i,
+    .clk_o
+  );
+
+endmodule
diff --git a/test/type_param/vendor/pulp-platform/tech_cells_generic/src/deprecated/pulp_clk_cells.sv b/test/type_param/vendor/pulp-platform/tech_cells_generic/src/deprecated/pulp_clk_cells.sv
new file mode 100644
index 00000000..53ad07f0
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/tech_cells_generic/src/deprecated/pulp_clk_cells.sv
@@ -0,0 +1,107 @@
+// Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+module pulp_clock_and2 ( // wrapper kept under src/deprecated: forwards every port to tc_clk_and2
+  input logic clk0_i,
+  input logic clk1_i,
+  output logic clk_o
+);
+
+  tc_clk_and2 i_tc_clk_and2 (
+    .clk0_i, // implicit .name connection: binds the port to the same-named signal
+    .clk1_i,
+    .clk_o
+  );
+
+endmodule
+
+module pulp_clock_buffer ( // wrapper: forwards every port to tc_clk_buffer
+  input logic clk_i,
+  output logic clk_o
+);
+
+  tc_clk_buffer i_tc_clk_buffer (
+    .clk_i,
+    .clk_o
+  );
+
+endmodule
+
+// Description: Wrapper around tc_clk_gating, an integrated clock-gating cell (ICG)
+module pulp_clock_gating (
+  input logic clk_i,
+  input logic en_i,
+  input logic test_en_i,
+  output logic clk_o
+);
+
+  tc_clk_gating i_tc_clk_gating ( // no parameter override, so tc_clk_gating's defaults apply
+    .clk_i,
+    .en_i,
+    .test_en_i,
+    .clk_o
+  );
+
+endmodule
+
+module pulp_clock_inverter ( // wrapper: forwards every port to tc_clk_inverter
+  input logic clk_i,
+  output logic clk_o
+);
+
+  tc_clk_inverter i_tc_clk_inverter (
+    .clk_i,
+    .clk_o
+  );
+
+endmodule
+
+module pulp_clock_mux2 ( // wrapper: forwards every port to tc_clk_mux2
+  input logic clk0_i,
+  input logic clk1_i,
+  input logic clk_sel_i,
+  output logic clk_o
+);
+
+  tc_clk_mux2 i_tc_clk_mux2 (
+    .clk0_i,
+    .clk1_i,
+    .clk_sel_i,
+    .clk_o
+  );
+
+endmodule
+
+module pulp_clock_xor2 ( // wrapper: forwards every port to tc_clk_xor2
+  input logic clk0_i,
+  input logic clk1_i,
+  output logic clk_o
+);
+
+  tc_clk_xor2 i_tc_clk_xor2 (
+    .clk0_i,
+    .clk1_i,
+    .clk_o
+  );
+
+endmodule
+
+`ifndef SYNTHESIS
+module pulp_clock_delay( // only compiled when SYNTHESIS is not defined
+  input logic in_i,
+  output logic out_o
+);
+
+  assign #(300ps) out_o = in_i; // out_o follows in_i through a fixed 300ps assignment delay
+
+endmodule
+`endif
+
+
diff --git a/test/type_param/vendor/pulp-platform/tech_cells_generic/src/rtl/tc_clk.sv b/test/type_param/vendor/pulp-platform/tech_cells_generic/src/rtl/tc_clk.sv
new file mode 100644
index 00000000..3ab329e4
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/tech_cells_generic/src/rtl/tc_clk.sv
@@ -0,0 +1,120 @@
+//
Copyright 2019 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+module tc_clk_and2 ( // clk_o is the bitwise AND of the two input clocks
+  input logic clk0_i,
+  input logic clk1_i,
+  output logic clk_o
+);
+
+  assign clk_o = clk0_i & clk1_i;
+
+endmodule
+
+module tc_clk_buffer ( // clk_o is a direct copy of clk_i
+  input logic clk_i,
+  output logic clk_o
+);
+
+  assign clk_o = clk_i;
+
+endmodule
+
+// Description: Behavioral model of an integrated clock-gating cell (ICG)
+module tc_clk_gating #(
+  /// This parameter is a hint for tool/technology specific mappings of this
+  /// tech_cell. It indicates whether this particular clk gate instance is
+  /// required for functional correctness or just instantiated for power
+  /// savings. If IS_FUNCTIONAL == 0, technology specific mappings might
+  /// replace this cell with a feedthrough connection without any gating.
+  parameter bit IS_FUNCTIONAL = 1'b1 // not referenced by the behavioral model below
+)(
+  input logic clk_i,
+  input logic en_i,
+  input logic test_en_i,
+  output logic clk_o
+);
+
+  logic clk_en; // latched enable: en_i OR test_en_i
+
+  always_latch begin
+    if (clk_i == 1'b0) clk_en <= en_i | test_en_i; // updated only while clk_i is low; holds its value while clk_i is high
+  end
+
+  assign clk_o = clk_i & clk_en; // clock passes through only when the latched enable is set
+
+endmodule
+
+module tc_clk_inverter ( // clk_o is the bitwise inverse of clk_i
+  input logic clk_i,
+  output logic clk_o
+);
+
+  assign clk_o = ~clk_i;
+
+endmodule
+
+// Warning: Typical clock mux cells of a technology's std cell library ARE NOT
+// GLITCH FREE!! The only difference to a regular multiplexer cell is that they
+// feature balanced rise- and fall-times.
In other words: SWITCHING FROM ONE
+// CLOCK TO THE OTHER CAN INTRODUCE GLITCHES. ALSO, GLITCHES ON THE SELECT LINE
+// DIRECTLY TRANSLATE TO GLITCHES ON THE OUTPUT CLOCK!! This cell is only
+// intended to be used for quasi-static switching between clocks when one of the
+// clocks is anyway inactive or if the downstream logic remains gated or in
+// reset state during the transition phase. If you need dynamic switching
+// between arbitrary input clocks without introducing glitches, have a look at
+// the clk_mux_glitch_free cell in the pulp-platform/common_cells repository.
+module tc_clk_mux2 (
+  input logic clk0_i,
+  input logic clk1_i,
+  input logic clk_sel_i, // 1 selects clk1_i, 0 selects clk0_i
+  output logic clk_o
+);
+
+  assign clk_o = (clk_sel_i) ? clk1_i : clk0_i;
+
+endmodule
+
+module tc_clk_xor2 ( // clk_o is the bitwise XOR of the two input clocks
+  input logic clk0_i,
+  input logic clk1_i,
+  output logic clk_o
+);
+
+  assign clk_o = clk0_i ^ clk1_i;
+
+endmodule
+
+module tc_clk_or2 ( // clk_o is the bitwise OR of the two input clocks
+  input logic clk0_i,
+  input logic clk1_i,
+  output logic clk_o
+);
+
+  assign clk_o = clk0_i | clk1_i;
+
+endmodule
+
+`ifndef SYNTHESIS
+module tc_clk_delay #( // only compiled when SYNTHESIS is not defined
+  parameter int unsigned Delay = 300ps // assignment delay applied from in_i to out_o
+) (
+  input logic in_i,
+  output logic out_o
+);
+
+// pragma translate_off
+`ifndef VERILATOR
+  assign #(Delay) out_o = in_i; // the only driver of out_o; left out when VERILATOR is defined
+`endif
+// pragma translate_on
+
+endmodule
+`endif
diff --git a/test/type_param/vendor/pulp-platform/tech_cells_generic/src/rtl/tc_sram.sv b/test/type_param/vendor/pulp-platform/tech_cells_generic/src/rtl/tc_sram.sv
new file mode 100644
index 00000000..b702a116
--- /dev/null
+++ b/test/type_param/vendor/pulp-platform/tech_cells_generic/src/rtl/tc_sram.sv
@@ -0,0 +1,245 @@
+// Copyright (c) 2020 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Author: Wolfgang Roenninger + +// Description: Functional module of a generic SRAM +// +// Parameters: +// - NumWords: Number of words in the macro. Address width can be calculated with: +// `AddrWidth = (NumWords > 32'd1) ? $clog2(NumWords) : 32'd1` +// The module issues a warning if there is a request on an address which is +// not in range. +// - DataWidth: Width of the ports `wdata_i` and `rdata_o`. +// - ByteWidth: Width of a byte, the byte enable signal `be_i` can be calculated with the +// ceiling division `ceil(DataWidth, ByteWidth)`. +// - NumPorts: Number of read and write ports. Each is a full port. Ports with a higher +// index read and write after the ones with lower indices. +// - Latency: Read latency, the read data is available this many cycles after a request. +// - SimInit: Macro simulation initialization. Values are: +// "zeros": Each bit gets initialized with 1'b0. +// "ones": Each bit gets initialized with 1'b1. +// "random": Each bit gets random initialized with 1'b0 or 1'b1. +// "none": Each bit gets initialized with 1'bx. (default) +// - PrintSimCfg: Prints at the beginning of the simulation a `Hello` message with +// the instantiated parameters and signal widths. +// - ImplKey: Key by which an instance can refer to a specific implementation (e.g. macro). +// May be used to look up additional parameters for implementation (e.g. generator, +// line width, muxing) in an external reference, such as a configuration file. 
+//
+// Ports:
+// - `clk_i`: Clock
+// - `rst_ni`: Asynchronous reset, active low
+// - `req_i`: Request, active high
+// - `we_i`: Write request, active high
+// - `addr_i`: Request address
+// - `wdata_i`: Write data, has to be valid on request
+// - `be_i`: Byte enable, active high
+// - `rdata_o`: Read data, valid `Latency` cycles after a request with `we_i` low.
+//
+// Behaviour:
+// - Address collision: When Ports are making a write access onto the same address,
+// the write operation will start at the port with the lowest address
+// index, each port will overwrite the changes made by the previous ports
+// according to how the respective `be_i` signal is set.
+// - Read data on write: This implementation will not produce a read data output on the signal
+// `rdata_o` when `req_i` and `we_i` are asserted. The output data is stable
+// on write requests.
+
+module tc_sram #(
+  parameter int unsigned NumWords = 32'd1024, // Number of Words in data array
+  parameter int unsigned DataWidth = 32'd128, // Data signal width
+  parameter int unsigned ByteWidth = 32'd8, // Width of a data byte
+  parameter int unsigned NumPorts = 32'd2, // Number of read and write ports
+  parameter int unsigned Latency = 32'd1, // Latency when the read data is available
+  parameter SimInit = "none", // Simulation initialization
+  parameter bit PrintSimCfg = 1'b0, // Print configuration
+  parameter ImplKey = "none", // Reference to specific implementation
+  // DEPENDENT PARAMETERS, DO NOT OVERWRITE!
+  parameter int unsigned AddrWidth = (NumWords > 32'd1) ? $clog2(NumWords) : 32'd1,
+  parameter int unsigned BeWidth = (DataWidth + ByteWidth - 32'd1) / ByteWidth, // ceil_div
+  parameter type addr_t = logic [AddrWidth-1:0],
+  parameter type data_t = logic [DataWidth-1:0],
+  parameter type be_t = logic [BeWidth-1:0]
+) (
+  input logic clk_i, // Clock
+  input logic rst_ni, // Asynchronous reset active low
+  // input ports
+  input logic [NumPorts-1:0] req_i, // request
+  input logic [NumPorts-1:0] we_i, // write enable
+  input addr_t [NumPorts-1:0] addr_i, // request address
+  input data_t [NumPorts-1:0] wdata_i, // write data
+  input be_t [NumPorts-1:0] be_i, // write byte enable
+  // output ports
+  output data_t [NumPorts-1:0] rdata_o // read data
+);
+
+  // memory array
+  data_t sram [NumWords-1:0];
+  // hold the read address when no read access is made
+  addr_t [NumPorts-1:0] r_addr_q;
+
+  // SRAM simulation initialization
+  data_t init_val[NumWords-1:0]; // per-word reset image, filled once by proc_sram_init
+  initial begin : proc_sram_init
+    for (int unsigned i = 0; i < NumWords; i++) begin
+      case (SimInit)
+        "zeros": init_val[i] = {DataWidth{1'b0}};
+        "ones": init_val[i] = {DataWidth{1'b1}};
+        "random": init_val[i] = {DataWidth{$urandom()}}; // NOTE(review): replicates a single $urandom() result per word - confirm this is the intended randomness
+        default: init_val[i] = {DataWidth{1'bx}};
+      endcase
+    end
+  end
+
+  // set the read output if requested
+  // The read data at the highest array index is set combinational.
+  // It gets then delayed for a number of cycles until it gets available at the output at
+  // array index 0.
+
+  // read data output assignment
+  data_t [NumPorts-1:0][Latency-1:0] rdata_q, rdata_d; // per-port read pipeline: stage Latency-1 is the input, stage 0 drives rdata_o
+  if (Latency == 32'd0) begin : gen_no_read_lat
+    for (genvar i = 0; i < NumPorts; i++) begin : gen_port
+      assign rdata_o[i] = (req_i[i] && !we_i[i]) ? sram[addr_i[i]] : sram[r_addr_q[i]];
+    end
+  end else begin : gen_read_lat
+
+    always_comb begin
+      for (int unsigned i = 0; i < NumPorts; i++) begin
+        rdata_o[i] = rdata_q[i][0];
+        for (int unsigned j = 0; j < (Latency-1); j++) begin
+          rdata_d[i][j] = rdata_q[i][j+1];
+        end
+        rdata_d[i][Latency-1] = (req_i[i] && !we_i[i]) ? sram[addr_i[i]] : sram[r_addr_q[i]];
+      end
+    end
+  end
+
+  // In case simulation initialization is disabled (SimInit == 'none'), don't assign to the sram
+  // content at all. This improves simulation performance in tools like verilator
+  if (SimInit == "none") begin
+    // write memory array without initialization
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) begin // only r_addr_q is reset in this branch; sram and rdata_q are not assigned
+        for (int i = 0; i < NumPorts; i++) begin
+          r_addr_q[i] <= {AddrWidth{1'b0}};
+        end
+      end else begin
+        // read value latch happens before new data is written to the sram
+        for (int unsigned i = 0; i < NumPorts; i++) begin
+          if (Latency != 0) begin
+            for (int unsigned j = 0; j < Latency; j++) begin
+              rdata_q[i][j] <= rdata_d[i][j];
+            end
+          end
+        end
+        // there is a request for the SRAM, latch the required register
+        for (int unsigned i = 0; i < NumPorts; i++) begin
+          if (req_i[i]) begin
+            if (we_i[i]) begin
+              // update value when write is set at clock
+              for (int unsigned j = 0; j < BeWidth; j++) begin
+                if (be_i[i][j]) begin
+                  sram[addr_i[i]][j*ByteWidth+:ByteWidth] <= wdata_i[i][j*ByteWidth+:ByteWidth];
+                end
+              end
+            end else begin
+              // otherwise update read address for subsequent non request cycles
+              r_addr_q[i] <= addr_i[i];
+            end
+          end // if req_i
+        end // for ports
+      end // if !rst_ni
+    end
+  end else begin
+    // write memory array
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+      if (!rst_ni) begin
+        sram <= init_val; // reset reloads the whole array from the init image
+        for (int i = 0; i < NumPorts; i++) begin
+          r_addr_q[i] <= {AddrWidth{1'b0}};
+          // initialize the read output register for each port
+          if (Latency != 32'd0) begin
+            for (int unsigned j = 0; j < Latency; j++) begin
+              rdata_q[i][j] <= init_val[{AddrWidth{1'b0}}]; // index is all zeros, i.e. init_val[0]
+            end
+          end
+        end
+      end else begin
+        // read value latch happens before new data is written to the sram
+        for (int unsigned i = 0; i < NumPorts; i++) begin
+          if (Latency != 0) begin
+            for (int unsigned j = 0; j < Latency; j++) begin
+              rdata_q[i][j] <= rdata_d[i][j];
+            end
+          end
+        end
+        // there is a request for the SRAM, latch the required register
+        for (int unsigned i = 0; i < NumPorts; i++) begin
+          if (req_i[i]) begin
+            if (we_i[i]) begin
+              // update value when write is set at clock
+              for (int unsigned j = 0; j < BeWidth; j++) begin
+                if (be_i[i][j]) begin
+                  sram[addr_i[i]][j*ByteWidth+:ByteWidth] <= wdata_i[i][j*ByteWidth+:ByteWidth];
+                end
+              end
+            end else begin
+              // otherwise update read address for subsequent non request cycles
+              r_addr_q[i] <= addr_i[i];
+            end
+          end // if req_i
+        end // for ports
+      end // if !rst_ni
+    end
+  end
+
+// Validate parameters.
+// pragma translate_off
+`ifndef VERILATOR
+`ifndef TARGET_SYNTHESIS
+  initial begin: p_assertions
+    assert ($bits(addr_i) == NumPorts * AddrWidth) else $fatal(1, "AddrWidth problem on `addr_i`");
+    assert ($bits(wdata_i) == NumPorts * DataWidth) else $fatal(1, "DataWidth problem on `wdata_i`");
+    assert ($bits(be_i) == NumPorts * BeWidth) else $fatal(1, "BeWidth problem on `be_i`" );
+    assert ($bits(rdata_o) == NumPorts * DataWidth) else $fatal(1, "DataWidth problem on `rdata_o`");
+    assert (NumWords >= 32'd1) else $fatal(1, "NumWords has to be > 0");
+    assert (DataWidth >= 32'd1) else $fatal(1, "DataWidth has to be > 0");
+    assert (ByteWidth >= 32'd1) else $fatal(1, "ByteWidth has to be > 0");
+    assert (NumPorts >= 32'd1) else $fatal(1, "The number of ports must be at least 1!");
+  end
+  initial begin: p_sim_hello
+    if (PrintSimCfg) begin
+      $display("#################################################################################");
+      $display("tc_sram functional instantiated with the configuration:" );
+      $display("Instance: %m" );
+      $display("Number of ports (dec): %0d", NumPorts );
+      $display("Number of words (dec): %0d", NumWords );
+      $display("Address width (dec): %0d", AddrWidth );
+      $display("Data width (dec): %0d", DataWidth );
+      $display("Byte width (dec): %0d", ByteWidth );
+      $display("Byte enable width (dec): %0d", BeWidth );
+      $display("Latency Cycles (dec): %0d", Latency );
+      $display("Simulation init (str): %0s", SimInit );
+      $display("#################################################################################");
+    end
+  end
+  for (genvar i = 0; i < NumPorts; i++) begin : gen_assertions // per port: warn when a request addresses a word index >= NumWords
+    assert property ( @(posedge clk_i) disable iff (!rst_ni)
+        (req_i[i] |-> (addr_i[i] < NumWords))) else
+      $warning("Request address %0h not mapped, port %0d, expect random write or read behavior!",
+          addr_i[i], i);
+  end
+
+`endif
+`endif
+// pragma translate_on
+endmodule