Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 93 additions & 4 deletions src/mem/DRAMInterface.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,18 @@ class DRAMInterface(MemInterface):
# timing behaviour and constraints - all in nanoseconds

# the amount of time in nanoseconds from issuing an activate command
# to the data being available in the row buffer for a read/write
tRCD = Param.Latency("RAS to CAS delay")
# to the data being available in the row buffer for a read
tRCD = Param.Latency("RAS to Read CAS delay")

# the time from issuing a read/write command to seeing the actual data
tCL = Param.Latency("CAS latency")
# the amount of time in nanoseconds from issuing an activate command
# to the data being available in the row buffer for a write
tRCD_WR = Param.Latency(Self.tRCD, "RAS to Write CAS delay")

# the time from issuing a read command to seeing the actual data
tCL = Param.Latency("Read CAS latency")

# the time from issuing a write command to seeing the actual data
tCWL = Param.Latency(Self.tCL, "Write CAS latency")

# minimum time between a precharge and subsequent activate
tRP = Param.Latency("Row precharge time")
Expand Down Expand Up @@ -1145,6 +1152,88 @@ class HBM_1000_4H_1x64(HBM_1000_4H_1x128):
# self refresh exit time
tXS = '65ns'

# A single HBM2 x64 interface (tested with HBMCtrl in gem5)
# to be used as a single pseudo channel. The timings are based
# on HBM gen2 specifications. 4H stack, 8Gb per die and total capacity
# of 4GiB.

class HBM_2000_4H_1x64(DRAMInterface):

    # 64-bit interface for a single pseudo channel
    device_bus_width = 64

    # HBM2 supports BL4
    burst_length = 4

    # size of channel in bytes, 4H stack of 8Gb dies is 4GiB per stack;
    # with 16 pseudo channels, 256MiB per pseudo channel
    device_size = "256MiB"

    device_rowbuffer_size = "1KiB"

    # 1x64 configuration: one x64 device per rank
    devices_per_rank = 1

    ranks_per_channel = 1

    # 16 banks arranged in 4 bank groups
    banks_per_rank = 16
    bank_groups_per_rank = 4

    # 1000 MHz for 2Gbps DDR data rate
    tCK = "1ns"

    tRP = "14ns"

    tCCD_L = "3ns"

    # activate-to-read CAS delay; writes use the shorter tRCD_WR
    tRCD = "12ns"
    tRCD_WR = "6ns"
    tCL = "18ns"
    tCWL = "7ns"
    tRAS = "28ns"

    # BL4 in pseudo channel mode
    # DDR @ 1000 MHz means 4 * 1ns / 2 = 2ns
    tBURST = "2ns"

    # value for 2Gb device from JEDEC spec
    tRFC = "220ns"

    # value for 2Gb device from JEDEC spec
    tREFI = "3.9us"

    tWR = "14ns"
    tRTP = "5ns"
    tWTR = "4ns"
    tWTR_L = "9ns"
    tRTW = "18ns"

    # tAAD from RBus
    tAAD = "1ns"

    # single rank device, set to 0
    tCS = "0ns"

    tRRD = "4ns"
    tRRD_L = "6ns"

    # for a single pseudo channel
    tXAW = "16ns"
    activation_limit = 4

    # 4tCK
    tXP = "8ns"

    # self-refresh exit time
    # NOTE(review): the original comment derived this as tRFC + tXP
    # ("160ns + 8ns = 168ns"), which matches neither tRFC above (220ns)
    # nor the 216ns value used here — confirm against the JEDEC HBM2 spec
    tXS = "216ns"

    page_policy = 'close_adaptive'

    read_buffer_size = 64
    write_buffer_size = 64

    two_cycle_activate = True

# A single LPDDR5 x16 interface (one command/address bus)
# for a single x16 channel with default timings based on
# initial JEDEC specification
Expand Down
50 changes: 50 additions & 0 deletions src/mem/HBMCtrl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Copyright (c) 2022 The Regents of the University of California
# All Rights Reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from m5.params import *
from m5.proxy import *
from m5.objects.SimpleMemCtrl import *
from m5.objects.DRAMInterface import HBM_2000_4H_1x64

# HBMCtrl manages two pseudo channels of HBM2

class HBMCtrl(SimpleMemCtrl):
    type = 'HBMCtrl'
    cxx_header = "mem/hbm_ctrl.hh"
    cxx_class = 'gem5::memory::HBMCtrl'

    # HBMCtrl uses the SimpleMemCtrl's interface
    # `dram` as the first pseudo channel; the second
    # pseudo channel interface follows here.
    # HBMCtrl has been tested with two HBM_2000_4H_1x64 interfaces
    dram_2 = Param.DRAMInterface(HBM_2000_4H_1x64(), "Memory interface")

    # For mixed traffic, HBMCtrl with HBM_2000_4H_1x64 interfaces
    # gives the best results with the following min_r/w_per_switch
    min_reads_per_switch = 64
    min_writes_per_switch = 64

    # if True, the controller keeps separate read/write queues
    # per pseudo channel instead of one shared pair
    partitioned_q = Param.Bool(True, "split queues for pseudo channels")
2 changes: 2 additions & 0 deletions src/mem/SConscript
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ DebugFlag('SysBridge')
SimObject('SimpleMemCtrl.py', sim_objects=['SimpleMemCtrl'],
enums=['MemSched'])
SimObject('MemCtrl.py', sim_objects=['MemCtrl'])
SimObject('HBMCtrl.py', sim_objects=['HBMCtrl'])
SimObject('MemInterface.py', sim_objects=['MemInterface'], enums=['AddrMap'])
SimObject('DRAMInterface.py', sim_objects=['DRAMInterface'],
enums=['PageManage'])
Expand All @@ -77,6 +78,7 @@ Source('external_master.cc')
Source('external_slave.cc')
Source('simple_mem_ctrl.cc')
Source('mem_ctrl.cc')
Source('hbm_ctrl.cc')
Source('mem_interface.cc')
Source('dram_interface.cc')
Source('nvm_interface.cc')
Expand Down
4 changes: 4 additions & 0 deletions src/mem/SimpleMemCtrl.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,10 @@ class SimpleMemCtrl(QoSMemCtrl):
min_writes_per_switch = Param.Unsigned(16, "Minimum write bursts before "
"switching to reads")

# minimum read bursts to schedule before switching back to writes
min_reads_per_switch = Param.Unsigned(16, "Minimum read bursts before "
"switching to writes")

# scheduler, address map and page policy
mem_sched_policy = Param.MemSched('frfcfs', "Memory scheduling policy")

Expand Down
66 changes: 43 additions & 23 deletions src/mem/dram_interface.cc
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ DRAMInterface::chooseNextFRFCFS(MemPacketQueue& queue, Tick min_col_at) const
MemPacket* pkt = *i;

// select optimal DRAM packet in Q
if (pkt->isDram()) {
if (pkt->isDram() && (pkt->pseudoChannel == pseudoChannel)) {
const Bank& bank = ranks[pkt->rank]->banks[pkt->bank];
const Tick col_allowed_at = pkt->isRead() ? bank.rdAllowedAt :
bank.wrAllowedAt;
Expand Down Expand Up @@ -183,7 +183,7 @@ DRAMInterface::activateBank(Rank& rank_ref, Bank& bank_ref,
if (twoCycleActivate)
act_at = ctrl->verifyMultiCmd(act_tick, maxCommandsPerWindow, tAAD);
else
act_at = ctrl->verifySingleCmd(act_tick, maxCommandsPerWindow);
act_at = ctrl->verifySingleCmd(act_tick, maxCommandsPerWindow, true);

DPRINTF(DRAM, "Activate at tick %d\n", act_at);

Expand Down Expand Up @@ -214,8 +214,9 @@ DRAMInterface::activateBank(Rank& rank_ref, Bank& bank_ref,
bank_ref.preAllowedAt = act_at + tRAS;

// Respect the row-to-column command delay for both read and write cmds
bank_ref.rdAllowedAt = std::max(act_at + tRCD, bank_ref.rdAllowedAt);
bank_ref.wrAllowedAt = std::max(act_at + tRCD, bank_ref.wrAllowedAt);
bank_ref.rdAllowedAt = std::max(act_at + tRCD_RD, bank_ref.rdAllowedAt);
bank_ref.wrAllowedAt = std::max(act_at + tRCD_WR, bank_ref.wrAllowedAt);


// start by enforcing tRRD
for (int i = 0; i < banksPerRank; i++) {
Expand Down Expand Up @@ -301,7 +302,7 @@ DRAMInterface::prechargeBank(Rank& rank_ref, Bank& bank, Tick pre_tick,
// Issuing an explicit PRE command
// Verify that we have command bandwidth to issue the precharge
// if not, shift to next burst window
pre_at = ctrl->verifySingleCmd(pre_tick, maxCommandsPerWindow);
pre_at = ctrl->verifySingleCmd(pre_tick, maxCommandsPerWindow, true);
// enforce tPPD
for (int i = 0; i < banksPerRank; i++) {
rank_ref.banks[i].preAllowedAt = std::max(pre_at + tPPD,
Expand Down Expand Up @@ -399,10 +400,11 @@ DRAMInterface::doBurstAccess(MemPacket* mem_pkt, Tick next_burst_at,

// verify that we have command bandwidth to issue the burst
// if not, shift to next burst window
if (dataClockSync && ((cmd_at - rank_ref.lastBurstTick) > clkResyncDelay))
Tick max_sync = clkResyncDelay + (mem_pkt->isRead() ? tRL : tWL);
if (dataClockSync && ((cmd_at - rank_ref.lastBurstTick) > max_sync))
cmd_at = ctrl->verifyMultiCmd(cmd_at, maxCommandsPerWindow, tCK);
else
cmd_at = ctrl->verifySingleCmd(cmd_at, maxCommandsPerWindow);
cmd_at = ctrl->verifySingleCmd(cmd_at, maxCommandsPerWindow, false);

// if we are interleaving bursts, ensure that
// 1) we don't double interleave on next burst issue
Expand All @@ -423,7 +425,11 @@ DRAMInterface::doBurstAccess(MemPacket* mem_pkt, Tick next_burst_at,
DPRINTF(DRAM, "Schedule RD/WR burst at tick %d\n", cmd_at);

// update the packet ready time
mem_pkt->readyTime = cmd_at + tCL + tBURST;
if (mem_pkt->isRead()) {
mem_pkt->readyTime = cmd_at + tRL + tBURST;
} else {
mem_pkt->readyTime = cmd_at + tWL + tBURST;
}

rank_ref.lastBurstTick = cmd_at;

Expand Down Expand Up @@ -513,6 +519,13 @@ DRAMInterface::doBurstAccess(MemPacket* mem_pkt, Tick next_burst_at,
// 3) make sure we are not considering the packet that we are
// currently dealing with
while (!got_more_hits && p != queue[i].end()) {

if ((*p)->pseudoChannel != pseudoChannel) {
// only consider if this pkt belongs to this interface
++p;
continue;
}

if (mem_pkt != (*p)) {
bool same_rank_bank = (mem_pkt->rank == (*p)->rank) &&
(mem_pkt->bank == (*p)->bank);
Expand Down Expand Up @@ -625,19 +638,21 @@ DRAMInterface::DRAMInterface(const DRAMInterfaceParams &_p)
: MemInterface(_p),
bankGroupsPerRank(_p.bank_groups_per_rank),
bankGroupArch(_p.bank_groups_per_rank > 0),
tCL(_p.tCL),
tRL(_p.tCL),
tWL(_p.tCWL),
tBURST_MIN(_p.tBURST_MIN), tBURST_MAX(_p.tBURST_MAX),
tCCD_L_WR(_p.tCCD_L_WR), tCCD_L(_p.tCCD_L), tRCD(_p.tRCD),
tCCD_L_WR(_p.tCCD_L_WR), tCCD_L(_p.tCCD_L),
tRCD_RD(_p.tRCD), tRCD_WR(_p.tRCD_WR),
tRP(_p.tRP), tRAS(_p.tRAS), tWR(_p.tWR), tRTP(_p.tRTP),
tRFC(_p.tRFC), tREFI(_p.tREFI), tRRD(_p.tRRD), tRRD_L(_p.tRRD_L),
tPPD(_p.tPPD), tAAD(_p.tAAD),
tXAW(_p.tXAW), tXP(_p.tXP), tXS(_p.tXS),
clkResyncDelay(tCL + _p.tBURST_MAX),
clkResyncDelay(_p.tBURST_MAX),
dataClockSync(_p.data_clock_sync),
burstInterleave(tBURST != tBURST_MIN),
twoCycleActivate(_p.two_cycle_activate),
activationLimit(_p.activation_limit),
wrToRdDlySameBG(tCL + _p.tBURST_MAX + _p.tWTR_L),
wrToRdDlySameBG(tWL + _p.tBURST_MAX + _p.tWTR_L),
rdToWrDlySameBG(_p.tRTW + _p.tBURST_MAX),
pageMgmt(_p.page_policy),
maxAccessesPerRow(_p.max_accesses_per_row),
Expand Down Expand Up @@ -819,7 +834,7 @@ DRAMInterface::isBusy(bool read_queue_empty, bool all_writes_nvm)

MemPacket*
DRAMInterface::decodePacket(const PacketPtr pkt, Addr pkt_addr,
unsigned size, bool is_read)
unsigned size, bool is_read, uint8_t pseudo_channel)
{
// decode the address based on the address mapping scheme, with
// Ro, Ra, Co, Ba and Ch denoting row, rank, column, bank and
Expand Down Expand Up @@ -899,8 +914,8 @@ DRAMInterface::decodePacket(const PacketPtr pkt, Addr pkt_addr,
// later
uint16_t bank_id = banksPerRank * rank + bank;

return new MemPacket(pkt, is_read, true, rank, bank, row, bank_id,
pkt_addr, size);
return new MemPacket(pkt, is_read, true, pseudo_channel, rank, bank, row,
bank_id, pkt_addr, size);
}

void DRAMInterface::setupRank(const uint8_t rank, const bool is_read)
Expand Down Expand Up @@ -1017,10 +1032,6 @@ DRAMInterface::minBankPrep(const MemPacketQueue& queue,
Tick min_act_at = MaxTick;
std::vector<uint32_t> bank_mask(ranksPerChannel, 0);

// latest Tick for which ACT can occur without incurring additional
// delay on the data bus
const Tick hidden_act_max = std::max(min_col_at - tRCD, curTick());

// Flag condition when burst can issue back-to-back with previous burst
bool found_seamless_bank = false;

Expand All @@ -1032,6 +1043,9 @@ DRAMInterface::minBankPrep(const MemPacketQueue& queue,
// bank in question
std::vector<bool> got_waiting(ranksPerChannel * banksPerRank, false);
for (const auto& p : queue) {
if (p->pseudoChannel != pseudoChannel)
continue;

if (p->isDram() && ranks[p->rank]->inRefIdleState())
got_waiting[p->bankId] = true;
}
Expand All @@ -1054,6 +1068,13 @@ DRAMInterface::minBankPrep(const MemPacketQueue& queue,
std::max(ranks[i]->banks[j].actAllowedAt, curTick()) :
std::max(ranks[i]->banks[j].preAllowedAt, curTick()) + tRP;

// latest Tick for which ACT can occur without
// incurring additional delay on the data bus
const Tick tRCD = ctrl->inReadBusState(false) ?
tRCD_RD : tRCD_WR;
const Tick hidden_act_max =
std::max(min_col_at - tRCD, curTick());

// When is the earliest the R/W burst can issue?
const Tick col_allowed_at = ctrl->inReadBusState(false) ?
ranks[i]->banks[j].rdAllowedAt :
Expand Down Expand Up @@ -1288,11 +1309,10 @@ DRAMInterface::Rank::processRefreshEvent()
// if a request is at the moment being handled and this request is
// accessing the current rank then wait for it to finish
if ((rank == dram.activeRank)
&& (dram.ctrl->requestEventScheduled())) {
&& (dram.ctrl->requestEventScheduled(dram.pseudoChannel))) {
// hand control over to the request loop until it is
// evaluated next
DPRINTF(DRAM, "Refresh awaiting draining\n");

return;
} else {
refreshState = REF_PD_EXIT;
Expand Down Expand Up @@ -1649,10 +1669,10 @@ DRAMInterface::Rank::processPowerEvent()
}

// completed refresh event, ensure next request is scheduled
if (!dram.ctrl->requestEventScheduled()) {
if (!(dram.ctrl->requestEventScheduled(dram.pseudoChannel))) {
DPRINTF(DRAM, "Scheduling next request after refreshing"
" rank %d\n", rank);
dram.ctrl->restartScheduler(curTick());
dram.ctrl->restartScheduler(curTick(), dram.pseudoChannel);
}
}

Expand Down
Loading