Skip to content

Commit dbb6384

Browse files
committed
Add HMT execution model stub
1 parent ec94709 commit dbb6384

File tree

7 files changed

+73
-15
lines changed

7 files changed

+73
-15
lines changed

PIMbench/vec-add/PIM/vec-add.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
// This file is licensed under the MIT License.
44
// See the LICENSE file in the root of this repository for more details.
55

6+
67
#include <iostream>
78
#include <vector>
89
#include <getopt.h>

libpimeval/src/pimCmd.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -716,6 +716,8 @@ pimCmdFunc2::sanityCheck() const
716716
bool
717717
pimCmdFunc2::computeRegion(unsigned index)
718718
{
719+
//HMT
720+
// Computes region `index` for each source operand. TODO(HMT): confirm whether a region corresponds to a row.
719721
const pimObjInfo& objSrc1 = m_device->getResMgr()->getObjInfo(m_src1);
720722
const pimObjInfo& objSrc2 = m_device->getResMgr()->getObjInfo(m_src2);
721723
pimObjInfo& objDest = m_device->getResMgr()->getObjInfo(m_dest);

libpimeval/src/pimPerfEnergyBankLevel.cpp

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,8 @@ pimPerfEnergyBankLevel::getPerfEnergyForFunc2(PimCmdEnum cmdType, const pimObjIn
178178
unsigned numPass = obj.getMaxNumRegionsPerCore();
179179
unsigned bitsPerElement = obj.getBitsPerElement(PimBitWidth::ACTUAL);
180180
unsigned numCoresUsed = obj.isLoadBalanced() ? obj.getNumCoreAvailable() : obj.getNumCoresUsed();
181+
double m_ttrans = HMT_model.get_m_ttrans();
182+
double m_etrans = HMT_model.get_m_etrans();
181183

182184
unsigned maxElementsPerRegion = obj.getMaxElementsPerRegion();
183185
double numberOfOperationPerElement = ((double)bitsPerElement / m_blimpCoreBitWidth);
@@ -196,12 +198,12 @@ pimPerfEnergyBankLevel::getPerfEnergyForFunc2(PimCmdEnum cmdType, const pimObjIn
196198
case PimCmdEnum::MUL:
197199
case PimCmdEnum::DIV:
198200
{
199-
msRead = ((2 * (m_tACT + m_tPRE)) + (maxGDLItr * m_tGDL)) * (numPass - 1) + ((2 * (activateMS + m_tPRE)) + (minGDLItr * m_tGDL));
200-
msWrite = ((m_tACT + m_tPRE) + (maxGDLItr * m_tGDL)) * (numPass - 1) + ((activateMS + m_tPRE) + (minGDLItr * m_tGDL));
201+
msRead = ((2 * (m_tACT + m_tPRE + m_ttrans)) + (maxGDLItr * m_tGDL)) * (numPass - 1) + ((2 * (activateMS + m_tPRE)) + (minGDLItr * m_tGDL));
202+
msWrite = ((m_tACT + m_tPRE + m_ttrans) + (maxGDLItr * m_tGDL)) * (numPass - 1) + ((activateMS + m_tPRE) + (minGDLItr * m_tGDL));
201203
msCompute = (maxElementsPerRegion * m_blimpLatency * numberOfOperationPerElement * (numPass - 1)) + (minElementPerRegion * m_blimpLatency * numberOfOperationPerElement);
202204
msRuntime = msRead + msWrite + msCompute;
203-
mjEnergy = (((m_eACT + m_ePRE) * 3) + (maxElementsPerRegion * m_blimpArithmeticEnergy * numberOfOperationPerElement)) * numCoresUsed * (numPass - 1);
204-
mjEnergy += (((m_eACT + m_ePRE) * 3) + (minElementPerRegion * m_blimpArithmeticEnergy * numberOfOperationPerElement)) * numCoresUsed;
205+
mjEnergy = (((m_eACT + m_ePRE + m_etrans) * 3) + (maxElementsPerRegion * m_blimpArithmeticEnergy * numberOfOperationPerElement)) * numCoresUsed * (numPass - 1);
206+
mjEnergy += (((m_eACT + m_ePRE + m_etrans) * 3) + (minElementPerRegion * m_blimpArithmeticEnergy * numberOfOperationPerElement)) * numCoresUsed;
205207
mjEnergy += ((m_eR * 2 * maxGDLItr * (numPass-1)) + (m_eR * 2 * minGDLItr)) * numBankPerChip * m_numRanks;
206208
mjEnergy += ((m_eW * maxGDLItr * (numPass-1)) + (m_eW * minGDLItr)) * numBankPerChip * m_numRanks;
207209
mjEnergy += m_pBChip * m_numChipsPerRank * m_numRanks * msRuntime;
@@ -250,12 +252,12 @@ pimPerfEnergyBankLevel::getPerfEnergyForFunc2(PimCmdEnum cmdType, const pimObjIn
250252
case PimCmdEnum::COND_SELECT:
251253
case PimCmdEnum::COND_SELECT_SCALAR:
252254
{
253-
msRead = ((2 * (m_tACT + m_tPRE)) + (maxGDLItr * m_tGDL)) * (numPass - 1) + ((2 * (activateMS + m_tPRE)) + (minGDLItr * m_tGDL));
254-
msWrite = ((m_tACT + m_tPRE) + (maxGDLItr * m_tGDL)) * (numPass - 1) + ((activateMS + m_tPRE) + (minGDLItr * m_tGDL));
255+
msRead = ((2 * (m_tACT + m_tPRE + m_ttrans)) + (maxGDLItr * m_tGDL)) * (numPass - 1) + ((2 * (activateMS + m_tPRE)) + (minGDLItr * m_tGDL));
256+
msWrite = ((m_tACT + m_tPRE + m_ttrans) + (maxGDLItr * m_tGDL)) * (numPass - 1) + ((activateMS + m_tPRE) + (minGDLItr * m_tGDL));
255257
msCompute = (maxElementsPerRegion * m_blimpLatency * numberOfOperationPerElement * (numPass - 1)) + (minElementPerRegion * m_blimpLatency * numberOfOperationPerElement);
256258
msRuntime = msRead + msWrite + msCompute;
257-
mjEnergy = (((m_eACT + m_ePRE) * 3) + (maxElementsPerRegion * m_blimpLogicalEnergy * numberOfOperationPerElement)) * numCoresUsed * (numPass - 1);
258-
mjEnergy += (((m_eACT + m_ePRE) * 3) + (minElementPerRegion * m_blimpLogicalEnergy * numberOfOperationPerElement)) * numCoresUsed;
259+
mjEnergy = (((m_eACT + m_ePRE + m_etrans) * 3) + (maxElementsPerRegion * m_blimpLogicalEnergy * numberOfOperationPerElement)) * numCoresUsed * (numPass - 1);
260+
mjEnergy += (((m_eACT + m_ePRE + m_etrans) * 3) + (minElementPerRegion * m_blimpLogicalEnergy * numberOfOperationPerElement)) * numCoresUsed;
259261
mjEnergy += ((m_eR * 2 * maxGDLItr * (numPass-1)) + (m_eR * 2 * minGDLItr)) * numBankPerChip * m_numRanks;
260262
mjEnergy += ((m_eW * maxGDLItr * (numPass-1)) + (m_eW * minGDLItr)) * numBankPerChip * m_numRanks;
261263
mjEnergy += m_pBChip * m_numChipsPerRank * m_numRanks * msRuntime;
@@ -289,6 +291,8 @@ pimPerfEnergyBankLevel::getPerfEnergyForReduction(PimCmdEnum cmdType, const pimO
289291
uint64_t totalOp = 0;
290292
unsigned numBankPerChip = numCore / m_numChipsPerRank;
291293
double activateMS = minGDLItr * m_tGDL < m_tRAS * m_tCK ? m_tRAS * m_tCK : m_tACT; // Use tRAS if GDL is less than tRAS
294+
double m_ttrans = HMT_model.get_m_ttrans();
295+
double m_etrans = HMT_model.get_m_etrans();
292296

293297
switch (cmdType) {
294298
case PimCmdEnum::REDSUM:
@@ -300,15 +304,15 @@ pimPerfEnergyBankLevel::getPerfEnergyForReduction(PimCmdEnum cmdType, const pimO
300304
{
301305
// How many iterations are required to read / write the max elements per region
302306
double numberOfOperationPerElement = ((double)bitsPerElement / m_blimpCoreBitWidth);
303-
msRead = (m_tACT + m_tPRE) * (numPass - 1) + (activateMS + m_tPRE);
307+
msRead = (m_tACT + m_tPRE + m_ttrans) * (numPass - 1) + (activateMS + m_tPRE);
304308
// reduction for all regions assuming 16 core AMD EPYC 9124
305309
double aggregateMs = static_cast<double>(obj.getNumCoresUsed()) / 2300000;
306310
msCompute = (maxElementsPerRegion * m_blimpLatency * numberOfOperationPerElement * (numPass - 1)) + (minElementPerRegion * m_blimpLatency * numberOfOperationPerElement) + aggregateMs;
307311
msRuntime = msRead + msWrite + msCompute;
308312

309313
// Refer to fulcrum documentation
310-
mjEnergy = ((m_eACT + m_ePRE) + (maxElementsPerRegion * m_blimpArithmeticEnergy * numberOfOperationPerElement)) * (numPass - 1) * numCore;
311-
mjEnergy += ((m_eACT + m_ePRE) + (minElementPerRegion * m_blimpArithmeticEnergy * numberOfOperationPerElement)) * numCore;
314+
mjEnergy = ((m_eACT + m_ePRE + m_etrans) + (maxElementsPerRegion * m_blimpArithmeticEnergy * numberOfOperationPerElement)) * (numPass - 1) * numCore;
315+
mjEnergy += ((m_eACT + m_ePRE + m_etrans) + (minElementPerRegion * m_blimpArithmeticEnergy * numberOfOperationPerElement)) * numCore;
312316
mjEnergy += aggregateMs * cpuTDP;
313317
mjEnergy += ((m_eR * maxGDLItr * (numPass-1)) + (m_eR * minGDLItr)) * numBankPerChip;
314318
mjEnergy += m_pBChip * m_numChipsPerRank * m_numRanks * msRuntime;
@@ -342,11 +346,13 @@ pimPerfEnergyBankLevel::getPerfEnergyForBroadcast(PimCmdEnum cmdType, const pimO
342346
unsigned minGDLItr = std::ceil(minElementPerRegion * bitsPerElement * 1.0 / m_GDLWidth);
343347
unsigned numBankPerChip = numCore / m_numChipsPerRank;
344348
double activateMS = minGDLItr * m_tGDL < m_tRAS * m_tCK ? m_tRAS * m_tCK : m_tACT; // Use tRAS if GDL is less than tRAS
349+
double m_ttrans = HMT_model.get_m_ttrans();
350+
double m_etrans = HMT_model.get_m_etrans();
345351
uint64_t totalOp = 0;
346-
msWrite = ((m_tACT + m_tPRE) + (maxGDLItr * m_tGDL)) * (numPass - 1) + ((activateMS + m_tPRE) + (minGDLItr * m_tGDL));
352+
msWrite = ((m_tACT + m_tPRE + m_ttrans) + (maxGDLItr * m_tGDL)) * (numPass - 1) + ((activateMS + m_tPRE) + (minGDLItr * m_tGDL));
347353

348354
msRuntime = msRead + msWrite + msCompute;
349-
mjEnergy = (m_eACT + m_ePRE) * numPass * numCore;
355+
mjEnergy = (m_eACT + m_ePRE + m_etrans) * numPass * numCore;
350356
mjEnergy += (m_eW * maxGDLItr * (numPass-1) + m_eW * minGDLItr) * numBankPerChip;
351357
mjEnergy += m_pBChip * m_numChipsPerRank * m_numRanks * msRuntime;
352358
return pimeval::perfEnergy(msRuntime, mjEnergy, msRead, msWrite, msCompute, totalOp);
@@ -459,4 +465,4 @@ pimPerfEnergyBankLevel::getPerfEnergyForPrefixSum(PimCmdEnum cmdType, const pimO
459465
break;
460466
}
461467
return pimeval::perfEnergy(msRuntime, mjEnergy, msRead, msWrite, msCompute, totalOp);
462-
}
468+
}

libpimeval/src/pimPerfEnergyBase.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "pimPerfEnergyBankLevel.h"
1212
#include "pimPerfEnergyAquabolt.h"
1313
#include "pimPerfEnergyAim.h"
14+
#include "pimTLB.h"
1415
#include <cstdint>
1516
#include <cstdio>
1617

@@ -75,8 +76,10 @@ pimPerfEnergyBase::pimPerfEnergyBase(const pimPerfEnergyModelParams& params)
7576
m_tRP = m_paramsDram.gettRP();
7677
m_tCAS = m_paramsDram.getNsTCAS() / m_nano_to_milli; // Convert ns to ms
7778
m_tRAS = m_paramsDram.gettRAS();
78-
}
7979

80+
HMT_model.set_m_etrans(HMT_ETRANS);
81+
HMT_model.set_m_ttrans(HMT_TTRANS);
82+
}
8083
//! @brief Perf energy model of data transfer between CPU memory and PIM memory
8184
pimeval::perfEnergy
8285
pimPerfEnergyBase::getPerfEnergyForBytesTransfer(PimCmdEnum cmdType, uint64_t numBytes) const

libpimeval/src/pimPerfEnergyBase.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "pimParamsDram.h" // for pimParamsDram
1212
#include "pimCmd.h" // for PimCmdEnum
1313
#include "pimResMgr.h" // for pimObjInfo
14+
#include "pimTLB.h"
1415
#include <cstdint>
1516
#include <memory> // for std::unique_ptr
1617

@@ -106,6 +107,8 @@ class pimPerfEnergyBase
106107
unsigned m_tRCD; // RCD in cycles
107108
unsigned m_tRP; // RP in cycles
108109
unsigned m_tRAS; // RAS in cycles
110+
111+
HMT HMT_model;
109112
};
110113

111114
#endif

libpimeval/src/pimTLB.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
#include <cstdio>
2+
#include <cstdlib>
3+
#include <cassert>
4+
#include "pimTLB.h"
5+

libpimeval/src/pimTLB.h

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
#ifndef LAVA_PIM_TLB_H
2+
#define LAVA_PIM_TLB_H
3+
4+
#include <stddef.h>
5+
6+
// #define HMT_ENABLE
7+
#define HMT_SZ 64
8+
9+
#define HMT_TTRANS 1
10+
#define HMT_ETRANS 1
11+
12+
// GETSET(T, N) expands to the accessor pair `T get_N() const` and
// `void set_N(T)` for a data member named N of type T.
//
//  - HMT_ENABLE defined:     the getter returns the member and the setter
//                            stores its argument.
//  - HMT_ENABLE not defined: the getter always returns zero and the setter
//                            ignores its argument and stores zero, so code
//                            that adds these values sees no contribution.
//
// The generated definitions carry no trailing ';' (a semicolon after a member
// function body is an empty declaration and trips -Wextra-semi / -pedantic),
// and the disabled setter leaves its parameter unnamed so that it does not
// raise -Wunused-parameter.
#ifdef HMT_ENABLE
#define GETSET(T, N)                  \
  T get_##N() const { return N; }     \
  void set_##N(T x) { N = x; }
#else
#define GETSET(T, N)                                  \
  T get_##N() const { return static_cast<T>(0); }     \
  void set_##N(T) { N = static_cast<T>(0); }
#endif

//! @brief Holder for the two HMT cost parameters.
//!
//! Both values start at zero. They are only reachable through the accessors
//! generated by GETSET, whose behaviour depends on HMT_ENABLE (see above).
struct HMT {
public:
  //! @brief Construct with both parameters set to zero.
  HMT()
    : m_ttrans(0.0),
      m_etrans(0.0)
  {}

  // Generates get_m_ttrans() / set_m_ttrans(double).
  GETSET(double, m_ttrans)
  // Generates get_m_etrans() / set_m_etrans(double).
  GETSET(double, m_etrans)

private:
  // NOTE(review): presumably the per-translation time cost, in the same unit
  // as the timing terms it is combined with by callers -- confirm.
  double m_ttrans;
  // NOTE(review): presumably the per-translation energy cost, in the same
  // unit as the energy terms it is combined with by callers -- confirm.
  double m_etrans;
};
37+
38+
#endif

0 commit comments

Comments
 (0)