@@ -178,6 +178,8 @@ pimPerfEnergyBankLevel::getPerfEnergyForFunc2(PimCmdEnum cmdType, const pimObjIn
178178 unsigned numPass = obj.getMaxNumRegionsPerCore ();
179179 unsigned bitsPerElement = obj.getBitsPerElement (PimBitWidth::ACTUAL);
180180 unsigned numCoresUsed = obj.isLoadBalanced () ? obj.getNumCoreAvailable () : obj.getNumCoresUsed ();
181+ double m_ttrans = HMT_model.get_m_ttrans ();
182+ double m_etrans = HMT_model.get_m_etrans ();
181183
182184 unsigned maxElementsPerRegion = obj.getMaxElementsPerRegion ();
183185 double numberOfOperationPerElement = ((double )bitsPerElement / m_blimpCoreBitWidth);
@@ -196,12 +198,12 @@ pimPerfEnergyBankLevel::getPerfEnergyForFunc2(PimCmdEnum cmdType, const pimObjIn
196198 case PimCmdEnum::MUL:
197199 case PimCmdEnum::DIV:
198200 {
199- msRead = ((2 * (m_tACT + m_tPRE)) + (maxGDLItr * m_tGDL)) * (numPass - 1 ) + ((2 * (activateMS + m_tPRE)) + (minGDLItr * m_tGDL));
200- msWrite = ((m_tACT + m_tPRE) + (maxGDLItr * m_tGDL)) * (numPass - 1 ) + ((activateMS + m_tPRE) + (minGDLItr * m_tGDL));
201+ msRead = ((2 * (m_tACT + m_tPRE + m_ttrans )) + (maxGDLItr * m_tGDL)) * (numPass - 1 ) + ((2 * (activateMS + m_tPRE)) + (minGDLItr * m_tGDL));
202+ msWrite = ((m_tACT + m_tPRE + m_ttrans ) + (maxGDLItr * m_tGDL)) * (numPass - 1 ) + ((activateMS + m_tPRE) + (minGDLItr * m_tGDL));
201203 msCompute = (maxElementsPerRegion * m_blimpLatency * numberOfOperationPerElement * (numPass - 1 )) + (minElementPerRegion * m_blimpLatency * numberOfOperationPerElement);
202204 msRuntime = msRead + msWrite + msCompute;
203- mjEnergy = (((m_eACT + m_ePRE) * 3 ) + (maxElementsPerRegion * m_blimpArithmeticEnergy * numberOfOperationPerElement)) * numCoresUsed * (numPass - 1 );
204- mjEnergy += (((m_eACT + m_ePRE) * 3 ) + (minElementPerRegion * m_blimpArithmeticEnergy * numberOfOperationPerElement)) * numCoresUsed;
205+ mjEnergy = (((m_eACT + m_ePRE + m_etrans ) * 3 ) + (maxElementsPerRegion * m_blimpArithmeticEnergy * numberOfOperationPerElement)) * numCoresUsed * (numPass - 1 );
206+ mjEnergy += (((m_eACT + m_ePRE + m_etrans ) * 3 ) + (minElementPerRegion * m_blimpArithmeticEnergy * numberOfOperationPerElement)) * numCoresUsed;
205207 mjEnergy += ((m_eR * 2 * maxGDLItr * (numPass-1 )) + (m_eR * 2 * minGDLItr)) * numBankPerChip * m_numRanks;
206208 mjEnergy += ((m_eW * maxGDLItr * (numPass-1 )) + (m_eW * minGDLItr)) * numBankPerChip * m_numRanks;
207209 mjEnergy += m_pBChip * m_numChipsPerRank * m_numRanks * msRuntime;
@@ -250,12 +252,12 @@ pimPerfEnergyBankLevel::getPerfEnergyForFunc2(PimCmdEnum cmdType, const pimObjIn
250252 case PimCmdEnum::COND_SELECT:
251253 case PimCmdEnum::COND_SELECT_SCALAR:
252254 {
253- msRead = ((2 * (m_tACT + m_tPRE)) + (maxGDLItr * m_tGDL)) * (numPass - 1 ) + ((2 * (activateMS + m_tPRE)) + (minGDLItr * m_tGDL));
254- msWrite = ((m_tACT + m_tPRE) + (maxGDLItr * m_tGDL)) * (numPass - 1 ) + ((activateMS + m_tPRE) + (minGDLItr * m_tGDL));
255+ msRead = ((2 * (m_tACT + m_tPRE + m_ttrans )) + (maxGDLItr * m_tGDL)) * (numPass - 1 ) + ((2 * (activateMS + m_tPRE)) + (minGDLItr * m_tGDL));
256+ msWrite = ((m_tACT + m_tPRE + m_ttrans ) + (maxGDLItr * m_tGDL)) * (numPass - 1 ) + ((activateMS + m_tPRE) + (minGDLItr * m_tGDL));
255257 msCompute = (maxElementsPerRegion * m_blimpLatency * numberOfOperationPerElement * (numPass - 1 )) + (minElementPerRegion * m_blimpLatency * numberOfOperationPerElement);
256258 msRuntime = msRead + msWrite + msCompute;
257- mjEnergy = (((m_eACT + m_ePRE) * 3 ) + (maxElementsPerRegion * m_blimpLogicalEnergy * numberOfOperationPerElement)) * numCoresUsed * (numPass - 1 );
258- mjEnergy += (((m_eACT + m_ePRE) * 3 ) + (minElementPerRegion * m_blimpLogicalEnergy * numberOfOperationPerElement)) * numCoresUsed;
259+ mjEnergy = (((m_eACT + m_ePRE + m_etrans ) * 3 ) + (maxElementsPerRegion * m_blimpLogicalEnergy * numberOfOperationPerElement)) * numCoresUsed * (numPass - 1 );
260+ mjEnergy += (((m_eACT + m_ePRE + m_etrans ) * 3 ) + (minElementPerRegion * m_blimpLogicalEnergy * numberOfOperationPerElement)) * numCoresUsed;
259261 mjEnergy += ((m_eR * 2 * maxGDLItr * (numPass-1 )) + (m_eR * 2 * minGDLItr)) * numBankPerChip * m_numRanks;
260262 mjEnergy += ((m_eW * maxGDLItr * (numPass-1 )) + (m_eW * minGDLItr)) * numBankPerChip * m_numRanks;
261263 mjEnergy += m_pBChip * m_numChipsPerRank * m_numRanks * msRuntime;
@@ -289,6 +291,8 @@ pimPerfEnergyBankLevel::getPerfEnergyForReduction(PimCmdEnum cmdType, const pimO
289291 uint64_t totalOp = 0 ;
290292 unsigned numBankPerChip = numCore / m_numChipsPerRank;
291293 double activateMS = minGDLItr * m_tGDL < m_tRAS * m_tCK ? m_tRAS * m_tCK : m_tACT; // Use tRAS if GDL is less than tRAS
294+ double m_ttrans = HMT_model.get_m_ttrans ();
295+ double m_etrans = HMT_model.get_m_etrans ();
292296
293297 switch (cmdType) {
294298 case PimCmdEnum::REDSUM:
@@ -300,15 +304,15 @@ pimPerfEnergyBankLevel::getPerfEnergyForReduction(PimCmdEnum cmdType, const pimO
300304 {
301305 // How many iteration require to read / write max elements per region
302306 double numberOfOperationPerElement = ((double )bitsPerElement / m_blimpCoreBitWidth);
303- msRead = (m_tACT + m_tPRE) * (numPass - 1 ) + (activateMS + m_tPRE);
307+ msRead = (m_tACT + m_tPRE + m_ttrans ) * (numPass - 1 ) + (activateMS + m_tPRE);
304308 // reduction for all regions assuming 16 core AMD EPYC 9124
305309 double aggregateMs = static_cast <double >(obj.getNumCoresUsed ()) / 2300000 ;
306310 msCompute = (maxElementsPerRegion * m_blimpLatency * numberOfOperationPerElement * (numPass - 1 )) + (minElementPerRegion * m_blimpLatency * numberOfOperationPerElement) + aggregateMs;
307311 msRuntime = msRead + msWrite + msCompute;
308312
309313 // Refer to fulcrum documentation
310- mjEnergy = ((m_eACT + m_ePRE) + (maxElementsPerRegion * m_blimpArithmeticEnergy * numberOfOperationPerElement)) * (numPass - 1 ) * numCore;
311- mjEnergy += ((m_eACT + m_ePRE) + (minElementPerRegion * m_blimpArithmeticEnergy * numberOfOperationPerElement)) * numCore;
314+ mjEnergy = ((m_eACT + m_ePRE + m_etrans ) + (maxElementsPerRegion * m_blimpArithmeticEnergy * numberOfOperationPerElement)) * (numPass - 1 ) * numCore;
315+ mjEnergy += ((m_eACT + m_ePRE + m_etrans ) + (minElementPerRegion * m_blimpArithmeticEnergy * numberOfOperationPerElement)) * numCore;
312316 mjEnergy += aggregateMs * cpuTDP;
313317 mjEnergy += ((m_eR * maxGDLItr * (numPass-1 )) + (m_eR * minGDLItr)) * numBankPerChip;
314318 mjEnergy += m_pBChip * m_numChipsPerRank * m_numRanks * msRuntime;
@@ -342,11 +346,13 @@ pimPerfEnergyBankLevel::getPerfEnergyForBroadcast(PimCmdEnum cmdType, const pimO
342346 unsigned minGDLItr = std::ceil (minElementPerRegion * bitsPerElement * 1.0 / m_GDLWidth);
343347 unsigned numBankPerChip = numCore / m_numChipsPerRank;
344348 double activateMS = minGDLItr * m_tGDL < m_tRAS * m_tCK ? m_tRAS * m_tCK : m_tACT; // Use tRAS if GDL is less than tRAS
349+ double m_ttrans = HMT_model.get_m_ttrans ();
350+ double m_etrans = HMT_model.get_m_etrans ();
345351 uint64_t totalOp = 0 ;
346- msWrite = ((m_tACT + m_tPRE) + (maxGDLItr * m_tGDL)) * (numPass - 1 ) + ((activateMS + m_tPRE) + (minGDLItr * m_tGDL));
352+ msWrite = ((m_tACT + m_tPRE + m_ttrans ) + (maxGDLItr * m_tGDL)) * (numPass - 1 ) + ((activateMS + m_tPRE) + (minGDLItr * m_tGDL));
347353
348354 msRuntime = msRead + msWrite + msCompute;
349- mjEnergy = (m_eACT + m_ePRE) * numPass * numCore;
355+ mjEnergy = (m_eACT + m_ePRE + m_etrans ) * numPass * numCore;
350356 mjEnergy += (m_eW * maxGDLItr * (numPass-1 ) + m_eW * minGDLItr) * numBankPerChip;
351357 mjEnergy += m_pBChip * m_numChipsPerRank * m_numRanks * msRuntime;
352358 return pimeval::perfEnergy (msRuntime, mjEnergy, msRead, msWrite, msCompute, totalOp);
@@ -459,4 +465,4 @@ pimPerfEnergyBankLevel::getPerfEnergyForPrefixSum(PimCmdEnum cmdType, const pimO
459465 break ;
460466 }
461467 return pimeval::perfEnergy (msRuntime, mjEnergy, msRead, msWrite, msCompute, totalOp);
462- }
468+ }
0 commit comments