Skip to content
Draft
Show file tree
Hide file tree
Changes from 23 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
9cc455a
add PIM stencil
Arleee1 Mar 28, 2025
d41164a
wip
Arleee1 Mar 31, 2025
31785bd
PIM stencil mostly working, TODO numLeft not working
Arleee1 Apr 1, 2025
1b08ee5
PIM stencil (seemingly) working
Arleee1 Apr 1, 2025
1b138ff
add stencil makefile
Arleee1 Apr 1, 2025
6fb1dd8
add stencil readme
Arleee1 Apr 1, 2025
cb767a7
fix readme typo
Arleee1 Apr 1, 2025
0af845b
wip stencil chunking
Arleee1 Apr 28, 2025
2a0d540
vertical chunking starting to work
Arleee1 Apr 28, 2025
4ec58a1
working for multiple iterations
Arleee1 Apr 28, 2025
d5d22da
correct for radius and iterations both >1
Arleee1 Apr 30, 2025
da1f01c
cleanup
Arleee1 Apr 30, 2025
f6f89e6
works for many iterations
Arleee1 May 1, 2025
9ea640c
fix workingPimMemory size
Arleee1 May 1, 2025
749a46b
cleanup comments, add TODO
Arleee1 May 1, 2025
d4ceec1
chunked stencil working
Arleee1 May 4, 2025
07aa20f
Merge branch 'main' into stencil
Arleee1 May 4, 2025
839fb23
add api support for chunked shift
Arleee1 May 5, 2025
ab75f3b
typo
Arleee1 May 5, 2025
80cafc1
Merge branch 'main' into stencil
Arleee1 May 5, 2025
e7ba0e3
minor stencil updates
Arleee1 May 6, 2025
e4fb1ea
fix readme
Arleee1 May 6, 2025
de8173f
reset delta to 0.1 from 0.01, and set bank level region as subarray
Arleee1 May 7, 2025
1870a32
Merge branch 'main' into stencil
Arleee1 May 22, 2025
73c3c97
cleanup stencil comments
Arleee1 Sep 15, 2025
27e0afc
Merge branch 'main' into stencil
Arleee1 Sep 15, 2025
06d60b9
add cross region communication option for aim
Arleee1 Sep 15, 2025
954e8f7
stencil: fix issue with switch case
Arleee1 Oct 1, 2025
731fb7c
add optimizer for stencil layout
Arleee1 Oct 20, 2025
d08b6f3
fix failed 'make <debug, etc.>' from root dir due to no makefile targ…
Arleee1 Nov 5, 2025
1c3495e
add alt stencil impl
Arleee1 Nov 6, 2025
0678b0d
Merge branch 'main' into stencil
Arleee1 Nov 24, 2025
0cd037e
start horizontal chunking, todo: data copy, chunk layout
Arleee1 Dec 1, 2025
0d91f1c
stencil progress, todo: wrong output
Arleee1 Dec 2, 2025
2ae10ab
fix stencil bug
Arleee1 Dec 2, 2025
8ce5032
stencil cleanup
Arleee1 Dec 2, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions libpimeval/src/libpimeval.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -501,17 +501,17 @@ pimRotateElementsLeft(PimObjId src)

//! @brief Shift elements of an obj by one step to the right and fill zero
//! @param src  PIM object whose elements are shifted
//! @param useCrossRegionCommunication  Passed through unchanged to
//!        pimSim::pimShiftElementsRight; the public header declares it with a
//!        default of true
//! @return PIM_OK when the simulator call returns true, PIM_ERROR otherwise
PimStatus
pimShiftElementsRight(PimObjId src)
pimShiftElementsRight(PimObjId src, bool useCrossRegionCommunication)
{
bool ok = pimSim::get()->pimShiftElementsRight(src);
bool ok = pimSim::get()->pimShiftElementsRight(src, useCrossRegionCommunication);
return ok ? PIM_OK : PIM_ERROR;
}

//! @brief Shift elements of an obj by one step to the left and fill zero
//! @param src  PIM object whose elements are shifted
//! @param useCrossRegionCommunication  Passed through unchanged to
//!        pimSim::pimShiftElementsLeft; the public header declares it with a
//!        default of true
//! @return PIM_OK when the simulator call returns true, PIM_ERROR otherwise
PimStatus
pimShiftElementsLeft(PimObjId src)
pimShiftElementsLeft(PimObjId src, bool useCrossRegionCommunication)
{
bool ok = pimSim::get()->pimShiftElementsLeft(src);
bool ok = pimSim::get()->pimShiftElementsLeft(src, useCrossRegionCommunication);
return ok ? PIM_OK : PIM_ERROR;
}

Expand Down
4 changes: 2 additions & 2 deletions libpimeval/src/libpimeval.h
Original file line number Diff line number Diff line change
Expand Up @@ -193,8 +193,8 @@ PimStatus pimBroadcastUInt(PimObjId dest, uint64_t value);
PimStatus pimBroadcastFP(PimObjId dest, float value);
PimStatus pimRotateElementsRight(PimObjId src);
PimStatus pimRotateElementsLeft(PimObjId src);
PimStatus pimShiftElementsRight(PimObjId src);
PimStatus pimShiftElementsLeft(PimObjId src);
PimStatus pimShiftElementsRight(PimObjId src, bool useCrossRegionCommunication = true);
PimStatus pimShiftElementsLeft(PimObjId src, bool useCrossRegionCommunication = true);
PimStatus pimShiftBitsRight(PimObjId src, PimObjId dest, unsigned shiftAmount);
PimStatus pimShiftBitsLeft(PimObjId src, PimObjId dest, unsigned shiftAmount);

Expand Down
52 changes: 27 additions & 25 deletions libpimeval/src/pimCmd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1205,33 +1205,35 @@ pimCmdRotate::execute()
computeAllRegions(numRegions);

// handle region boundaries
if (m_cmdType == PimCmdEnum::ROTATE_ELEM_R || m_cmdType == PimCmdEnum::SHIFT_ELEM_R) {
for (unsigned i = 0; i < numRegions; ++i) {
const pimRegion &srcRegion = objSrc.getRegions()[i];
uint64_t elemIdxBegin = srcRegion.getElemIdxBegin();
uint64_t val = 0;
if (i == 0 && m_cmdType == PimCmdEnum::ROTATE_ELEM_R) {
val = m_regionBoundary[numRegions - 1];
} else if (i > 0) {
val = m_regionBoundary[i - 1];
if(m_useCrossRegionCommunication) {
if (m_cmdType == PimCmdEnum::ROTATE_ELEM_R || m_cmdType == PimCmdEnum::SHIFT_ELEM_R) {
for (unsigned i = 0; i < numRegions; ++i) {
const pimRegion &srcRegion = objSrc.getRegions()[i];
uint64_t elemIdxBegin = srcRegion.getElemIdxBegin();
uint64_t val = 0;
if (i == 0 && m_cmdType == PimCmdEnum::ROTATE_ELEM_R) {
val = m_regionBoundary[numRegions - 1];
} else if (i > 0) {
val = m_regionBoundary[i - 1];
}
objSrc.setElement(elemIdxBegin, val);
}
objSrc.setElement(elemIdxBegin, val);
}
} else if (m_cmdType == PimCmdEnum::ROTATE_ELEM_L || m_cmdType == PimCmdEnum::SHIFT_ELEM_L) {
for (unsigned i = 0; i < numRegions; ++i) {
const pimRegion &srcRegion = objSrc.getRegions()[i];
unsigned numElementsInRegion = srcRegion.getNumElemInRegion();
uint64_t elemIdxBegin = srcRegion.getElemIdxBegin();
uint64_t val = 0;
if (i == numRegions - 1 && m_cmdType == PimCmdEnum::ROTATE_ELEM_L) {
val = m_regionBoundary[0];
} else if (i < numRegions - 1) {
val = m_regionBoundary[i + 1];
} else if (m_cmdType == PimCmdEnum::ROTATE_ELEM_L || m_cmdType == PimCmdEnum::SHIFT_ELEM_L) {
for (unsigned i = 0; i < numRegions; ++i) {
const pimRegion &srcRegion = objSrc.getRegions()[i];
unsigned numElementsInRegion = srcRegion.getNumElemInRegion();
uint64_t elemIdxBegin = srcRegion.getElemIdxBegin();
uint64_t val = 0;
if (i == numRegions - 1 && m_cmdType == PimCmdEnum::ROTATE_ELEM_L) {
val = m_regionBoundary[0];
} else if (i < numRegions - 1) {
val = m_regionBoundary[i + 1];
}
objSrc.setElement(elemIdxBegin + numElementsInRegion - 1, val);
}
objSrc.setElement(elemIdxBegin + numElementsInRegion - 1, val);
} else {
assert(0);
}
} else {
assert(0);
}

if (pimSim::get()->getDeviceType() != PIM_FUNCTIONAL) {
Expand Down Expand Up @@ -1306,7 +1308,7 @@ pimCmdRotate::updateStats() const
PimDataType dataType = objSrc.getDataType();
bool isVLayout = objSrc.isVLayout();

pimeval::perfEnergy mPerfEnergy = pimSim::get()->getPerfEnergyModel()->getPerfEnergyForRotate(m_cmdType, objSrc);
pimeval::perfEnergy mPerfEnergy = pimSim::get()->getPerfEnergyModel()->getPerfEnergyForRotate(m_cmdType, objSrc, m_useCrossRegionCommunication);
pimSim::get()->getStatsMgr()->recordCmd(getName(dataType, isVLayout), mPerfEnergy);
return true;
}
Expand Down
5 changes: 3 additions & 2 deletions libpimeval/src/pimCmd.h
Original file line number Diff line number Diff line change
Expand Up @@ -506,8 +506,8 @@ class pimCmdBroadcast : public pimCmd
class pimCmdRotate : public pimCmd
{
public:
pimCmdRotate(PimCmdEnum cmdType, PimObjId src)
: pimCmd(cmdType), m_src(src)
pimCmdRotate(PimCmdEnum cmdType, PimObjId src, bool useCrossRegionCommunication)
: pimCmd(cmdType), m_src(src), m_useCrossRegionCommunication(useCrossRegionCommunication)
{
assert(cmdType == PimCmdEnum::ROTATE_ELEM_R || cmdType == PimCmdEnum::ROTATE_ELEM_L ||
cmdType == PimCmdEnum::SHIFT_ELEM_R || cmdType == PimCmdEnum::SHIFT_ELEM_L);
Expand All @@ -520,6 +520,7 @@ class pimCmdRotate : public pimCmd
protected:
PimObjId m_src;
std::vector<uint64_t> m_regionBoundary;
bool m_useCrossRegionCommunication;
};

//! @class pimCmdReadRowToSa
Expand Down
2 changes: 1 addition & 1 deletion libpimeval/src/pimPerfEnergyAquabolt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ pimPerfEnergyAquabolt::getPerfEnergyForBroadcast(PimCmdEnum cmdType, const pimOb

//! @brief Perf energy model of aquabolt PIM for rotate
pimeval::perfEnergy
pimPerfEnergyAquabolt::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj) const
pimPerfEnergyAquabolt::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj, bool useCrossRegionCommunication) const
{
double msRuntime = 0.0;
double mjEnergy = 0.0;
Expand Down
2 changes: 1 addition & 1 deletion libpimeval/src/pimPerfEnergyAquabolt.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class pimPerfEnergyAquabolt : public pimPerfEnergyBase
virtual pimeval::perfEnergy getPerfEnergyForFunc2(PimCmdEnum cmdType, const pimObjInfo& objSrc1, const pimObjInfo& objSrc2, const pimObjInfo& objDest) const override;
virtual pimeval::perfEnergy getPerfEnergyForReduction(PimCmdEnum cmdType, const pimObjInfo& obj, unsigned numPass) const override;
virtual pimeval::perfEnergy getPerfEnergyForBroadcast(PimCmdEnum cmdType, const pimObjInfo& obj) const override;
virtual pimeval::perfEnergy getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj) const override;
virtual pimeval::perfEnergy getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj, bool useCrossRegionCommunication) const override;

protected:
unsigned m_aquaboltFPUBitWidth = 16;
Expand Down
12 changes: 7 additions & 5 deletions libpimeval/src/pimPerfEnergyBankLevel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ pimPerfEnergyBankLevel::getPerfEnergyForBroadcast(PimCmdEnum cmdType, const pimO
// TODO: This needs to be revisited
//! @brief Perf energy model of bank-level PIM for rotate
pimeval::perfEnergy
pimPerfEnergyBankLevel::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj) const
pimPerfEnergyBankLevel::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj, bool useCrossRegionCommunication) const
{
double msRuntime = 0.0;
double mjEnergy = 0.0;
Expand All @@ -308,8 +308,6 @@ pimPerfEnergyBankLevel::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjI
unsigned bitsPerElement = obj.getBitsPerElement(PimBitWidth::ACTUAL);
unsigned numRegions = obj.getRegions().size();
uint64_t totalOp = 0;
// boundary handling - assume two times copying between device and host for boundary elements
pimeval::perfEnergy perfEnergyBT = getPerfEnergyForBytesTransfer(PimCmdEnum::COPY_D2H, numRegions * bitsPerElement / 8);

// rotate within subarray:
// For every bit: Read row to SA; move SA to R1; Shift R1 by N steps; Move R1 to SA; Write SA to row
Expand All @@ -319,8 +317,12 @@ pimPerfEnergyBankLevel::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjI
msRuntime = (m_tR + (bitsPerElement + 2) * m_tL + m_tW); // for one pass
msRuntime *= numPass;
mjEnergy = (m_eAP + (bitsPerElement + 2) * m_eL) * numPass;
msRuntime += 2 * perfEnergyBT.m_msRuntime;
mjEnergy += 2 * perfEnergyBT.m_mjEnergy;
if(useCrossRegionCommunication) {
// boundary handling - assume two times copying between device and host for boundary elements
pimeval::perfEnergy perfEnergyBT = getPerfEnergyForBytesTransfer(PimCmdEnum::COPY_D2H, numRegions * bitsPerElement / 8);
msRuntime += 2 * perfEnergyBT.m_msRuntime;
mjEnergy += 2 * perfEnergyBT.m_mjEnergy;
}
std::cout << "PIM-Warning: Perf energy model is not precise for PIM command " << pimCmd::getName(cmdType, "") << std::endl;

return pimeval::perfEnergy(msRuntime, mjEnergy, msRead, msWrite, msCompute, totalOp);
Expand Down
2 changes: 1 addition & 1 deletion libpimeval/src/pimPerfEnergyBankLevel.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class pimPerfEnergyBankLevel : public pimPerfEnergyBase
virtual pimeval::perfEnergy getPerfEnergyForFunc2(PimCmdEnum cmdType, const pimObjInfo& objSrc1, const pimObjInfo& objSrc2, const pimObjInfo& objDest) const override;
virtual pimeval::perfEnergy getPerfEnergyForReduction(PimCmdEnum cmdType, const pimObjInfo& obj, unsigned numPass) const override;
virtual pimeval::perfEnergy getPerfEnergyForBroadcast(PimCmdEnum cmdType, const pimObjInfo& obj) const override;
virtual pimeval::perfEnergy getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj) const override;
virtual pimeval::perfEnergy getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj, bool useCrossRegionCommunication) const override;

protected:
double m_blimpCoreOriginalLatency = 0.000005; // ms; 200 MHz. Reference: BLIMP paper
Expand Down
2 changes: 1 addition & 1 deletion libpimeval/src/pimPerfEnergyBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ pimPerfEnergyBase::getPerfEnergyForBroadcast(PimCmdEnum cmdType, const pimObjInf

//! @brief Perf energy model of base class for rotate (placeholder)
pimeval::perfEnergy
pimPerfEnergyBase::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj) const
pimPerfEnergyBase::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj, bool useCrossRegionCommunication) const
{
double msRuntime = 1e10;
double mjEnergy = 999999999.9;
Expand Down
2 changes: 1 addition & 1 deletion libpimeval/src/pimPerfEnergyBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ class pimPerfEnergyBase
virtual pimeval::perfEnergy getPerfEnergyForFunc2(PimCmdEnum cmdType, const pimObjInfo& objSrc1, const pimObjInfo& objSrc2, const pimObjInfo& objDest) const;
virtual pimeval::perfEnergy getPerfEnergyForReduction(PimCmdEnum cmdType, const pimObjInfo& obj, unsigned numPass) const;
virtual pimeval::perfEnergy getPerfEnergyForBroadcast(PimCmdEnum cmdType, const pimObjInfo& obj) const;
virtual pimeval::perfEnergy getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj) const;
virtual pimeval::perfEnergy getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj, bool useCrossRegionCommunication) const;

protected:
PimDeviceEnum m_simTarget;
Expand Down
20 changes: 13 additions & 7 deletions libpimeval/src/pimPerfEnergyBitSerial.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -441,7 +441,7 @@ pimPerfEnergyBitSerial::getPerfEnergyForBroadcast(PimCmdEnum cmdType, const pimO

//! @brief Perf energy model of bit-serial PIM for rotate
pimeval::perfEnergy
pimPerfEnergyBitSerial::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj) const
pimPerfEnergyBitSerial::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj, bool useCrossRegionCommunication) const
{
double msRuntime = 0.0;
double mjEnergy = 0.0;
Expand All @@ -453,8 +453,6 @@ pimPerfEnergyBitSerial::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjI
unsigned bitsPerElement = obj.getBitsPerElement(PimBitWidth::ACTUAL);
unsigned numRegions = obj.getRegions().size();
unsigned numCore = obj.getNumCoreAvailable();
// boundary handling - assume two times copying between device and host for boundary elements
pimeval::perfEnergy perfEnergyBT = getPerfEnergyForBytesTransfer(PimCmdEnum::COPY_D2H, numRegions * bitsPerElement / 8);

switch (m_simTarget) {
case PIM_DEVICE_BITSIMD_V:
Expand All @@ -467,8 +465,12 @@ pimPerfEnergyBitSerial::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjI
totalOp += 3 * bitsPerElement * numPass * numCore;
msRuntime = msRead + msWrite + msCompute;
mjEnergy = (m_eAP + 3 * m_eL) * bitsPerElement * numPass; // for one pass
msRuntime += 2 * perfEnergyBT.m_msRuntime;
mjEnergy += 2 * perfEnergyBT.m_mjEnergy;
if(useCrossRegionCommunication) {
// boundary handling - assume two times copying between device and host for boundary elements
pimeval::perfEnergy perfEnergyBT = getPerfEnergyForBytesTransfer(PimCmdEnum::COPY_D2H, numRegions * bitsPerElement / 8);
msRuntime += 2 * perfEnergyBT.m_msRuntime;
mjEnergy += 2 * perfEnergyBT.m_mjEnergy;
}
break;
case PIM_DEVICE_SIMDRAM:
// todo
Expand All @@ -483,8 +485,12 @@ pimPerfEnergyBitSerial::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjI
msRuntime = (m_tR + (bitsPerElement + 2) * m_tL + m_tW); // for one pass
msRuntime *= numPass;
mjEnergy = (m_eAP + (bitsPerElement + 2) * m_eL) * numPass;
msRuntime += 2 * perfEnergyBT.m_msRuntime;
mjEnergy += 2 * perfEnergyBT.m_mjEnergy;
if(useCrossRegionCommunication) {
// boundary handling - assume two times copying between device and host for boundary elements
pimeval::perfEnergy perfEnergyBT = getPerfEnergyForBytesTransfer(PimCmdEnum::COPY_D2H, numRegions * bitsPerElement / 8);
msRuntime += 2 * perfEnergyBT.m_msRuntime;
mjEnergy += 2 * perfEnergyBT.m_mjEnergy;
}
break;
default:
assert(0);
Expand Down
2 changes: 1 addition & 1 deletion libpimeval/src/pimPerfEnergyBitSerial.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class pimPerfEnergyBitSerial : public pimPerfEnergyBase
virtual pimeval::perfEnergy getPerfEnergyForFunc2(PimCmdEnum cmdType, const pimObjInfo& objSrc1, const pimObjInfo& objSrc2, const pimObjInfo& objDest) const override;
virtual pimeval::perfEnergy getPerfEnergyForReduction(PimCmdEnum cmdType, const pimObjInfo& obj, unsigned numPass) const override;
virtual pimeval::perfEnergy getPerfEnergyForBroadcast(PimCmdEnum cmdType, const pimObjInfo& obj) const override;
virtual pimeval::perfEnergy getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj) const override;
virtual pimeval::perfEnergy getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj, bool useCrossRegionCommunication) const override;

protected:
pimeval::perfEnergy getPerfEnergyBitSerial(PimDeviceEnum deviceType, PimCmdEnum cmdType, unsigned numPass, const pimObjInfo& objSrc1, const pimObjInfo& objSrc2, const pimObjInfo& objDest) const;
Expand Down
12 changes: 7 additions & 5 deletions libpimeval/src/pimPerfEnergyFulcrum.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ pimPerfEnergyFulcrum::getPerfEnergyForBroadcast(PimCmdEnum cmdType, const pimObj

//! @brief Perf energy model of Fulcrum for rotate
pimeval::perfEnergy
pimPerfEnergyFulcrum::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj) const
pimPerfEnergyFulcrum::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj, bool useCrossRegionCommunication) const
{
double msRuntime = 0.0;
double mjEnergy = 0.0;
Expand All @@ -312,8 +312,6 @@ pimPerfEnergyFulcrum::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInf
unsigned bitsPerElement = obj.getBitsPerElement(PimBitWidth::ACTUAL);
unsigned numRegions = obj.getRegions().size();
uint64_t totalOp = 0;
// boundary handling - assume two times copying between device and host for boundary elements
pimeval::perfEnergy perfEnergyBT = getPerfEnergyForBytesTransfer(PimCmdEnum::COPY_D2H, numRegions * bitsPerElement / 8);

// rotate within subarray:
// For every bit: Read row to SA; move SA to R1; Shift R1 by N steps; Move R1 to SA; Write SA to row
Expand All @@ -324,8 +322,12 @@ pimPerfEnergyFulcrum::getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInf
msWrite = m_tW * numPass;
msRuntime = msRead + msWrite + msCompute;
mjEnergy = (m_eAP + (bitsPerElement + 2) * m_eL) * numPass;
msRuntime += 2 * perfEnergyBT.m_msRuntime;
mjEnergy += 2 * perfEnergyBT.m_mjEnergy;
if(useCrossRegionCommunication) {
// boundary handling - assume two times copying between device and host for boundary elements
pimeval::perfEnergy perfEnergyBT = getPerfEnergyForBytesTransfer(PimCmdEnum::COPY_D2H, numRegions * bitsPerElement / 8);
msRuntime += 2 * perfEnergyBT.m_msRuntime;
mjEnergy += 2 * perfEnergyBT.m_mjEnergy;
}
std::cout << "PIM-Warning: Perf energy model is not precise for PIM command " << pimCmd::getName(cmdType, "") << std::endl;

return pimeval::perfEnergy(msRuntime, mjEnergy, msRead, msWrite, msCompute, totalOp);
Expand Down
2 changes: 1 addition & 1 deletion libpimeval/src/pimPerfEnergyFulcrum.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class pimPerfEnergyFulcrum : public pimPerfEnergyBase
virtual pimeval::perfEnergy getPerfEnergyForFunc2(PimCmdEnum cmdType, const pimObjInfo& objSrc1, const pimObjInfo& objSrc2, const pimObjInfo& objDest) const override;
virtual pimeval::perfEnergy getPerfEnergyForReduction(PimCmdEnum cmdType, const pimObjInfo& obj, unsigned numPass) const override;
virtual pimeval::perfEnergy getPerfEnergyForBroadcast(PimCmdEnum cmdType, const pimObjInfo& obj) const override;
virtual pimeval::perfEnergy getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj) const override;
virtual pimeval::perfEnergy getPerfEnergyForRotate(PimCmdEnum cmdType, const pimObjInfo& obj, bool useCrossRegionCommunication) const override;

protected:
double m_fulcrumMulLatency = 0.00000609; // 6.09ns
Expand Down
12 changes: 6 additions & 6 deletions libpimeval/src/pimSim.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -826,7 +826,7 @@ pimSim::pimRotateElementsRight(PimObjId src)
{
pimPerfMon perfMon("pimRotateElementsRight");
if (!isValidDevice()) { return false; }
std::unique_ptr<pimCmd> cmd = std::make_unique<pimCmdRotate>(PimCmdEnum::ROTATE_ELEM_R, src);
std::unique_ptr<pimCmd> cmd = std::make_unique<pimCmdRotate>(PimCmdEnum::ROTATE_ELEM_R, src, true);
return m_device->executeCmd(std::move(cmd));
}

Expand All @@ -835,25 +835,25 @@ pimSim::pimRotateElementsLeft(PimObjId src)
{
pimPerfMon perfMon("pimRotateElementsLeft");
if (!isValidDevice()) { return false; }
std::unique_ptr<pimCmd> cmd = std::make_unique<pimCmdRotate>(PimCmdEnum::ROTATE_ELEM_L, src);
std::unique_ptr<pimCmd> cmd = std::make_unique<pimCmdRotate>(PimCmdEnum::ROTATE_ELEM_L, src, true);
return m_device->executeCmd(std::move(cmd));
}

//! @brief Create a SHIFT_ELEM_R pimCmdRotate command for src and execute it on the device
//! @param src  PIM object handed to the rotate/shift command
//! @param useCrossRegionCommunication  Stored by pimCmdRotate; forwarded as-is from the caller
//! @return false if isValidDevice() fails, otherwise the result of m_device->executeCmd()
bool
pimSim::pimShiftElementsRight(PimObjId src)
pimSim::pimShiftElementsRight(PimObjId src, bool useCrossRegionCommunication)
{
pimPerfMon perfMon("pimShiftElementsRight");
if (!isValidDevice()) { return false; }
std::unique_ptr<pimCmd> cmd = std::make_unique<pimCmdRotate>(PimCmdEnum::SHIFT_ELEM_R, src);
std::unique_ptr<pimCmd> cmd = std::make_unique<pimCmdRotate>(PimCmdEnum::SHIFT_ELEM_R, src, useCrossRegionCommunication);
// Ownership of the command is transferred to the device.
return m_device->executeCmd(std::move(cmd));
}

//! @brief Create a SHIFT_ELEM_L pimCmdRotate command for src and execute it on the device
//! @param src  PIM object handed to the rotate/shift command
//! @param useCrossRegionCommunication  Stored by pimCmdRotate; forwarded as-is from the caller
//! @return false if isValidDevice() fails, otherwise the result of m_device->executeCmd()
bool
pimSim::pimShiftElementsLeft(PimObjId src)
pimSim::pimShiftElementsLeft(PimObjId src, bool useCrossRegionCommunication)
{
pimPerfMon perfMon("pimShiftElementsLeft");
if (!isValidDevice()) { return false; }
std::unique_ptr<pimCmd> cmd = std::make_unique<pimCmdRotate>(PimCmdEnum::SHIFT_ELEM_L, src);
std::unique_ptr<pimCmd> cmd = std::make_unique<pimCmdRotate>(PimCmdEnum::SHIFT_ELEM_L, src, useCrossRegionCommunication);
// Ownership of the command is transferred to the device.
return m_device->executeCmd(std::move(cmd));
}

Expand Down
4 changes: 2 additions & 2 deletions libpimeval/src/pimSim.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,8 @@ class pimSim
template <typename T> bool pimBroadcast(PimObjId dest, T value);
bool pimRotateElementsRight(PimObjId src);
bool pimRotateElementsLeft(PimObjId src);
bool pimShiftElementsRight(PimObjId src);
bool pimShiftElementsLeft(PimObjId src);
bool pimShiftElementsRight(PimObjId src, bool useCrossRegionCommunication);
bool pimShiftElementsLeft(PimObjId src, bool useCrossRegionCommunication);
bool pimShiftBitsRight(PimObjId src, PimObjId dest, unsigned shiftAmount);
bool pimShiftBitsLeft(PimObjId src, PimObjId dest, unsigned shiftAmount);

Expand Down
16 changes: 16 additions & 0 deletions misc-bench/stencil/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Makefile: C++ version of stencil
# Copyright (c) 2025 University of Virginia
# This file is licensed under the MIT License.
# See the LICENSE file in the root of this repository for more details.

# Subdirectories that hold the actual builds; every goal below is delegated to them.
SUBDIRS := PIM

# The goals and the subdirectory names are not files produced by this Makefile.
.PHONY: debug perf dramsim3_integ clean $(SUBDIRS)
# Goal selected when make is invoked without an explicit target.
.DEFAULT_GOAL := perf

# OpenMP switch; ?= keeps a value already supplied via the command line or environment.
USE_OPENMP ?= 0

# Each supported goal has no recipe of its own; it only depends on the subdirectories.
debug perf dramsim3_integ clean: $(SUBDIRS)

# Re-invoke make inside each subdirectory, forwarding the goals named on the
# command line together with the USE_OPENMP setting.
# NOTE(review): with no command-line goal, $(MAKECMDGOALS) is empty, so the
# sub-make builds its own default goal rather than an explicit 'perf' — confirm
# the PIM Makefile's default matches.
$(SUBDIRS):
$(MAKE) -C $@ $(MAKECMDGOALS) USE_OPENMP=$(USE_OPENMP)
Loading