Skip to content

Commit 7b2d36d

Browse files
committed
In totalExchange(), change messagePrepare() to a version that uses a search instead of a loop.
1 parent 5c554f6 commit 7b2d36d

File tree

3 files changed

+227
-4
lines changed

3 files changed

+227
-4
lines changed

src/Infrastructure/Array/src/sparseMatMulStoreLinSeqVect.h

Lines changed: 169 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ template<typename T>
7676
void clientRequest(T *t, int i, char **requestStreamClient);
7777

7878
template<typename T>
79-
void clientRequestAllAtOnce(T *t,
79+
void clientRequestSearch(T *t,
8080
ESMCI::VM *vm,
8181
char **requestStreamClient);
8282

@@ -156,7 +156,7 @@ void accessLookup(
156156
}
157157

158158
// Fill buffers
159-
clientRequestAllAtOnce(t, vm, requestStreamClient);
159+
clientRequestSearch(t, vm, requestStreamClient);
160160

161161
// Communicate buffers
162162
for (int ii=localPet+petCount-1; ii>localPet; ii--){
@@ -456,7 +456,7 @@ template<typename IT1>
456456

457457

458458
template<typename IT1, typename IT2>
459-
void clientRequestAllAtOnce(FillLinSeqVectInfo<IT1,IT2> *fillLinSeqVectInfo,
459+
void clientRequestSearch(FillLinSeqVectInfo<IT1,IT2> *fillLinSeqVectInfo,
460460
ESMCI::VM *vm, char **requestStreamClient){
461461
const int localDeCount = fillLinSeqVectInfo->localDeCount;
462462
const int *localDeElementCount = fillLinSeqVectInfo->localDeElementCount;
@@ -854,7 +854,7 @@ template<typename IT1, typename IT2>
854854

855855

856856
template<typename IT1, typename IT2>
857-
void clientRequestAllAtOnce(FillPartnerDeInfo<IT1,IT2> *fillPartnerDeInfo,
857+
void clientRequestSearch(FillPartnerDeInfo<IT1,IT2> *fillPartnerDeInfo,
858858
ESMCI::VM *vm, char **requestStreamClient){
859859
const int localPetfpDEI = fillPartnerDeInfo->localPet;
860860
const Interval<IT1> *seqIndexIntervalIn =
@@ -1104,6 +1104,121 @@ template<typename IT1, typename IT2>
11041104
}
11051105
}
11061106
}
1107+
1108+
virtual void messagePrepareSearch(VMK *vmk,
1109+
int sendIndexOffset, int iiStart, int iiEnd,
1110+
vector<char *> &sendBuffer
1111+
)const{
1112+
1113+
// Get Pet info
1114+
int petCount = vmk->getNpets();
1115+
int localPet = vmk->getMypet();
1116+
1117+
// Memory to hold pointers into buffers
1118+
int **bufferIntArray = new int*[petCount];
1119+
1120+
// Get beginning of buffers
1121+
for (int ii=sendIndexOffset-iiStart; ii>sendIndexOffset-iiEnd; ii--){
1122+
int dstPet = ii%petCount; // fold back into [0,..,petCount-1] range
1123+
bufferIntArray[dstPet] = (int *)(sendBuffer[dstPet]);
1124+
}
1125+
1126+
// Find where eqch SeqInd goes
1127+
for (int j=0; j<localDeCount; j++){
1128+
int de = localDeToDeMap[j]; // global DE number
1129+
if (haloRimFlag){
1130+
// loop over the halo rim elements for localDe j
1131+
for (int k=0; k<array->getRimElementCount()[j]; k++){
1132+
const std::vector<std::vector<SeqIndex<IT> > > *rimSeqIndex;
1133+
array->getRimSeqIndex(&rimSeqIndex);
1134+
SeqIndex<IT> seqIndex = (*rimSeqIndex)[j][k];
1135+
if (seqIndex.valid()){
1136+
IT seqInd = seqIndex.decompSeqIndex;
1137+
1138+
// Figure out which Pet it's on
1139+
Interval<IT> tmpInt;
1140+
tmpInt.min=0;
1141+
tmpInt.max=seqInd;
1142+
tmpInt.count=0;
1143+
1144+
const Interval<IT> *seqIndPos = std::lower_bound(seqIndexInterval, seqIndexInterval+petCount, tmpInt);
1145+
int dstPet=(int)(seqIndPos-seqIndexInterval);
1146+
if (dstPet >= petCount) continue;
1147+
if (dstPet == localPet) continue;
1148+
1149+
IT seqIndMin = seqIndexInterval[dstPet].min;
1150+
IT seqIndMax = seqIndexInterval[dstPet].max;
1151+
1152+
if (seqInd >= seqIndMin && seqInd <= seqIndMax){
1153+
IT seqIndCount = seqIndexInterval[dstPet].count;
1154+
int lookupIndex = (int)(seqInd - seqIndMin);
1155+
if (tensorMixFlag)
1156+
lookupIndex += (seqIndex.tensorSeqIndex - 1)
1157+
* (int)seqIndCount;
1158+
1159+
int *bufferInt = bufferIntArray[dstPet];
1160+
1161+
*bufferInt++ = lookupIndex;
1162+
*bufferInt++ = de;
1163+
1164+
bufferIntArray[dstPet]=bufferInt;
1165+
}
1166+
}
1167+
}
1168+
}else{
1169+
// loop over all elements in the exclusive region for localDe j
1170+
ArrayElement arrayElement(array, j, true, false, false);
1171+
while(arrayElement.isWithin()){
1172+
SeqIndex<IT> seqIndex = arrayElement.getSequenceIndex<IT>();
1173+
IT seqInd = seqIndex.decompSeqIndex;
1174+
1175+
// Figure out which Pet it's on
1176+
Interval<IT> tmpInt;
1177+
tmpInt.min=0;
1178+
tmpInt.max=seqInd;
1179+
tmpInt.count=0;
1180+
1181+
const Interval<IT> *seqIndPos = std::lower_bound(seqIndexInterval, seqIndexInterval+petCount, tmpInt);
1182+
int dstPet=(int)(seqIndPos-seqIndexInterval);
1183+
1184+
// If not found, then skip (?)
1185+
if (dstPet >= petCount) {
1186+
arrayElement.next();
1187+
continue;
1188+
}
1189+
1190+
// If it's this PET, then skip, because those are handled later
1191+
if (dstPet == localPet) {
1192+
arrayElement.next();
1193+
continue;
1194+
}
1195+
1196+
IT seqIndMin = seqIndexInterval[dstPet].min;
1197+
IT seqIndMax = seqIndexInterval[dstPet].max;
1198+
1199+
if (seqInd >= seqIndMin && seqInd <= seqIndMax){
1200+
IT seqIndCount = seqIndexInterval[dstPet].count;
1201+
int lookupIndex = (int)(seqInd - seqIndMin);
1202+
if (tensorMixFlag)
1203+
lookupIndex += (seqIndex.tensorSeqIndex - 1) * (int)seqIndCount;
1204+
1205+
int *bufferInt = bufferIntArray[dstPet];
1206+
1207+
*bufferInt++ = lookupIndex;
1208+
*bufferInt++ = de;
1209+
1210+
bufferIntArray[dstPet]=bufferInt;
1211+
}
1212+
arrayElement.next();
1213+
} // end while over all exclusive elements
1214+
}
1215+
}
1216+
1217+
// Free memory holding pointers into buffers
1218+
delete [] bufferIntArray;
1219+
}
1220+
1221+
11071222
virtual void messageProcess(int srcPet, int dstPet, char *buffer){
11081223
int count = messageSizeCount(srcPet, dstPet);
11091224
int *bufferInt = (int *)buffer;
@@ -1275,6 +1390,32 @@ template<typename IT1, typename IT2>
12751390
seqIntervFactorListLookupIndexToPet[dstPet].front()),
12761391
messageSizeCount(srcPet, dstPet)*sizeof(int));
12771392
}
1393+
1394+
// Batch form of messagePrepare(): this class has no search-based shortcut,
// so every destination in the requested range is prepared individually.
//
// Destinations are visited as dstPet = ii % petCount, with ii counting down
// from sendIndexOffset-iiStart to just above sendIndexOffset-iiEnd.
// Destinations whose messageSize() is not positive are left untouched.
virtual void messagePrepareSearch(VMK *vmk,
  int sendIndexOffset, int iiStart, int iiEnd,
  vector<char *> &sendBuffer
  )const{

  // Who are we, and how many PETs are there
  const int nPets = vmk->getNpets();
  const int myPet = vmk->getMypet();

  // Bounds of the shifted, downward-counting destination loop
  const int firstShifted = sendIndexOffset - iiStart;
  const int stopShifted  = sendIndexOffset - iiEnd;

  for (int shifted=firstShifted; shifted>stopShifted; --shifted){
    // fold the shifted index back into [0,..,petCount-1]
    const int target = shifted % nPets;

    const int bytes = messageSize(myPet, target);
    if (bytes <= 0) continue;   // nothing to send to this PET

    // delegate to the ordinary single-PET messagePrepare
    messagePrepare(myPet, target, sendBuffer[target]);
  }
}
1418+
12781419
virtual void messageProcess(int srcPet, int dstPet, char *buffer){
12791420
int *bufferInt = (int *)buffer;
12801421
for (int i=0; i<messageSizeCount(srcPet, dstPet); i++){
@@ -1364,6 +1505,30 @@ template<typename IT1, typename IT2>
13641505
else if (typekindFactors == ESMC_TYPEKIND_I8)
13651506
fillStream<ESMC_I8>(srcPet, dstPet, buffer);
13661507
}
1508+
1509+
// Batch form of messagePrepare(): prepares the send buffer of each
// destination in the requested range by calling the single-PET
// messagePrepare() on it.
//
// The destination for loop index ii is ii % petCount; ii runs downward from
// sendIndexOffset-iiStart and stops before reaching sendIndexOffset-iiEnd.
// A destination is only prepared when messageSize() for it is positive.
virtual void messagePrepareSearch(VMK *vmk,
  int sendIndexOffset, int iiStart, int iiEnd,
  vector<char *> &sendBuffer
  )const{

  // PET bookkeeping
  const int totalPets = vmk->getNpets();
  const int senderPet = vmk->getMypet();

  // localPet acts as sender: walk the shifted destination range downward
  int ii = sendIndexOffset - iiStart;
  const int iiStop = sendIndexOffset - iiEnd;
  while (ii > iiStop){
    const int receiverPet = ii % totalPets;  // fold into [0,..,petCount-1]
    const int msgBytes = messageSize(senderPet, receiverPet);
    if (msgBytes > 0){
      // reuse the regular one-PET-at-a-time messagePrepare
      messagePrepare(senderPet, receiverPet, sendBuffer[receiverPet]);
    }
    --ii;
  }
}
1531+
13671532
virtual void messageProcess(int srcPet, int dstPet, char *buffer){
13681533
ESMC_TypeKind_Flag typekindFactors =
13691534
SetupSeqIndexFactorLookup<IT>::typekindFactors;

src/Infrastructure/VM/include/ESMCI_VMKernel.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -738,6 +738,11 @@ class ComPat{
738738

739739
virtual void messagePrepare(int srcPet, int dstPet, char *buffer)const =0;
740740
// will be called only for localPet==srcPet
741+
742+
// Batch variant of messagePrepare(): fills the send buffers for a whole
// range of destination PETs in a single call.
//   vmk             - used by implementations to query getNpets()/getMypet()
//   sendIndexOffset,
//   iiStart, iiEnd  - describe the destination range: implementations visit
//                     dstPet = ii % petCount for ii going from
//                     sendIndexOffset-iiStart down to, but not including,
//                     sendIndexOffset-iiEnd
//   sendBuffer      - indexed by dstPet; totalExchange() allocates an entry
//                     of messageSize(localPet, dstPet) bytes for each
//                     destination in the range, or sets it to NULL when that
//                     size is not positive
// Pure virtual: every ComPat implementation has to provide it.
virtual void messagePrepareSearch(VMK *vmk,
743+
int sendIndexOffset, int iiStart, int iiEnd,
744+
std::vector<char *> &sendBuffer
745+
)const = 0;
741746

742747
virtual void messageProcess(int srcPet, int dstPet, char *buffer) =0;
743748
// will be called only for localPet==dstPet

src/Infrastructure/VM/src/ESMCI_VMKernel.C

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7648,6 +7648,56 @@ namespace ESMCI{
76487648
#endif
76497649
}
76507650
}
7651+
7652+
7653+
#ifndef USE_OLD_MESSAGEPREPARE
7654+
7655+
7656+
// Allocate buffers
7657+
for (int ii=sendIndexOffset-iiStart; ii>sendIndexOffset-iiEnd; ii--){
7658+
// localPet-dependent shifted loop reduces communication contention
7659+
int dstPet = ii%petCount; // fold back into [0,..,petCount-1] range
7660+
// send message to Pet "i"
7661+
int size = messageSize(localPet, dstPet);
7662+
if (size>0){
7663+
sendBuffer[dstPet] = new char[size];
7664+
} else {
7665+
sendBuffer[dstPet] = NULL;
7666+
}
7667+
}
7668+
7669+
// Fill buffers
7670+
messagePrepareSearch(vmk,
7671+
sendIndexOffset, iiStart, iiEnd,
7672+
sendBuffer);
7673+
7674+
// Communicate buffers
7675+
for (int ii=sendIndexOffset-iiStart; ii>sendIndexOffset-iiEnd; ii--){
7676+
// localPet-dependent shifted loop reduces communication contention
7677+
int dstPet = ii%petCount; // fold back into [0,..,petCount-1] range
7678+
// send message to Pet "i"
7679+
int size = messageSize(localPet, dstPet);
7680+
if (size>0){
7681+
7682+
#ifdef MUST_USE_BLOCKING_SEND
7683+
vmk->send(sendBuffer[dstPet], size, dstPet);
7684+
#else
7685+
sendCommhList[dstPet] = NULL;
7686+
vmk->send(sendBuffer[dstPet], size, dstPet, &(sendCommhList[dstPet]));
7687+
#endif
7688+
#ifdef DEBUG_COMPAT_on
7689+
{
7690+
std::stringstream msg;
7691+
msg << "ComPat#" << __LINE__
7692+
<< " posting send to i=" << dstPet << " size=" << size
7693+
<< " sendBuffer=" << (void *)sendBuffer[dstPet];
7694+
ESMC_LogDefault.Write(msg.str(), ESMC_LOGMSG_DEBUG);
7695+
}
7696+
#endif
7697+
}
7698+
}
7699+
7700+
#else
76517701
// localPet acts as a sender, constructs message and sends to receiver
76527702
for (int ii=sendIndexOffset-iiStart; ii>sendIndexOffset-iiEnd; ii--){
76537703
// localPet-dependent shifted loop reduces communication contention
@@ -7674,6 +7724,9 @@ namespace ESMCI{
76747724
#endif
76757725
}
76767726
}
7727+
#endif
7728+
7729+
76777730
if (iiStart==localPet+1){
76787731
// localPet does local prepare and process
76797732
#ifdef DEBUG_COMPAT_on

0 commit comments

Comments
 (0)