Skip to content

Commit f436743

Browse files
author
Laxmikant Kale
committed
Modifies converse scheduler's getNextMessage so nodeGroup messages can run with higher priority over local
As it is, nodeGroup messages are not checked until all local and regular Charm queue (prio Q) messages are checked, which causes issues when the application is using nodeGroup messages in the hope that *some* PE will attend to it quickly. The change makes getNextMessage check the nodeGroup queue every 2^nodeGrpFreq iterations with high priority, in addition to its usual check after exhausting local queues (except task Q). This commit has not been tested at all, but I am pushing it to allow others to help me test/fix it.
1 parent 298416c commit f436743

File tree

2 files changed

+22
-1
lines changed

2 files changed

+22
-1
lines changed

src/conv-core/convcore.C

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1656,6 +1656,7 @@ void CsdSchedulerState_new(CsdSchedulerState_t *s)
16561656
#if CMK_NODE_QUEUE_AVAILABLE
16571657
s->nodeQ=CsvAccess(CsdNodeQueue);
16581658
s->nodeLock=CsvAccess(CsdNodeQueueLock);
1659+
s->nodeGrpFreq =2; // check node queue once in 4 (2^2) iterations.
16591660
#endif
16601661
#if CMK_GRID_QUEUE_AVAILABLE
16611662
s->gridQ=CpvAccess(CsdGridQueue);
@@ -1664,6 +1665,7 @@ void CsdSchedulerState_new(CsdSchedulerState_t *s)
16641665
s->taskQ = CpvAccess(CsdTaskQueue);
16651666
s->suspendedTaskQ = CpvAccess(CmiSuspendedTaskQueue);
16661667
#endif
1668+
16671669
}
16681670

16691671

@@ -1719,7 +1721,20 @@ void CsdSchedulerState_new(CsdSchedulerState_t *s)
17191721
*/
17201722
void *CsdNextMessage(CsdSchedulerState_t *s) {
17211723
void *msg;
1722-
if((*(s->localCounter))-- >0)
1724+
1725+
s->iter++;
1726+
1727+
#if CMK_NODE_QUEUE_AVAILABLE
1728+
if (1 == (s->iter & (1 << s->nodeGrpFreq)) // since we use nodeGrpFreq == 0 to mean
1729+
// don't check NodeQ with high priority, i
1730+
// value of 1 serves well as when to check it.i but we sshould avoid "%" operator
1731+
// note: s->nodeGrpFreq is raised to a power of 2
1732+
{
1733+
msg = CmiGetNonLocalNodeQ();
1734+
if (NULL != msg) return msg;
1735+
}
1736+
#endif
1737+
if((*(s->localCounter))-- >0)
17231738
{
17241739
/* This avoids a race condition with migration detected by megatest*/
17251740
msg=CdsFifo_Dequeue(s->localQ);

src/conv-core/converse.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1145,6 +1145,12 @@ typedef struct {
11451145
void *localQ;
11461146
Queue nodeQ;
11471147
Queue schedQ;
1148+
unsigned short iter; // counting number of sched iterations (hopefully of for it to roll over
1149+
unsigned short nodeGrpFreq; // call nodegroup queue once every 2^nodeGrpFreq iterations with high prio
1150+
// should add a function to change this from the program for advanced users. One obstacle:
1151+
// it is inside a struct that is on stack, and so not accessible for standalone functions. Need to
1152+
// resolve this by making a schedule a c++ object, but even then we need a ptr to the currently-running scheduler
1153+
// 0 means do not check nodegroup queue with high prio.. will be checked with low prio after other Qs
11481154
int *localCounter;
11491155
#if CMK_OBJECT_QUEUE_AVAILABLE
11501156
Queue objQ;

0 commit comments

Comments
 (0)