Skip to content

Commit 1b588d4

Browse files
Converse: Fix data race in CmiHandleNodeReductionMessage
Converse messages sent to a node can be handled by any PE. Therefore, the handler for node reductions must access its data in a thread-safe way.
1 parent 773eaf6 commit 1b588d4

File tree

1 file changed

+28
-6
lines changed

1 file changed

+28
-6
lines changed

src/conv-core/convcore.C

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2447,6 +2447,13 @@ struct CmiReduction {
24472447
char localContributed;
24482448
};
24492449

2450+
struct CmiNodeReduction {
2451+
#if CMK_SMP
2452+
CmiNodeLock lock;
2453+
#endif
2454+
CmiReduction * red;
2455+
};
2456+
24502457
CpvStaticDeclare(int, CmiReductionMessageHandler);
24512458
CpvStaticDeclare(int, CmiReductionDynamicRequestHandler);
24522459

@@ -2458,7 +2465,7 @@ CpvStaticDeclare(CmiReductionID, _reduce_seqID_global); /* This is used only by
24582465
CpvStaticDeclare(CmiReductionID, _reduce_seqID_request);
24592466
CpvStaticDeclare(CmiReductionID, _reduce_seqID_dynamic);
24602467

2461-
CsvStaticDeclare(CmiReduction**, _nodereduce_info);
2468+
CsvStaticDeclare(CmiNodeReduction *, _nodereduce_info);
24622469
CsvStaticDeclare(CmiReductionID, _nodereduce_seqID_global);
24632470
CsvStaticDeclare(CmiReductionID, _nodereduce_seqID_request);
24642471
CsvStaticDeclare(CmiReductionID, _nodereduce_seqID_dynamic);
@@ -2525,7 +2532,7 @@ CmiReductionID CmiGetDynamicReduction(void) {
25252532

25262533
static CmiReduction* CmiGetNodeReductionCreate(int id, short int numChildren) {
25272534
const int idx = id & ~((~0u) << CmiLogMaxReductions);
2528-
auto & redref = CsvAccess(_nodereduce_info)[idx];
2535+
auto & redref = CsvAccess(_nodereduce_info)[idx].red;
25292536
CmiReduction *red = redref;
25302537
if (red != NULL && red->seqID != id) {
25312538
/* The table needs to be expanded */
@@ -2553,7 +2560,7 @@ static CmiReduction* CmiGetNodeReductionCreate(int id, short int numChildren) {
25532560

25542561
static void CmiClearNodeReduction(int id) {
25552562
const int idx = id & ~((~0u) << CmiLogMaxReductions);
2556-
auto & redref = CsvAccess(_nodereduce_info)[idx];
2563+
auto & redref = CsvAccess(_nodereduce_info)[idx].red;
25572564
auto red = redref;
25582565
redref = nullptr;
25592566
free(red);
@@ -2913,6 +2920,11 @@ static void CmiHandleReductionMessage(void *msg) {
29132920

29142921
static void CmiHandleNodeReductionMessage(void *msg) {
29152922
const auto id = CmiGetRedID(msg);
2923+
#if CMK_SMP
2924+
const int idx = id & ~((~0u) << CmiLogMaxReductions);
2925+
CmiNodeReduction & nodered = CsvAccess(_nodereduce_info)[idx];
2926+
CmiLock(nodered.lock);
2927+
#endif
29162928

29172929
CmiReduction * red = CmiGetNodeReductionCreate(id, 0);
29182930
if (red->numRemoteReceived == red->numChildren)
@@ -2921,6 +2933,10 @@ static void CmiHandleNodeReductionMessage(void *msg) {
29212933
REDN_DBG("[%d:%d][%d] CmiNodeReduce::remote %hd\n",
29222934
CmiMyNode(),CmiMyRank(),CmiMyPe(),red->seqID);
29232935
CmiSendNodeReduce(red);
2936+
2937+
#if CMK_SMP
2938+
CmiUnlock(nodered.lock);
2939+
#endif
29242940
}
29252941

29262942
void CmiReductionsInit(void) {
@@ -2954,10 +2970,16 @@ void CmiReductionsInit(void) {
29542970
CsvAccess(_nodereduce_seqID_request) = CmiReductionID_requestOffset;
29552971
CsvInitialize(CmiReductionID, _nodereduce_seqID_dynamic);
29562972
CsvAccess(_nodereduce_seqID_dynamic) = CmiReductionID_dynamicOffset;
2957-
CsvInitialize(CmiReduction**, _nodereduce_info);
2958-
auto noderedinfo = (CmiReduction **)malloc(CmiMaxReductions * sizeof(CmiReduction *));
2973+
CsvInitialize(CmiNodeReduction *, _nodereduce_info);
2974+
auto noderedinfo = (CmiNodeReduction *)malloc(CmiMaxReductions * sizeof(CmiNodeReduction));
29592975
for (int i = 0; i < CmiMaxReductions; ++i)
2960-
noderedinfo[i] = nullptr;
2976+
{
2977+
CmiNodeReduction & nodered = noderedinfo[i];
2978+
#if CMK_SMP
2979+
nodered.lock = CmiCreateLock();
2980+
#endif
2981+
nodered.red = nullptr;
2982+
}
29612983
CsvAccess(_nodereduce_info) = noderedinfo;
29622984
}
29632985
}

0 commit comments

Comments
 (0)