Skip to content

Commit 7af138f

Browse files
committed
osc/pt2pt: fix possible race in peer locking
It is possible for another thread to process a lock ack before the peer is set as locked. In this case, setting either the locked flag or the eager active flag might clobber the update made by the other thread. To address this, the flags have been made volatile and are set atomically. Since there is no opal_atomic_or or opal_atomic_and function, just use cmpset for now. Signed-off-by: Nathan Hjelm <[email protected]>
1 parent c082068 commit 7af138f

File tree

2 files changed

+13
-9
lines changed

2 files changed

+13
-9
lines changed

ompi/mca/osc/pt2pt/osc_pt2pt.h

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ struct ompi_osc_pt2pt_peer_t {
121121
int32_t passive_incoming_frag_count;
122122

123123
/** peer flags */
124-
int32_t flags;
124+
volatile int32_t flags;
125125
};
126126
typedef struct ompi_osc_pt2pt_peer_t ompi_osc_pt2pt_peer_t;
127127

@@ -144,11 +144,15 @@ static inline bool ompi_osc_pt2pt_peer_eager_active (ompi_osc_pt2pt_peer_t *peer
144144

145145
static inline void ompi_osc_pt2pt_peer_set_flag (ompi_osc_pt2pt_peer_t *peer, int32_t flag, bool value)
146146
{
147-
if (value) {
148-
peer->flags |= flag;
149-
} else {
150-
peer->flags &= ~flag;
151-
}
147+
int32_t peer_flags, new_flags;
148+
do {
149+
peer_flags = peer->flags;
150+
if (value) {
151+
new_flags = peer_flags | flag;
152+
} else {
153+
new_flags = peer_flags & ~flag;
154+
}
155+
} while (!OPAL_ATOMIC_CMPSET_32 (&peer->flags, peer_flags, new_flags));
152156
}
153157

154158
static inline void ompi_osc_pt2pt_peer_set_locked (ompi_osc_pt2pt_peer_t *peer, bool value)

ompi/mca/osc/pt2pt/osc_pt2pt_sync.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,10 +74,10 @@ struct ompi_osc_pt2pt_sync_t {
7474
int num_peers;
7575

7676
/** number of synchronization messages expected */
77-
int32_t sync_expected;
77+
volatile int32_t sync_expected;
7878

7979
/** eager sends are active to all peers in this access epoch */
80-
bool eager_send_active;
80+
volatile bool eager_send_active;
8181

8282
/** communication has started on this epoch */
8383
bool epoch_active;
@@ -175,7 +175,7 @@ static inline void ompi_osc_pt2pt_sync_expected (ompi_osc_pt2pt_sync_t *sync)
175175
static inline void ompi_osc_pt2pt_sync_reset (ompi_osc_pt2pt_sync_t *sync)
176176
{
177177
sync->type = OMPI_OSC_PT2PT_SYNC_TYPE_NONE;
178-
sync->eager_send_active = 0;
178+
sync->eager_send_active = false;
179179
sync->epoch_active = 0;
180180
sync->peer_list.peers = NULL;
181181
sync->sync.pscw.group = NULL;

0 commit comments

Comments
 (0)