Skip to content

Commit 436b3b0

Browse files
committed
btl/openib: fix sequence number generation for debug mode
When using eager RDMA in debug builds the openib btl generates a sequence number for each send. The code independently updated the head index and the sequence number for the eager rdma transaction. If multiple threads enter this code at the same time and run in the following order: thread 1: update sequence (0 -> 1) thread 2: update sequence (1 -> 2) thread 2: update head (0 -> 1) thread 1: update head (1 -> 2) the sequence number for head[0] gets 1 and the sequence number for head[1] gets 0. The fix is to generate the sequence number from the head index. master commit open-mpi/ompi@c101385 Signed-off-by: Nathan Hjelm <[email protected]>
1 parent 8f8590e commit 436b3b0

File tree

2 files changed

+35
-15
lines changed

2 files changed

+35
-15
lines changed

opal/mca/btl/openib/btl_openib_eager_rdma.h

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1+
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
12
/*
23
* Copyright (c) 2006-2007 Voltaire All rights reserved.
4+
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
5+
* reserved.
36
* $COPYRIGHT$
47
*
58
* Additional copyrights may follow
@@ -81,17 +84,33 @@ typedef struct mca_btl_openib_eager_rdma_remote_t mca_btl_openib_eager_rdma_remo
8184
mca_btl_openib_component.eager_rdma_num) \
8285
(I) = 0; \
8386
} while (0)
84-
#define MCA_BTL_OPENIB_RDMA_MOVE_INDEX(HEAD, OLD_HEAD) \
85-
do { \
86-
int32_t new_head; \
87-
do { \
88-
OLD_HEAD = HEAD; \
89-
new_head = OLD_HEAD + 1; \
90-
if(new_head == mca_btl_openib_component.eager_rdma_num) \
91-
new_head = 0; \
92-
} while(!OPAL_ATOMIC_CMPSET_32(&HEAD, OLD_HEAD, new_head)); \
87+
88+
89+
#if OPAL_ENABLE_DEBUG
90+
91+
/**
92+
* @brief read and increment the remote head index and generate a sequence
93+
* number
94+
*/
95+
96+
#define MCA_BTL_OPENIB_RDMA_MOVE_INDEX(HEAD, OLD_HEAD, SEQ) \
97+
do { \
98+
(SEQ) = OPAL_THREAD_ADD32(&(HEAD), 1) - 1; \
99+
(OLD_HEAD) = (SEQ) % mca_btl_openib_component.eager_rdma_num; \
93100
} while(0)
94101

102+
#else
103+
104+
/**
105+
* @brief read and increment the remote head index
106+
*/
107+
108+
#define MCA_BTL_OPENIB_RDMA_MOVE_INDEX(HEAD, OLD_HEAD) \
109+
do { \
110+
(OLD_HEAD) = (OPAL_THREAD_ADD32(&(HEAD), 1) - 1) % mca_btl_openib_component.eager_rdma_num; \
111+
} while(0)
112+
113+
#endif
95114

96115
END_C_DECLS
97116
#endif

opal/mca/btl/openib/btl_openib_endpoint.h

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -569,17 +569,18 @@ static inline int post_send(mca_btl_openib_endpoint_t *ep,
569569
MCA_BTL_OPENIB_RDMA_FRAG_SET_SIZE(ftr, sg->length);
570570
MCA_BTL_OPENIB_RDMA_MAKE_LOCAL(ftr);
571571
#if OPAL_ENABLE_DEBUG
572-
do {
573-
ftr->seq = ep->eager_rdma_remote.seq;
574-
} while (!OPAL_ATOMIC_CMPSET_32((int32_t*) &ep->eager_rdma_remote.seq,
575-
(int32_t) ftr->seq,
576-
(int32_t) (ftr->seq+1)));
572+
/* NTH: generate the sequence from the remote head index to ensure that the
573+
* wrong sequence isn't set. The way this code used to look the sequence number
574+
* and head were updated independently and it led to false positives for incorrect
575+
* sequence numbers. */
576+
MCA_BTL_OPENIB_RDMA_MOVE_INDEX(ep->eager_rdma_remote.head, head, ftr->seq);
577+
#else
578+
MCA_BTL_OPENIB_RDMA_MOVE_INDEX(ep->eager_rdma_remote.head, head);
577579
#endif
578580
if(ep->nbo)
579581
BTL_OPENIB_FOOTER_HTON(*ftr);
580582

581583
sr_desc->wr.rdma.rkey = ep->eager_rdma_remote.rkey;
582-
MCA_BTL_OPENIB_RDMA_MOVE_INDEX(ep->eager_rdma_remote.head, head);
583584
#if BTL_OPENIB_FAILOVER_ENABLED
584585
/* frag->ftr is unused on the sending fragment, so use it
585586
* to indicate it is an eager fragment. A non-zero value

0 commit comments

Comments
 (0)