Skip to content

Commit 1ccb6ab

Browse files
committed
btl/vader: various threading fixes
This commit fixes several threading bugs: - Add an additional lock to the btl_base_endpoint_t structure to lock the list of pending frags. This allows the progress function to attempt to send pending frags without needing to drop/reaquire the lock. This should provide a small improvement in performance and fixes a potential race between adding an removing items from the pending list. - Ensure fast boxes are only set up once by updating the send count using atomics when needed and do not set the fast box buffer pointer until the fast box is set up. Closes open-mpi#1408 Signed-off-by: Nathan Hjelm <[email protected]> (cherry picked from open-mpi/ompi@2a0b3a5) Signed-off-by: Nathan Hjelm <[email protected]>
1 parent 595d45e commit 1ccb6ab

File tree

5 files changed

+18
-11
lines changed

5 files changed

+18
-11
lines changed

opal/mca/btl/vader/btl_vader_component.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -633,22 +633,21 @@ static void mca_btl_vader_progress_waiting (mca_btl_base_endpoint_t *ep)
633633
return;
634634
}
635635

636-
OPAL_THREAD_LOCK(&ep->lock);
636+
OPAL_THREAD_LOCK(&ep->pending_frags_lock);
637637
OPAL_LIST_FOREACH_SAFE(frag, next, &ep->pending_frags, mca_btl_vader_frag_t) {
638-
OPAL_THREAD_UNLOCK(&ep->lock);
639638
ret = vader_fifo_write_ep (frag->hdr, ep);
640639
if (!ret) {
640+
OPAL_THREAD_UNLOCK(&ep->pending_frags_lock);
641641
return;
642642
}
643643

644-
OPAL_THREAD_LOCK(&ep->lock);
645644
(void) opal_list_remove_first (&ep->pending_frags);
646645
}
647646

648647
ep->waiting = false;
649648
opal_list_remove_item (&mca_btl_vader_component.pending_endpoints, &ep->super);
650649

651-
OPAL_THREAD_UNLOCK(&ep->lock);
650+
OPAL_THREAD_UNLOCK(&ep->pending_frags_lock);
652651
}
653652

654653
/**

opal/mca/btl/vader/btl_vader_endpoint.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ typedef struct mca_btl_base_endpoint_t {
6262

6363
int32_t peer_smp_rank; /**< my peer's SMP process rank. Used for accessing
6464
* SMP specfic data structures. */
65-
uint32_t send_count; /**< number of fragments sent to this peer */
65+
volatile uint64_t send_count; /**< number of fragments sent to this peer */
6666
char *segment_base; /**< start of the peer's segment (in the address space
6767
* of this process) */
6868

@@ -84,6 +84,7 @@ typedef struct mca_btl_base_endpoint_t {
8484
} other;
8585
} segment_data;
8686

87+
opal_mutex_t pending_frags_lock; /**< protect pending_frags */
8788
opal_list_t pending_frags; /**< fragments pending fast box space */
8889
bool waiting; /**< endpoint is on the component wait list */
8990
} mca_btl_base_endpoint_t;
@@ -94,15 +95,15 @@ OBJ_CLASS_DECLARATION(mca_btl_vader_endpoint_t);
9495

9596
static inline void mca_btl_vader_endpoint_setup_fbox_recv (struct mca_btl_base_endpoint_t *endpoint, void *base)
9697
{
97-
endpoint->fbox_in.buffer = base;
9898
endpoint->fbox_in.startp = (uint32_t *) base;
9999
endpoint->fbox_in.start = MCA_BTL_VADER_FBOX_ALIGNMENT;
100100
endpoint->fbox_in.seq = 0;
101+
opal_atomic_wmb ();
102+
endpoint->fbox_in.buffer = base;
101103
}
102104

103105
static inline void mca_btl_vader_endpoint_setup_fbox_send (struct mca_btl_base_endpoint_t *endpoint, void *base)
104106
{
105-
endpoint->fbox_out.buffer = base;
106107
endpoint->fbox_out.start = MCA_BTL_VADER_FBOX_ALIGNMENT;
107108
endpoint->fbox_out.end = MCA_BTL_VADER_FBOX_ALIGNMENT;
108109
endpoint->fbox_out.startp = (uint32_t *) base;
@@ -111,6 +112,9 @@ static inline void mca_btl_vader_endpoint_setup_fbox_send (struct mca_btl_base_e
111112

112113
/* zero out the first header in the fast box */
113114
memset ((char *) base + MCA_BTL_VADER_FBOX_ALIGNMENT, 0, MCA_BTL_VADER_FBOX_ALIGNMENT);
115+
116+
opal_atomic_wmb ();
117+
endpoint->fbox_out.buffer = base;
114118
}
115119

116120
#endif /* MCA_BTL_VADER_ENDPOINT_H */

opal/mca/btl/vader/btl_vader_fbox.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ static inline unsigned char *mca_btl_vader_reserve_fbox (mca_btl_base_endpoint_t
117117

118118
if (OPAL_UNLIKELY(buffer_free < size)) {
119119
ep->fbox_out.end = (hbs << 31) | end;
120+
opal_atomic_wmb ();
120121
OPAL_THREAD_UNLOCK(&ep->lock);
121122
return NULL;
122123
}
@@ -141,6 +142,7 @@ static inline unsigned char *mca_btl_vader_reserve_fbox (mca_btl_base_endpoint_t
141142

142143
/* align the buffer */
143144
ep->fbox_out.end = ((uint32_t) hbs << 31) | end;
145+
opal_atomic_wmb ();
144146
OPAL_THREAD_UNLOCK(&ep->lock);
145147

146148
return dst + sizeof (mca_btl_vader_fbox_hdr_t);
@@ -247,6 +249,7 @@ static inline bool mca_btl_vader_check_fboxes (void)
247249

248250
/* save where we left off */
249251
/* let the sender know where we stopped */
252+
opal_atomic_mb ();
250253
ep->fbox_in.start = ep->fbox_in.startp[0] = ((uint32_t) hbs << 31) | start;
251254
processed = true;
252255
}
@@ -258,8 +261,7 @@ static inline bool mca_btl_vader_check_fboxes (void)
258261

259262
static inline void mca_btl_vader_try_fbox_setup (mca_btl_base_endpoint_t *ep, mca_btl_vader_hdr_t *hdr)
260263
{
261-
if (NULL == ep->fbox_out.buffer && mca_btl_vader_component.fbox_threshold == ++ep->send_count) {
262-
264+
if (OPAL_UNLIKELY(NULL == ep->fbox_out.buffer && mca_btl_vader_component.fbox_threshold == OPAL_THREAD_ADD64 ((volatile int64_t *) &ep->send_count, 1))) {
263265
/* protect access to mca_btl_vader_component.segment_offset */
264266
OPAL_THREAD_LOCK(&mca_btl_vader_component.lock);
265267

opal/mca/btl/vader/btl_vader_module.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -524,12 +524,14 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
524524
static void mca_btl_vader_endpoint_constructor (mca_btl_vader_endpoint_t *ep)
525525
{
526526
OBJ_CONSTRUCT(&ep->pending_frags, opal_list_t);
527+
OBJ_CONSTRUCT(&ep->pending_frags_lock, opal_mutex_t);
527528
ep->fifo = NULL;
528529
}
529530

530531
static void mca_btl_vader_endpoint_destructor (mca_btl_vader_endpoint_t *ep)
531532
{
532533
OBJ_DESTRUCT(&ep->pending_frags);
534+
OBJ_DESTRUCT(&ep->pending_frags_lock);
533535

534536
#if OPAL_BTL_VADER_HAVE_XPMEM
535537
if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) {

opal/mca/btl/vader/btl_vader_send.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,15 +57,15 @@ int mca_btl_vader_send (struct mca_btl_base_module_t *btl,
5757
/* post the relative address of the descriptor into the peer's fifo */
5858
if (opal_list_get_size (&endpoint->pending_frags) || !vader_fifo_write_ep (frag->hdr, endpoint)) {
5959
frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
60-
OPAL_THREAD_LOCK(&endpoint->lock);
60+
OPAL_THREAD_LOCK(&endpoint->pending_frags_lock);
6161
opal_list_append (&endpoint->pending_frags, (opal_list_item_t *) frag);
6262
if (!endpoint->waiting) {
6363
OPAL_THREAD_LOCK(&mca_btl_vader_component.lock);
6464
opal_list_append (&mca_btl_vader_component.pending_endpoints, &endpoint->super);
6565
OPAL_THREAD_UNLOCK(&mca_btl_vader_component.lock);
6666
endpoint->waiting = true;
6767
}
68-
OPAL_THREAD_UNLOCK(&endpoint->lock);
68+
OPAL_THREAD_UNLOCK(&endpoint->pending_frags_lock);
6969
return OPAL_SUCCESS;
7070
}
7171

0 commit comments

Comments
 (0)