Skip to content

Commit 6b210fa

Browse files
committed
btl/ugni: do not return a frag from sendi if an endpoint is waitlisted
This fixes a hang that can occur when running bandwidth tests.

Signed-off-by: Nathan Hjelm <[email protected]>
1 parent 2e42b0a commit 6b210fa

File tree

1 file changed

+8
-2
lines changed

1 file changed

+8
-2
lines changed

opal/mca/btl/ugni/btl_ugni_send.c

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -118,6 +118,13 @@ int mca_btl_ugni_sendi (struct mca_btl_base_module_t *btl,
118118
size_t packed_size = payload_size;
119119
int rc;
120120

121+
if (OPAL_UNLIKELY(opal_list_get_size (&endpoint->frag_wait_list))) {
122+
if (NULL != descriptor) {
123+
*descriptor = NULL;
124+
}
125+
return OPAL_ERR_OUT_OF_RESOURCE;
126+
}
127+
121128
do {
122129
BTL_VERBOSE(("btl/ugni isend sending fragment from %d -> %d. length = %" PRIu64
123130
" endoint state %d", OPAL_PROC_MY_NAME.vpid, endpoint->peer_proc->proc_name.vpid,
@@ -134,8 +141,7 @@ int mca_btl_ugni_sendi (struct mca_btl_base_module_t *btl,
134141
}
135142

136143
assert (packed_size == payload_size);
137-
if (OPAL_UNLIKELY(NULL == frag || OPAL_SUCCESS != mca_btl_ugni_check_endpoint_state (endpoint) ||
138-
opal_list_get_size (&endpoint->frag_wait_list))) {
144+
if (OPAL_UNLIKELY(NULL == frag || OPAL_SUCCESS != mca_btl_ugni_check_endpoint_state (endpoint))) {
139145
break;
140146
}
141147

0 commit comments

Comments
 (0)