diff --git a/ompi/mca/pml/ob1/pml_ob1.h b/ompi/mca/pml/ob1/pml_ob1.h
index 1f4bfbb5899..4ec8dbf3905 100644
--- a/ompi/mca/pml/ob1/pml_ob1.h
+++ b/ompi/mca/pml/ob1/pml_ob1.h
@@ -302,7 +302,7 @@ void mca_pml_ob1_process_pending_rdma(void);
         if(opal_list_get_size(&mca_pml_ob1.recv_pending))      \
             mca_pml_ob1_recv_request_process_pending();        \
         if(opal_list_get_size(&mca_pml_ob1.send_pending))      \
-            mca_pml_ob1_send_request_process_pending(bml_btl); \
+            (void)mca_pml_ob1_send_request_process_pending(bml_btl); \
         if(opal_list_get_size(&mca_pml_ob1.rdma_pending))      \
             mca_pml_ob1_process_pending_rdma();                \
     } while (0)
diff --git a/ompi/mca/pml/ob1/pml_ob1_progress.c b/ompi/mca/pml/ob1/pml_ob1_progress.c
index b17f2ee0a69..64c62997266 100644
--- a/ompi/mca/pml/ob1/pml_ob1_progress.c
+++ b/ompi/mca/pml/ob1/pml_ob1_progress.c
@@ -49,6 +49,9 @@ static inline int mca_pml_ob1_process_pending_cuda_async_copies(void)
     } while (progress > 0);
     /* Consider progressing dtoh events here in future */

+    /* Update the number of potential pending events */
+    mca_pml_ob1_enable_progress(-count);
+
     return count;
 }
 #endif /* OPAL_CUDA_SUPPORT */
@@ -56,12 +59,22 @@ static inline int mca_pml_ob1_process_pending_cuda_async_copies(void)
 static opal_atomic_int32_t mca_pml_ob1_progress_needed = 0;
 int mca_pml_ob1_enable_progress(int32_t count)
 {
+    if( 0 == count ) return 0;  /* nothing to do */
     int32_t progress_count = OPAL_ATOMIC_ADD_FETCH32(&mca_pml_ob1_progress_needed, count);
+    assert(progress_count >= 0);
     if( 1 < progress_count )
-        return 0; /* progress was already on */
+        return 0; /* progress was already on and no change necessary */

-    opal_progress_register(mca_pml_ob1_progress);
-    return 1;
+    if( 0 == progress_count ) {  /* only way to get here is if count is negative */
+        opal_progress_unregister(mca_pml_ob1_progress);
+        return 1;
+    }
+    if( count > 0 ) {
+        opal_progress_register(mca_pml_ob1_progress);
+        return 1;
+    }
+    /* count was negative */
+    return 0;
 }

 int mca_pml_ob1_progress(void)
@@ -87,11 +100,11 @@ int mca_pml_ob1_progress(void)
         switch(pending_type) {
         case MCA_PML_OB1_SEND_PENDING_NONE:
             assert(0);
-            return 0;
+            goto update_pending_and_return;
         case MCA_PML_OB1_SEND_PENDING_SCHEDULE:
             if( mca_pml_ob1_send_request_schedule_exclusive(sendreq) ==
                 OMPI_ERR_OUT_OF_RESOURCE ) {
-                return 0;
+                goto update_pending_and_return;
             }
             completed_requests++;
             break;
@@ -118,11 +131,9 @@ int mca_pml_ob1_progress(void)
         }
     }

+ update_pending_and_return:
     if( 0 != completed_requests ) {
-        j = OPAL_ATOMIC_ADD_FETCH32(&mca_pml_ob1_progress_needed, -completed_requests);
-        if( 0 == j ) {
-            opal_progress_unregister(mca_pml_ob1_progress);
-        }
+        mca_pml_ob1_enable_progress(-completed_requests);
     }

     return completed_requests;
diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.c b/ompi/mca/pml/ob1/pml_ob1_recvreq.c
index ea5fa6849d8..6c03eb8e71b 100644
--- a/ompi/mca/pml/ob1/pml_ob1_recvreq.c
+++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.c
@@ -852,6 +852,7 @@ void mca_pml_ob1_recv_request_progress_rndv( mca_pml_ob1_recv_request_t* recvreq
         (btl->btl_flags & MCA_BTL_FLAGS_CUDA_COPY_ASYNC_RECV)) {
         void *strm = mca_common_cuda_get_htod_stream();
         opal_cuda_set_copy_function_async(&recvreq->req_recv.req_base.req_convertor, strm);
+        mca_pml_ob1_enable_progress(1);
     }
 #endif

diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.c b/ompi/mca/pml/ob1/pml_ob1_sendreq.c
index 1626e13e353..543702e1f32 100644
--- a/ompi/mca/pml/ob1/pml_ob1_sendreq.c
+++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.c
@@ -45,9 +45,10 @@ OBJ_CLASS_INSTANCE(mca_pml_ob1_send_range_t, opal_free_list_item_t, NULL, NULL);

-void mca_pml_ob1_send_request_process_pending(mca_bml_base_btl_t *bml_btl)
+int mca_pml_ob1_send_request_process_pending(mca_bml_base_btl_t *bml_btl)
 {
     int rc, i, s = opal_list_get_size(&mca_pml_ob1.send_pending);
+    int completed_requests = 0;

     /* advance pending requests */
     for(i = 0; i < s; i++) {
@@ -63,8 +64,9 @@ void mca_pml_ob1_send_request_process_pending(mca_bml_base_btl_t *bml_btl)
         case MCA_PML_OB1_SEND_PENDING_SCHEDULE:
             rc = mca_pml_ob1_send_request_schedule_exclusive(sendreq);
             if(OMPI_ERR_OUT_OF_RESOURCE == rc) {
-                return;
+                goto update_pending_and_return;
             }
+            completed_requests++;
             break;
         case MCA_PML_OB1_SEND_PENDING_START:
             send_dst = mca_bml_base_btl_array_find(
@@ -81,8 +83,9 @@ void mca_pml_ob1_send_request_process_pending(mca_bml_base_btl_t *bml_btl)
                      * list to minimize reordering and give up for now. */
                     add_request_to_send_pending(sendreq,
                             MCA_PML_OB1_SEND_PENDING_START, false);
-                    return;
+                    goto update_pending_and_return;
                 }
+                completed_requests++;
             }
             break;
         default:
@@ -91,6 +94,12 @@ void mca_pml_ob1_send_request_process_pending(mca_bml_base_btl_t *bml_btl)
             break;
         }
     }
+
+ update_pending_and_return:
+    if( 0 != completed_requests ) {
+        mca_pml_ob1_enable_progress(-completed_requests);
+    }
+    return completed_requests;
 }

 /*
@@ -954,7 +963,8 @@ mca_pml_ob1_send_request_schedule_once(mca_pml_ob1_send_request_t* sendreq)
     if( OPAL_UNLIKELY(num_fail == range->range_btl_cnt) ) {
         /*TODO : assert(sendreq->req_pending == MCA_PML_OB1_SEND_PENDING_NONE); */
         add_request_to_send_pending(sendreq,
-                                    MCA_PML_OB1_SEND_PENDING_SCHEDULE, true);
+                                    MCA_PML_OB1_SEND_PENDING_SCHEDULE, true);
+        mca_pml_ob1_enable_progress(1);
         /* Note that request remains locked. send_request_process_pending()
          * function will call shedule_exclusive() directly without taking
          * the lock */
diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.h b/ompi/mca/pml/ob1/pml_ob1_sendreq.h
index 787f48b554c..47d5bdb118b 100644
--- a/ompi/mca/pml/ob1/pml_ob1_sendreq.h
+++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.h
@@ -100,7 +100,6 @@ add_request_to_send_pending(mca_pml_ob1_send_request_t* sendreq,
         opal_list_prepend(&mca_pml_ob1.send_pending, item);
     OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);

-    mca_pml_ob1_enable_progress(1);
 }

 static inline mca_pml_ob1_send_request_t*
@@ -518,7 +517,7 @@ int mca_pml_ob1_send_request_put_frag(mca_pml_ob1_rdma_frag_t* frag);
  * available. bml_btl passed to the function doesn't represents sendreq
  * destination, it represents BTL on which resource was freed, so only this BTL
  * should be considered for sending packets */
-void mca_pml_ob1_send_request_process_pending(mca_bml_base_btl_t *bml_btl);
+int mca_pml_ob1_send_request_process_pending(mca_bml_base_btl_t *bml_btl);

 void mca_pml_ob1_send_request_copy_in_out(mca_pml_ob1_send_request_t *sendreq,
                                           uint64_t send_offset, uint64_t send_length);
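
For reference, and not part of the patch: a minimal, self-contained sketch of the reference-counting pattern the reworked mca_pml_ob1_enable_progress() follows: register the progress callback when the pending-event counter goes from zero to positive, and unregister it when the counter drops back to zero. The counter and the register_progress()/unregister_progress() stubs below are toy stand-ins for mca_pml_ob1_progress_needed, opal_progress_register() and opal_progress_unregister(); the real code uses OPAL_ATOMIC_ADD_FETCH32 instead of a plain add.

#include <assert.h>
#include <stdio.h>

static int progress_needed = 0;          /* events still expecting progress */

static void register_progress(void)   { printf("progress callback registered\n"); }
static void unregister_progress(void) { printf("progress callback unregistered\n"); }

/* Returns 1 when the registration state changed, 0 otherwise. */
static int enable_progress(int count)
{
    if (0 == count) return 0;            /* nothing to do */

    progress_needed += count;            /* atomic add-and-fetch in the real code */
    assert(progress_needed >= 0);

    if (progress_needed > 1)
        return 0;                        /* progress was already on */
    if (0 == progress_needed) {          /* only reachable with a negative count */
        unregister_progress();
        return 1;
    }
    if (count > 0) {                     /* counter went 0 -> 1: turn progress on */
        register_progress();
        return 1;
    }
    return 0;                            /* negative count, but work still pending */
}

int main(void)
{
    enable_progress(1);                  /* first pending event: registers */
    enable_progress(2);                  /* more events: no state change */
    enable_progress(-3);                 /* all events completed: unregisters */
    return 0;
}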