From 59bb79b46fb805211738aa35a794d3aca0e2d9fb Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Sat, 14 Apr 2018 15:55:48 -0400 Subject: [PATCH] Add/remove OB1 and CUDA progress. Provide support for dynamically adding and removing the progress function for OB1 and CUDA. Signed-off-by: George Bosilca --- ompi/mca/pml/ob1/pml_ob1.h | 2 +- ompi/mca/pml/ob1/pml_ob1_progress.c | 29 ++++++++++++++++++++--------- ompi/mca/pml/ob1/pml_ob1_recvreq.c | 1 + ompi/mca/pml/ob1/pml_ob1_sendreq.c | 18 ++++++++++++++---- ompi/mca/pml/ob1/pml_ob1_sendreq.h | 3 +-- 5 files changed, 37 insertions(+), 16 deletions(-) diff --git a/ompi/mca/pml/ob1/pml_ob1.h b/ompi/mca/pml/ob1/pml_ob1.h index 1f4bfbb5899..4ec8dbf3905 100644 --- a/ompi/mca/pml/ob1/pml_ob1.h +++ b/ompi/mca/pml/ob1/pml_ob1.h @@ -302,7 +302,7 @@ void mca_pml_ob1_process_pending_rdma(void); if(opal_list_get_size(&mca_pml_ob1.recv_pending)) \ mca_pml_ob1_recv_request_process_pending(); \ if(opal_list_get_size(&mca_pml_ob1.send_pending)) \ - mca_pml_ob1_send_request_process_pending(bml_btl); \ + (void)mca_pml_ob1_send_request_process_pending(bml_btl); \ if(opal_list_get_size(&mca_pml_ob1.rdma_pending)) \ mca_pml_ob1_process_pending_rdma(); \ } while (0) diff --git a/ompi/mca/pml/ob1/pml_ob1_progress.c b/ompi/mca/pml/ob1/pml_ob1_progress.c index e1f84e796b4..4a31edac05a 100644 --- a/ompi/mca/pml/ob1/pml_ob1_progress.c +++ b/ompi/mca/pml/ob1/pml_ob1_progress.c @@ -49,6 +49,9 @@ static inline int mca_pml_ob1_process_pending_cuda_async_copies(void) } while (progress > 0); /* Consider progressing dtoh events here in future */ + /* Update the number of potential pending events */ + mca_pml_ob1_enable_progress(-count); + return count; } #endif /* OPAL_CUDA_SUPPORT */ @@ -56,12 +59,22 @@ static inline int mca_pml_ob1_process_pending_cuda_async_copies(void) static int mca_pml_ob1_progress_needed = 0; int mca_pml_ob1_enable_progress(int32_t count) { + if( 0 == count ) return 0; /* nothing to do */ int32_t progress_count = OPAL_ATOMIC_ADD_FETCH32(&mca_pml_ob1_progress_needed, count); + assert(progress_count >= 0); if( 1 < progress_count ) - return 0; /* progress was already on */ + return 0; /* progress was already on and no change necessary */ - opal_progress_register(mca_pml_ob1_progress); - return 1; + if( 0 == progress_count ) { /* only way to get here is if count is negative */ + opal_progress_unregister(mca_pml_ob1_progress); + return 1; + } + if( count > 0 ) { + opal_progress_register(mca_pml_ob1_progress); + return 1; + } + /* count was negative */ + return 0; } int mca_pml_ob1_progress(void) @@ -87,11 +100,11 @@ int mca_pml_ob1_progress(void) switch(pending_type) { case MCA_PML_OB1_SEND_PENDING_NONE: assert(0); - return 0; + goto update_pending_and_return; case MCA_PML_OB1_SEND_PENDING_SCHEDULE: if( mca_pml_ob1_send_request_schedule_exclusive(sendreq) == OMPI_ERR_OUT_OF_RESOURCE ) { - return 0; + goto update_pending_and_return; } completed_requests++; break; @@ -118,11 +131,9 @@ int mca_pml_ob1_progress(void) } } + update_pending_and_return: if( 0 != completed_requests ) { - j = OPAL_ATOMIC_ADD_FETCH32(&mca_pml_ob1_progress_needed, -completed_requests); - if( 0 == j ) { - opal_progress_unregister(mca_pml_ob1_progress); - } + mca_pml_ob1_enable_progress(-completed_requests); } return completed_requests; diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.c b/ompi/mca/pml/ob1/pml_ob1_recvreq.c index 9ccb27e1af4..5038e7b623a 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.c @@ -831,6 +831,7 @@ void mca_pml_ob1_recv_request_progress_rndv( mca_pml_ob1_recv_request_t* recvreq (btl->btl_flags & MCA_BTL_FLAGS_CUDA_COPY_ASYNC_RECV)) { void *strm = mca_common_cuda_get_htod_stream(); opal_cuda_set_copy_function_async(&recvreq->req_recv.req_base.req_convertor, strm); + mca_pml_ob1_enable_progress(1); } #endif diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.c b/ompi/mca/pml/ob1/pml_ob1_sendreq.c index a2aecae09ac..39af7c12755 100644 --- a/ompi/mca/pml/ob1/pml_ob1_sendreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.c @@ -43,9 +43,10 @@ OBJ_CLASS_INSTANCE(mca_pml_ob1_send_range_t, opal_free_list_item_t, NULL, NULL); -void mca_pml_ob1_send_request_process_pending(mca_bml_base_btl_t *bml_btl) +int mca_pml_ob1_send_request_process_pending(mca_bml_base_btl_t *bml_btl) { int rc, i, s = opal_list_get_size(&mca_pml_ob1.send_pending); + int completed_requests = 0; /* advance pending requests */ for(i = 0; i < s; i++) { @@ -61,8 +62,9 @@ void mca_pml_ob1_send_request_process_pending(mca_bml_base_btl_t *bml_btl) case MCA_PML_OB1_SEND_PENDING_SCHEDULE: rc = mca_pml_ob1_send_request_schedule_exclusive(sendreq); if(OMPI_ERR_OUT_OF_RESOURCE == rc) { - return; + goto update_pending_and_return; } + completed_requests++; break; case MCA_PML_OB1_SEND_PENDING_START: send_dst = mca_bml_base_btl_array_find( @@ -79,8 +81,9 @@ void mca_pml_ob1_send_request_process_pending(mca_bml_base_btl_t *bml_btl) * list to minimize reordering and give up for now. */ add_request_to_send_pending(sendreq, MCA_PML_OB1_SEND_PENDING_START, false); - return; + goto update_pending_and_return; } + completed_requests++; } break; default: @@ -89,6 +92,12 @@ void mca_pml_ob1_send_request_process_pending(mca_bml_base_btl_t *bml_btl) break; } } + + update_pending_and_return: + if( 0 != completed_requests ) { + mca_pml_ob1_enable_progress(-completed_requests); + } + return completed_requests; } /* @@ -938,7 +947,8 @@ mca_pml_ob1_send_request_schedule_once(mca_pml_ob1_send_request_t* sendreq) if( OPAL_UNLIKELY(num_fail == range->range_btl_cnt) ) { /*TODO : assert(sendreq->req_pending == MCA_PML_OB1_SEND_PENDING_NONE); */ add_request_to_send_pending(sendreq, - MCA_PML_OB1_SEND_PENDING_SCHEDULE, true); + MCA_PML_OB1_SEND_PENDING_SCHEDULE, true); + mca_pml_ob1_enable_progress(1); /* Note that request remains locked. send_request_process_pending() * function will call shedule_exclusive() directly without taking * the lock */ diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.h b/ompi/mca/pml/ob1/pml_ob1_sendreq.h index be36c3f2ac4..014d1c9ced7 100644 --- a/ompi/mca/pml/ob1/pml_ob1_sendreq.h +++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.h @@ -99,7 +99,6 @@ add_request_to_send_pending(mca_pml_ob1_send_request_t* sendreq, opal_list_prepend(&mca_pml_ob1.send_pending, item); OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock); - mca_pml_ob1_enable_progress(1); } static inline mca_pml_ob1_send_request_t* @@ -508,7 +507,7 @@ int mca_pml_ob1_send_request_put_frag(mca_pml_ob1_rdma_frag_t* frag); * available. bml_btl passed to the function doesn't represents sendreq * destination, it represents BTL on which resource was freed, so only this BTL * should be considered for sending packets */ -void mca_pml_ob1_send_request_process_pending(mca_bml_base_btl_t *bml_btl); +int mca_pml_ob1_send_request_process_pending(mca_bml_base_btl_t *bml_btl); void mca_pml_ob1_send_request_copy_in_out(mca_pml_ob1_send_request_t *sendreq, uint64_t send_offset, uint64_t send_length);