@@ -39,6 +39,7 @@ const char *mca_coll_libnbc_component_version_string =
3939
4040
4141static int libnbc_priority = 10 ;
42+ static bool libnbc_in_progress = false; /* protect from recursive calls */
4243bool libnbc_ibcast_skip_dt_decision = true;
4344
4445
@@ -102,8 +103,6 @@ libnbc_open(void)
102103 a non-blocking collective started */
103104 mca_coll_libnbc_component .active_comms = 0 ;
104105
105- opal_atomic_init (& mca_coll_libnbc_component .progress_lock , OPAL_ATOMIC_UNLOCKED );
106-
107106 return OMPI_SUCCESS ;
108107}
109108
@@ -263,37 +262,38 @@ ompi_coll_libnbc_progress(void)
263262 ompi_coll_libnbc_request_t * request , * next ;
264263 int res ;
265264
266- /* return if invoked recursively */
267- if (opal_atomic_trylock (& mca_coll_libnbc_component .progress_lock )) return 0 ;
268-
269265 /* process active requests, and use mca_coll_libnbc_component.lock to access the
270266 * mca_coll_libnbc_component.active_requests list */
271267 OPAL_THREAD_LOCK (& mca_coll_libnbc_component .lock );
272- OPAL_LIST_FOREACH_SAFE (request , next , & mca_coll_libnbc_component .active_requests ,
273- ompi_coll_libnbc_request_t ) {
274- OPAL_THREAD_UNLOCK (& mca_coll_libnbc_component .lock );
275- res = NBC_Progress (request );
276- if ( NBC_CONTINUE != res ) {
277- /* done, remove and complete */
278- OPAL_THREAD_LOCK (& mca_coll_libnbc_component .lock );
279- opal_list_remove_item (& mca_coll_libnbc_component .active_requests ,
280- & request -> super .super .super );
281- OPAL_THREAD_UNLOCK (& mca_coll_libnbc_component .lock );
268+ /* skip the progress loop below if invoked recursively */
269+ if (!libnbc_in_progress ) {
270+ libnbc_in_progress = true;
282271
283- if ( OMPI_SUCCESS == res || NBC_OK == res || NBC_SUCCESS == res ) {
284- request -> super .req_status .MPI_ERROR = OMPI_SUCCESS ;
285- }
286- else {
287- request -> super .req_status .MPI_ERROR = res ;
272+ OPAL_LIST_FOREACH_SAFE (request , next , & mca_coll_libnbc_component .active_requests ,
273+ ompi_coll_libnbc_request_t ) {
274+ OPAL_THREAD_UNLOCK (& mca_coll_libnbc_component .lock );
275+ res = NBC_Progress (request );
276+ if ( NBC_CONTINUE != res ) {
277+ /* done, remove and complete */
278+ OPAL_THREAD_LOCK (& mca_coll_libnbc_component .lock );
279+ opal_list_remove_item (& mca_coll_libnbc_component .active_requests ,
280+ & request -> super .super .super );
281+ OPAL_THREAD_UNLOCK (& mca_coll_libnbc_component .lock );
282+
283+ if ( OMPI_SUCCESS == res || NBC_OK == res || NBC_SUCCESS == res ) {
284+ request -> super .req_status .MPI_ERROR = OMPI_SUCCESS ;
285+ }
286+ else {
287+ request -> super .req_status .MPI_ERROR = res ;
288+ }
289+ ompi_request_complete (& request -> super , true);
288290 }
289- ompi_request_complete ( & request -> super , true );
291+ OPAL_THREAD_LOCK ( & mca_coll_libnbc_component . lock );
290292 }
291- OPAL_THREAD_LOCK ( & mca_coll_libnbc_component . lock ) ;
293+ libnbc_in_progress = false ;
292294 }
293295 OPAL_THREAD_UNLOCK (& mca_coll_libnbc_component .lock );
294296
295- opal_atomic_unlock (& mca_coll_libnbc_component .progress_lock );
296-
297297 return 0 ;
298298}
299299
0 commit comments