@@ -81,10 +81,10 @@ struct ompi_cont_request_t {
8181 ompi_request_t super ;
8282 opal_atomic_lock_t cont_lock ; /**< Lock used completing/restarting the cont request */
8383 bool cont_enqueue_complete ; /**< Whether to enqueue immediately complete requests */
84- bool cont_in_wait ; /**< Whether the continuation request is currently waited on */
8584 opal_atomic_int32_t cont_num_active ; /**< The number of active continuations registered with a continuation request */
8685 uint32_t continue_max_poll ; /**< max number of local continuations to execute at once */
8786 opal_list_t * cont_complete_list ; /**< List of complete continuations to be invoked during test */
87+ ompi_wait_sync_t * sync ; /**< Sync object this continuation request is attached to */
8888};
8989
9090static void ompi_cont_request_construct (ompi_cont_request_t * cont_req )
@@ -98,10 +98,10 @@ static void ompi_cont_request_construct(ompi_cont_request_t* cont_req)
9898 cont_req -> super .req_status = ompi_status_empty ; /* always returns MPI_SUCCESS */
9999 opal_atomic_lock_init (& cont_req -> cont_lock , false);
100100 cont_req -> cont_enqueue_complete = false;
101- cont_req -> cont_in_wait = false;
102101 cont_req -> cont_num_active = 0 ;
103102 cont_req -> continue_max_poll = UINT32_MAX ;
104103 cont_req -> cont_complete_list = NULL ;
104+ cont_req -> sync = NULL ;
105105}
106106
107107static void ompi_cont_request_destruct (ompi_cont_request_t * cont_req )
@@ -156,10 +156,13 @@ static opal_mutex_t request_cont_lock;
156156 */
157157static bool progress_callback_registered = false;
158158
159- /**
160- * Thread-local list of continuation requests that should be progressed.
161- */
162- static opal_thread_local opal_list_t * thread_progress_list = NULL ;
159+ struct lazy_list_s {
160+ opal_list_t list ;
161+ bool is_initialized ;
162+ };
163+ typedef struct lazy_list_s lazy_list_t ;
164+
165+ static opal_thread_local lazy_list_t thread_progress_list = { .is_initialized = false };
163166
164167static inline
165168void ompi_continue_cont_req_release (ompi_cont_request_t * cont_req ,
@@ -179,6 +182,10 @@ void ompi_continue_cont_req_release(ompi_cont_request_t *cont_req,
179182 /* signal that all continuations were found complete */
180183 ompi_request_complete (& cont_req -> super , true);
181184 }
185+ if (NULL != cont_req -> sync ) {
186+ /* release the sync object */
187+ OPAL_THREAD_ADD_FETCH32 (& cont_req -> sync -> num_req_need_progress , -1 );
188+ }
182189 if (take_lock && using_threads ) {
183190 opal_atomic_unlock (& cont_req -> cont_lock );
184191 }
@@ -191,12 +198,7 @@ void ompi_continue_cont_release(ompi_continuation_t *cont)
191198 ompi_cont_request_t * cont_req = cont -> cont_req ;
192199 assert (OMPI_REQUEST_CONT == cont_req -> super .req_type );
193200
194- /* if a thread is waiting on the request, we got here when
195- * the thread started executing the continuations, so the continuation
196- * request is complete already */
197- if (!cont_req -> cont_in_wait ) {
198- ompi_continue_cont_req_release (cont_req , 1 , true);
199- }
201+ ompi_continue_cont_req_release (cont_req , 1 , true);
200202 OBJ_RELEASE (cont_req );
201203
202204#ifdef OPAL_ENABLE_DEBUG
@@ -240,9 +242,13 @@ int ompi_continue_progress_n(const uint32_t max)
240242 in_progress = 1 ;
241243
242244 const bool using_threads = opal_using_threads ();
243- if (NULL != thread_progress_list ) {
245+
246+ /* execute thread-local continuations first
247+ * (e.g., from continuation requests the current thread is waiting on) */
248+ lazy_list_t * tl_list = & thread_progress_list ;
249+ if (tl_list -> is_initialized ) {
244250 ompi_cont_request_t * cont_req ;
245- OPAL_LIST_FOREACH (cont_req , thread_progress_list , ompi_cont_request_t ) {
251+ OPAL_LIST_FOREACH (cont_req , & tl_list -> list , ompi_cont_request_t ) {
246252 ompi_continuation_t * cb ;
247253 if (opal_list_is_empty (cont_req -> cont_complete_list )) continue ;
248254 while (max > completed ) {
@@ -289,6 +295,12 @@ static int ompi_continue_progress_callback()
289295 return ompi_continue_progress_n (1 );
290296}
291297
/**
 * Progress callback installed on a wait sync object.
 *
 * Calls ompi_continue_progress_n() with UINT32_MAX as the limit, i.e. without
 * a practical cap on the number of continuations handled per invocation.
 *
 * @return the value returned by ompi_continue_progress_n()
 */
static int ompi_continue_wait_progress_callback(void)
{
    return ompi_continue_progress_n(UINT32_MAX);
}
302+
303+
292304int ompi_continue_progress_request (ompi_request_t * req )
293305{
294306 if (in_progress ) return 0 ;
@@ -329,60 +341,53 @@ int ompi_continue_progress_request(ompi_request_t *req)
329341
330342
331343/**
332- * Register the provided continuation request to be included in the
333- * global progress loop (used while a thread is waiting for the contnuation
334- * request to complete).
335- * We move all local continuations into the global continuation list
336- * and mark the continuation request such that future continuations
337- * are directly put into the global continuations list.
338- * Once the wait completed (i.e., all continuations registered with the
339- * continuation request) we unmark it (see ompi_continue_deregister_request_progress).
344+ * Register the continuation request so that it will be progressed even if
345+ * it is poll-only and the thread is waiting on the provided sync object.
340346 */
341- int ompi_continue_register_request_progress (ompi_request_t * req )
347+ int ompi_continue_register_request_progress (ompi_request_t * req , ompi_wait_sync_t * sync )
342348{
343349 ompi_cont_request_t * cont_req = (ompi_cont_request_t * )req ;
344350
345351 if (NULL == cont_req -> cont_complete_list ) return OMPI_SUCCESS ;
346352
347- opal_atomic_lock ( & cont_req -> cont_lock ) ;
353+ lazy_list_t * cont_req_list = & thread_progress_list ;
348354
349- cont_req -> cont_in_wait = true;
350-
351- ompi_continue_cont_req_release (cont_req , opal_list_get_size (cont_req -> cont_complete_list ), false);
355+ /* check that the thread-local list is initialized */
356+ if (!cont_req_list -> is_initialized ) {
357+ OBJ_CONSTRUCT (& cont_req_list -> list , opal_list_t );
358+ cont_req_list -> is_initialized = true;
359+ }
352360
353- opal_atomic_unlock (& cont_req -> cont_lock );
361+ /* add the continuation request to the thread-local list */
362+ opal_list_append (& cont_req_list -> list , & cont_req -> super .super .super );
354363
355- if (NULL == thread_progress_list ) {
356- thread_progress_list = OBJ_NEW (opal_list_t );
364+ /* register with the sync object */
365+ if (NULL != sync ) {
366+ sync -> num_req_need_progress ++ ;
367+ sync -> progress_cb = & ompi_continue_wait_progress_callback ;
357368 }
358-
359- /* enqueue the continuation request to allow for progress by this thread */
360- opal_list_append (thread_progress_list , & req -> super .super );
369+ cont_req -> sync = sync ;
361370
362371 return OMPI_SUCCESS ;
363372}
364373
365374/**
366- * Remove the continuation request from being progressed by the global progress
367- * loop (after a wait completes) .
375+ * Remove the poll-only continuation request from the thread's progress list after
376+ * it has completed .
368377 */
369378int ompi_continue_deregister_request_progress (ompi_request_t * req )
370379{
371380 ompi_cont_request_t * cont_req = (ompi_cont_request_t * )req ;
372381
373382 if (NULL == cont_req -> cont_complete_list ) return OMPI_SUCCESS ;
374383
375- /* make sure we execute all outstanding continuations */
376- uint32_t tmp_max_poll = cont_req -> continue_max_poll ;
377- cont_req -> continue_max_poll = UINT32_MAX ;
378- ompi_continue_progress_request (req );
379- cont_req -> continue_max_poll = tmp_max_poll ;
380-
381- cont_req -> cont_in_wait = false;
382-
384+ /* let the sync know we're done, it may suspend the thread now */
385+ if (NULL != cont_req -> sync ) {
386+ cont_req -> sync -> num_req_need_progress -- ;
387+ }
383388
384389 /* remove the continuation request from the thread-local progress list */
385- opal_list_remove_item (thread_progress_list , & req -> super .super );
390+ opal_list_remove_item (& thread_progress_list . list , & req -> super .super );
386391
387392 return OMPI_SUCCESS ;
388393}
@@ -439,13 +444,6 @@ ompi_continue_enqueue_runnable(ompi_continuation_t *cont)
439444 if (NULL != cont_req -> cont_complete_list ) {
440445 opal_atomic_lock (& cont_req -> cont_lock );
441446 opal_list_append (cont_req -> cont_complete_list , & cont -> super .super );
442- if (cont_req -> cont_in_wait ) {
443- /* if a thread is waiting for this request to complete, signal completions
444- * the continuations will be executed at the end of the wait
445- * but we need to ensure that the request is marked complete first
446- */
447- ompi_continue_cont_req_release (cont_req , 1 , false);
448- }
449447 opal_atomic_unlock (& cont_req -> cont_lock );
450448 } else {
451449 OPAL_THREAD_LOCK (& request_cont_lock );
@@ -601,15 +599,14 @@ int ompi_continue_attach(
601599 requests [i ] = MPI_REQUEST_NULL ;
602600 }
603601 }
604-
605602 }
606603 }
607604
608605 assert (count >= num_registered );
609606 int num_complete = count - num_registered ;
610607 int32_t last_num_active = OPAL_THREAD_ADD_FETCH32 (& cont -> num_active ,
611608 - num_complete );
612- if (0 == last_num_active && 0 == num_registered ) {
609+ if (0 == last_num_active ) {
613610 if (cont_req -> cont_enqueue_complete ) {
614611 /* enqueue for later processing */
615612 ompi_continue_enqueue_runnable (cont );
0 commit comments