Skip to content

Commit 72501f8

Browse files
bosilcadevreal
authored andcommitted
Consistent return from all progress functions.
This fix ensures that all progress functions return the number of completed events. Signed-off-by: George Bosilca <[email protected]>
1 parent 2c97187 commit 72501f8

File tree

7 files changed

+30
-22
lines changed

7 files changed

+30
-22
lines changed

ompi/communicator/comm_request.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ static int ompi_comm_request_progress (void)
100100
{
101101
ompi_comm_request_t *request, *next;
102102
static opal_atomic_int32_t progressing = 0;
103+
int completed = 0;
103104

104105
/* don't allow re-entry */
105106
if (opal_atomic_swap_32 (&progressing, 1)) {
@@ -126,6 +127,7 @@ static int ompi_comm_request_progress (void)
126127
}
127128
ompi_request_free (&subreq);
128129
request_item->subreq_count--;
130+
completed++;
129131
} else {
130132
item_complete = false;
131133
break;
@@ -163,7 +165,7 @@ static int ompi_comm_request_progress (void)
163165
opal_mutex_unlock (&ompi_comm_request_mutex);
164166
progressing = 0;
165167

166-
return 1;
168+
return completed;
167169
}
168170

169171
void ompi_comm_request_start (ompi_comm_request_t *request)

ompi/mca/mtl/psm2/mtl_psm2.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -469,5 +469,5 @@ int ompi_mtl_psm2_progress( void ) {
469469
opal_show_help("help-mtl-psm2.txt",
470470
"error polling network", true,
471471
psm2_error_get_string(err));
472-
return 1;
472+
return OMPI_ERROR;
473473
}

ompi/request/grequest.c

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -202,14 +202,13 @@ int ompi_grequest_invoke_query(ompi_request_t *request,
202202
int rc = OMPI_SUCCESS;
203203
ompi_grequest_t *g = (ompi_grequest_t*) request;
204204

205-
/* MPI-2:8.2 does not say what to do with the return value from
206-
the query function (i.e., the int return value from the C
207-
function or the ierr argument from the Fortran function).
208-
Making the command decision here to ignore it. If the handler
209-
wants to pass an error back, it should set it in the MPI_ERROR
210-
field in the status (which is always kept, regardless if the
211-
top-level function was invoked with MPI_STATUS[ES]_IGNORE or
212-
not). */
205+
/* MPI-3 mandates that the return value from the query function
206+
* (i.e., the int return value from the C function or the ierr
207+
* argument from the Fortran function) must be returned to the
208+
* user. Thus, if the return of the query function is not MPI_SUCCESS
209+
* we will update the MPI_ERROR field. Otherwise, the MPI_ERROR
210+
* field is untouched (or left to the discretion of the query function).
211+
*/
213212
if (NULL != g->greq_query.c_query) {
214213
if (g->greq_funcs_are_c) {
215214
rc = g->greq_query.c_query(g->greq_state, status);
@@ -221,7 +220,9 @@ int ompi_grequest_invoke_query(ompi_request_t *request,
221220
rc = OMPI_FINT_2_INT(ierr);
222221
}
223222
}
224-
223+
if( MPI_SUCCESS != rc ) {
224+
status->MPI_ERROR = rc;
225+
}
225226
return rc;
226227
}
227228

ompi/request/grequestx.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,11 @@ static int grequestx_progress(void) {
4444
MPI_Status status;
4545
OPAL_THREAD_UNLOCK(&lock);
4646
request->greq_poll.c_poll(request->greq_state, &status);
47+
OPAL_THREAD_LOCK(&lock);
4748
if (REQUEST_COMPLETE(&request->greq_base)) {
48-
OPAL_THREAD_LOCK(&lock);
4949
opal_list_remove_item(&requests, &request->greq_base.super.super);
50-
OPAL_THREAD_UNLOCK(&lock);
5150
completed++;
5251
}
53-
OPAL_THREAD_LOCK(&lock);
5452
}
5553
in_progress = false;
5654
}

opal/mca/common/ucx/common_ucx_wpool.c

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -279,10 +279,11 @@ void opal_common_ucx_wpool_finalize(opal_common_ucx_wpool_t *wpool)
279279
return;
280280
}
281281

282-
OPAL_DECLSPEC void
282+
OPAL_DECLSPEC int
283283
opal_common_ucx_wpool_progress(opal_common_ucx_wpool_t *wpool)
284284
{
285285
_winfo_list_item_t *item = NULL, *next = NULL;
286+
int completed = 0, progressed = 0;
286287

287288
/* Go over all active workers and progress them
288289
* TODO: may want to have some partitioning to progress only part of
@@ -297,14 +298,19 @@ opal_common_ucx_wpool_progress(opal_common_ucx_wpool_t *wpool)
297298
opal_list_remove_item(&wpool->active_workers, &item->super);
298299
_winfo_reset(winfo);
299300
opal_list_append(&wpool->idle_workers, &item->super);
301+
completed++;
300302
} else {
301303
/* Progress worker until there are existing events */
302-
while(ucp_worker_progress(winfo->worker));
304+
do {
305+
progressed = ucp_worker_progress(winfo->worker);
306+
completed += progressed;
307+
} while (progressed);
303308
}
304309
opal_mutex_unlock(&winfo->mutex);
305310
}
306311
opal_mutex_unlock(&wpool->mutex);
307312
}
313+
return completed;
308314
}
309315

310316
static int

opal/mca/common/ucx/common_ucx_wpool.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ OPAL_DECLSPEC void opal_common_ucx_wpool_free(opal_common_ucx_wpool_t *wpool);
165165
OPAL_DECLSPEC int opal_common_ucx_wpool_init(opal_common_ucx_wpool_t *wpool,
166166
int proc_world_size, bool enable_mt);
167167
OPAL_DECLSPEC void opal_common_ucx_wpool_finalize(opal_common_ucx_wpool_t *wpool);
168-
OPAL_DECLSPEC void opal_common_ucx_wpool_progress(opal_common_ucx_wpool_t *wpool);
168+
OPAL_DECLSPEC int opal_common_ucx_wpool_progress(opal_common_ucx_wpool_t *wpool);
169169

170170
/* Manage Communication context */
171171
OPAL_DECLSPEC int opal_common_ucx_wpctx_create(opal_common_ucx_wpool_t *wpool, int comm_size,

oshmem/mca/spml/ucx/spml_ucx_component.c

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -187,20 +187,21 @@ static int mca_spml_ucx_component_register(void)
187187

188188
int spml_ucx_ctx_progress(void)
189189
{
190-
int i;
190+
int i, completed = 0;
191191
for (i = 0; i < mca_spml_ucx.active_array.ctxs_count; i++) {
192-
ucp_worker_progress(mca_spml_ucx.active_array.ctxs[i]->ucp_worker[0]);
192+
completed += ucp_worker_progress(mca_spml_ucx.active_array.ctxs[i]->ucp_worker[0]);
193193
}
194-
return 1;
194+
return completed;
195195
}
196196

197197
int spml_ucx_default_progress(void)
198198
{
199199
unsigned int i=0;
200+
int completed = 0;
200201
for (i = 0; i < mca_spml_ucx.ucp_workers; i++) {
201-
ucp_worker_progress(mca_spml_ucx_ctx_default.ucp_worker[i]);
202+
completed += ucp_worker_progress(mca_spml_ucx_ctx_default.ucp_worker[i]);
202203
}
203-
return 1;
204+
return completed;
204205
}
205206

206207
int spml_ucx_progress_aux_ctx(void)

0 commit comments

Comments
 (0)