16
16
* Author(s): Torsten Hoefler <[email protected] >
17
17
*
18
18
* Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved.
19
+ * Copyright (c) 2016 IBM Corporation. All rights reserved.
19
20
*
20
21
*/
21
22
#include "nbc_internal.h"
@@ -312,6 +313,8 @@ int NBC_Progress(NBC_Handle *handle) {
312
313
int flag , res , ret = NBC_CONTINUE ;
313
314
unsigned long size = 0 ;
314
315
char * delim ;
316
+ int i ;
317
+ ompi_status_public_t status ;
315
318
316
319
/* the handle is done if there is no schedule attached */
317
320
if (NULL == handle -> schedule ) {
@@ -325,8 +328,30 @@ int NBC_Progress(NBC_Handle *handle) {
325
328
#endif
326
329
res = ompi_request_test_all (handle -> req_count , handle -> req_array , & flag , MPI_STATUSES_IGNORE );
327
330
if (res != OMPI_SUCCESS ) {
328
- NBC_Error ("MPI Error in MPI_Testall() (%i)" , res );
329
- return res ;
331
+ // Report errors from completed requests and attempt to cancel the outstanding ones
332
+ for (i = 0 ; i < handle -> req_count ; ++ i ) {
333
+ // If the request is complete, then try to report the error code
334
+ if ( handle -> req_array [i ]-> req_complete ) {
335
+ if ( OMPI_SUCCESS != handle -> req_array [i ]-> req_status .MPI_ERROR ) {
336
+ NBC_Error ("MPI Error in MPI_Testall() (req %d = %d)" , i , handle -> req_array [i ]-> req_status .MPI_ERROR );
337
+ }
338
+ }
339
+ else {
340
+ ompi_request_cancel (handle -> req_array [i ]);
341
+ // If the PML actually canceled the request, then wait on it
342
+ if ( handle -> req_array [i ]-> req_status ._cancelled ) {
343
+ ompi_request_wait (& handle -> req_array [i ], & status );
344
+ }
345
+ // Warn the user that we had to leave a PML message outstanding so
346
+ // bad things could happen if they continue using nonblocking collectives
347
+ else {
348
+ NBC_Error ("MPI Error: Not able to cancel the internal request %d. "
349
+ "Be aware that continuing to use nonblocking collectives on this communicator may result in undefined behavior." , i );
350
+ }
351
+ }
352
+ }
353
+
354
+ return OMPI_ERROR ;
330
355
}
331
356
#ifdef NBC_TIMING
332
357
Test_time += MPI_Wtime ();
0 commit comments