1616 * Author(s): Torsten Hoefler <[email protected] > 1717 * 
1818 * Copyright (c) 2012      Oracle and/or its affiliates.  All rights reserved. 
19+  * Copyright (c) 2016      IBM Corporation.  All rights reserved. 
1920 * 
2021 */ 
2122#include  "nbc_internal.h" 
@@ -312,6 +313,8 @@ int NBC_Progress(NBC_Handle *handle) {
312313  int  flag , res , ret = NBC_CONTINUE ;
313314  unsigned long  size ;
314315  char  * delim ;
316+   int  i ;
317+   ompi_status_public_t  status ;
315318
316319  /* the handle is done if there is no schedule attached */ 
317320  if  (NULL  ==  handle -> schedule ) {
@@ -325,8 +328,30 @@ int NBC_Progress(NBC_Handle *handle) {
325328#endif 
326329    res  =  ompi_request_test_all (handle -> req_count , handle -> req_array , & flag , MPI_STATUSES_IGNORE );
327330    if (res  !=  OMPI_SUCCESS ) {
328-       NBC_Error  ("MPI Error in MPI_Testall() (%i)" , res );
329-       return  res ;
331+       // Attempt to cancel outstanding requests 
332+       for (i  =  0 ; i  <  handle -> req_count ; ++ i  ) {
333+         // If the request is complete, then try to report the error code 
334+         if ( handle -> req_array [i ]-> req_complete  ) {
335+           if ( OMPI_SUCCESS  !=  handle -> req_array [i ]-> req_status .MPI_ERROR  ) {
336+             NBC_Error  ("MPI Error in MPI_Testall() (req %d = %d)" , i , handle -> req_array [i ]-> req_status .MPI_ERROR );
337+           }
338+         }
339+         else  {
340+           ompi_request_cancel (handle -> req_array [i ]);
341+           // If the PML actually canceled the request, then wait on it 
342+           if ( handle -> req_array [i ]-> req_status ._cancelled ) {
343+             ompi_request_wait (& handle -> req_array [i ], & status );
344+           }
345+           // Otherwise the request was not cancelled and a PML message is still outstanding. 
346+           // Warn the user that bad things could happen if they continue using nonblocking collectives. 
347+           else  {
348+             NBC_Error  ("MPI Error: Not able to cancel the internal request %d. " 
349+                        "Be aware that continuing to use nonblocking collectives on this communicator may result in undefined behavior." , i );
350+           }
351+         }
352+       }
353+ 
354+       return  OMPI_ERROR ;
330355    }
331356#ifdef  NBC_TIMING 
332357    Test_time  +=  MPI_Wtime ();
0 commit comments