@@ -1056,30 +1056,55 @@ static bool match_error_registration(pmix_regevents_info_t *reginfoptr, pmix_not
10561056 size_t ninfo = reginfoptr -> ninfo ;
10571057 pmix_status_t error = cd -> status ;
10581058
1059+ if (NULL == info || ninfo <= 0 ) {
1060+ /* this is a general errhandler, and so it always matches.
1061+ * however, here we are looking for an exact match, and
1062+ * so we ignore general errhandlers unless the incoming
1063+ * one is also general */
1064+ if (NULL == cd -> info || 0 == cd -> ninfo ) {
1065+ return true;
1066+ } else {
1067+ return false;
1068+ }
1069+ }
1070+
1071+ /* since this errhandler has info keys, it is not a general errhandler.
1072+ * If the incoming errhandler *is* a general one, then we must not
1073+ * match so we can store the general case */
1074+ if (NULL == cd -> info || 0 == cd -> ninfo ) {
1075+ return false;
1076+ }
1077+
1078+ /* try to match using error name or error group keys - this indicates
1079+ * a request for a specific error state */
10591080 pmix_get_errorgroup (error , errgroup );
1060- /* try to match using error name or error group keys */
10611081 for (i = 0 ; i < ninfo ; i ++ ) {
10621082 // if we get a match on any key then we abort the search and return true.
1063- if ((0 == strcmp (info [i ].key , PMIX_ERROR_NAME )) &&
1083+ if ((0 == strncmp (info [i ].key , PMIX_ERROR_NAME , PMIX_MAX_KEYLEN )) &&
10641084 (error == info [i ].value .data .int32 )) {
10651085 return true;
1066- } else if ((0 == strcmp (info [i ].key , errgroup )) &&
1086+ } else if ((0 == strncmp (info [i ].key , errgroup , PMIX_MAX_KEYLEN )) &&
10671087 (true == info [i ].value .data .flag )) {
10681088 return true;
10691089 }
10701090 }
1071- /* search by node (error location) key if it is specified in the notify info list*/
1072- for (i = 0 ; i < cd -> ninfo ; i ++ ) {
1073- if (0 == strcmp (cd -> info [i ].key , PMIX_ERROR_NODE_NAME )) {
1074- for (j = 0 ; j < ninfo ; j ++ ) {
1075- if ((0 == strcmp (info [j ].key , PMIX_ERROR_NODE_NAME )) &&
1076- (0 == strcmp (info [j ].value .data .string , cd -> info [i ].value .data .string ))) {
1091+
1092+ /* if we get here, then they haven't asked for a specific error state.
1093+ * It is possible, however, that they are asking for all errors from a
1094+ * specific node, so search by node (error location) key if it is
1095+ * specified in the notify info list */
1096+ for (i = 0 ; i < cd -> ninfo ; i ++ ) {
1097+ if (0 == strncmp (cd -> info [i ].key , PMIX_ERROR_NODE_NAME , PMIX_MAX_KEYLEN )) {
1098+ for (j = 0 ; j < ninfo ; j ++ ) {
1099+ if ((0 == strncmp (info [j ].key , PMIX_ERROR_NODE_NAME , PMIX_MAX_KEYLEN )) &&
1100+ (0 == strcmp (info [j ].value .data .string , cd -> info [i ].value .data .string ))) {
10771101 return true;
10781102 }
10791103 }
10801104 }
10811105 }
1082- /* end of search return false*/
1106+
1107+ /* end of search and nothing matched, so return false */
10831108 return false;
10841109}
10851110
@@ -1093,9 +1118,11 @@ static void _notify_error(int sd, short args, void *cbdata)
10931118 pmix_peer_t * peer ;
10941119 pmix_regevents_info_t * reginfoptr ;
10951120 bool notify , notifyall ;
1121+
10961122 pmix_output_verbose (0 , pmix_globals .debug_output ,
10971123 "pmix_server: _notify_error notifying client of error %d" ,
10981124 cd -> status );
1125+
10991126 /* pack the command */
11001127 if (PMIX_SUCCESS != (rc = pmix_bfrop .pack (cd -> buf , & cmd , 1 , PMIX_CMD ))) {
11011128 PMIX_ERROR_LOG (rc );
@@ -1157,6 +1184,8 @@ static void _notify_error(int sd, short args, void *cbdata)
11571184 }
11581185 }
11591186 if (!notify ) {
1187+ /* if we are not notifying everyone, and this proc isn't to
1188+ * be notified, then just continue the main loop */
11601189 continue ;
11611190 }
11621191 }
@@ -1173,8 +1202,9 @@ static void _notify_error(int sd, short args, void *cbdata)
11731202 pmix_output_verbose (2 , pmix_globals .debug_output ,
11741203 "pmix_server _notify_error - match error registration returned notify =%d " , notify );
11751204 }
1176- if (notify )
1205+ if (notify ) {
11771206 break ;
1207+ }
11781208 }
11791209 if (notify ) {
11801210 pmix_output_verbose (2 , pmix_globals .debug_output ,
@@ -1212,6 +1242,7 @@ pmix_status_t pmix_server_notify_error(pmix_status_t status,
12121242 cd -> ninfo = ninfo ;
12131243 cd -> cbfunc = cbfunc ;
12141244 cd -> cbdata = cbdata ;
1245+
12151246 pmix_output_verbose (2 , pmix_globals .debug_output ,
12161247 "pmix_server_notify_error status =%d, nprocs = %lu, ninfo =%lu" ,
12171248 status , nprocs , ninfo );
@@ -1227,18 +1258,19 @@ static void reg_errhandler(int sd, short args, void *cbdata)
12271258 int index = 0 ;
12281259 pmix_status_t rc ;
12291260 pmix_shift_caddy_t * cd = (pmix_shift_caddy_t * )cbdata ;
1261+
12301262 /* check if this handler is already registered if so return error */
1231- if (PMIX_SUCCESS == pmix_lookup_errhandler (cd -> err , & index )) {
1263+ if (PMIX_SUCCESS == pmix_lookup_errhandler (cd -> err , & index )) {
12321264 /* complete request with error status and return its original reference */
12331265 pmix_output_verbose (2 , pmix_globals .debug_output ,
12341266 "pmix_server_register_errhandler error - hdlr already registered index = %d" ,
12351267 index );
1236- cd -> cbfunc .errregcbfn (PMIX_EXISTS , index , cd -> cbdata );
1268+ cd -> cbfunc .errregcbfn (PMIX_EXISTS , index , cd -> cbdata );
12371269 } else {
1238- rc = pmix_add_errhandler (cd -> err , cd -> info , cd -> ninfo , & index );
1270+ rc = pmix_add_errhandler (cd -> err , cd -> info , cd -> ninfo , & index );
12391271 pmix_output_verbose (2 , pmix_globals .debug_output ,
12401272 "pmix_server_register_errhandler - success index =%d" , index );
1241- cd -> cbfunc .errregcbfn (rc , index , cd -> cbdata );
1273+ cd -> cbfunc .errregcbfn (rc , index , cd -> cbdata );
12421274 }
12431275 cd -> active = false;
12441276 PMIX_RELEASE (cd );
@@ -1250,24 +1282,30 @@ void pmix_server_register_errhandler(pmix_info_t info[], size_t ninfo,
12501282 void * cbdata )
12511283{
12521284 pmix_shift_caddy_t * cd ;
1285+
12531286 /* need to thread shift this request */
12541287 cd = PMIX_NEW (pmix_shift_caddy_t );
12551288 cd -> info = info ;
12561289 cd -> ninfo = ninfo ;
12571290 cd -> err = errhandler ;
12581291 cd -> cbfunc .errregcbfn = cbfunc ;
12591292 cd -> cbdata = cbdata ;
1293+
12601294 pmix_output_verbose (2 , pmix_globals .debug_output ,
12611295 "pmix_server_register_errhandler shifting to server thread" );
1296+
12621297 PMIX_THREADSHIFT (cd , reg_errhandler );
12631298}
12641299
/* dereg_errhandler: callback that removes a registered error handler.
 *
 * cbdata is cast to a pmix_shift_caddy_t.  The caddy's `ref` field is
 * passed to pmix_remove_errhandler(), and the resulting status is handed
 * to the caddy's opcbfn callback together with cd->cbdata.  The `sd` and
 * `args` parameters are not used in the body.
 *
 * The removed (-) lines invoked opcbfn unconditionally; the added (+)
 * lines invoke it only when the pointer is non-NULL, which avoids calling
 * through a NULL function pointer.
 *
 * The caddy is not released here: this function only clears cd->active.
 * pmix_server_deregister_errhandler(), which thread-shifts to this
 * function, waits on cd->active and then releases the caddy itself.
 */
12651300static void dereg_errhandler (int sd , short args , void * cbdata )
12661301{
12671302 pmix_status_t rc ;
12681303 pmix_shift_caddy_t * cd = (pmix_shift_caddy_t * )cbdata ;
1269- rc = pmix_remove_errhandler (cd -> ref );
1270- cd -> cbfunc .opcbfn (rc , cd -> cbdata );
1304+
1305+ rc = pmix_remove_errhandler (cd -> ref );
1306+ if (NULL != cd -> cbfunc .opcbfn ) {
1307+ cd -> cbfunc .opcbfn (rc , cd -> cbdata );
1308+ }
12711309 cd -> active = false;
12721310}
12731311
@@ -1276,12 +1314,14 @@ void pmix_server_deregister_errhandler(int errhandler_ref,
12761314 void * cbdata )
12771315{
12781316 pmix_shift_caddy_t * cd ;
1317+
12791318 /* need to thread shift this request */
12801319 cd = PMIX_NEW (pmix_shift_caddy_t );
12811320 cd -> cbfunc .opcbfn = cbfunc ;
12821321 cd -> cbdata = cbdata ;
12831322 cd -> ref = errhandler_ref ;
12841323 PMIX_THREADSHIFT (cd , dereg_errhandler );
1324+
12851325 PMIX_WAIT_FOR_COMPLETION (cd -> active );
12861326 PMIX_RELEASE (cd );
12871327 }
0 commit comments