Skip to content

Commit ea84a53

Browse files
author
Ralph Castain
authored
Merge pull request #3218 from rhc54/topic/pmix2
Update to include the PMIx 2.0 APIs for monitoring and job control.
2 parents 10d401b + d645557 commit ea84a53

File tree

83 files changed

+2709
-2975
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

83 files changed

+2709
-2975
lines changed

opal/include/opal/constants.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
* Copyright (c) 2004-2005 The Regents of the University of California.
1111
* All rights reserved.
1212
* Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved.
13-
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
13+
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
1414
* $COPYRIGHT$
1515
*
1616
* Additional copyrights may follow
@@ -94,7 +94,9 @@ enum {
9494
OPAL_ERR_PROC_RESTART = (OPAL_ERR_BASE - 63),
9595
OPAL_ERR_PROC_CHECKPOINT = (OPAL_ERR_BASE - 64),
9696
OPAL_ERR_PROC_MIGRATE = (OPAL_ERR_BASE - 65),
97-
OPAL_ERR_EVENT_REGISTRATION = (OPAL_ERR_BASE - 66)
97+
OPAL_ERR_EVENT_REGISTRATION = (OPAL_ERR_BASE - 66),
98+
OPAL_ERR_HEARTBEAT_ALERT = (OPAL_ERR_BASE - 67),
99+
OPAL_ERR_FILE_ALERT = (OPAL_ERR_BASE - 68)
98100
};
99101

100102
#define OPAL_ERR_MAX (OPAL_ERR_BASE - 100)

opal/mca/pmix/ext2x/pmix2x.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
22
/*
3-
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
3+
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
44
* Copyright (c) 2014-2017 Research Organization for Information Science
55
* and Technology (RIST). All rights reserved.
66
* Copyright (c) 2014-2015 Mellanox Technologies, Inc.
@@ -352,7 +352,7 @@ static void _event_hdlr(int sd, short args, void *cbdata)
352352
if (NULL != chain->final_cbfunc) {
353353
chain->final_cbfunc(PMIX_SUCCESS, chain->final_cbdata);
354354
}
355-
355+
356356
OBJ_RELEASE(chain);
357357

358358
return;

opal/mca/pmix/pmix2x/pmix/include/pmix.h

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,59 @@ pmix_status_t PMIx_Allocation_request_nb(pmix_alloc_directive_t directive,
473473
pmix_info_t *info, size_t ninfo,
474474
pmix_info_cbfunc_t cbfunc, void *cbdata);
475475

476+
/* Request a job control action. The targets array identifies the
477+
* processes to which the requested job control action is to be applied.
478+
* A NULL value can be used to indicate all processes in the caller's
479+
* nspace. A rank of PMIX_RANK_WILDCARD can also be used to indicate
480+
* that all processes in the given nspace are to be included.
481+
*
482+
* The directives are provided as pmix_info_t structs in the directives
483+
* array. The callback function provides a status to indicate whether or
484+
* not the request was granted, and to provide some information as to
485+
* the reason for any denial in the pmix_info_cbfunc_t array of pmix_info_t
486+
* structures. If non-NULL, then the specified release_fn must be called
487+
* when the callback function completes - this will be used to release
488+
* any provided pmix_info_t array.
489+
*/
490+
pmix_status_t PMIx_Job_control_nb(const pmix_proc_t targets[], size_t ntargets,
491+
const pmix_info_t directives[], size_t ndirs,
492+
pmix_info_cbfunc_t cbfunc, void *cbdata);
493+
494+
/* Request that something be monitored - e.g., that the server monitor
495+
* this process for periodic heartbeats as an indication that the process
496+
* has not become "wedged". When a monitor detects the specified alarm
497+
* condition, it will generate an event notification using the provided
498+
* error code and passing along any available relevant information. It is
499+
* up to the caller to register a corresponding event handler.
500+
*
501+
* Params:
502+
*
503+
* monitor: attribute indicating the type of monitor being requested - e.g.,
504+
* PMIX_MONITOR_FILE to indicate that the requestor is asking that
505+
* a file be monitored.
506+
*
507+
* error: the status code to be used when generating an event notification
508+
* alerting that the monitor has been triggered. The range of the
509+
* notification defaults to PMIX_RANGE_NAMESPACE - this can be
510+
* changed by providing a PMIX_RANGE directive
511+
*
512+
* directives: characterize the monitoring request (e.g., monitor file size)
513+
* and frequency of checking to be done
514+
*
515+
* cbfunc: provides a status to indicate whether or not the request was granted,
516+
* and to provide some information as to the reason for any denial in
517+
* the pmix_info_cbfunc_t array of pmix_info_t structures.
518+
*
519+
* Note: a process can send a heartbeat to the server using the PMIx_Heartbeat
520+
* macro provided below. */
521+
pmix_status_t PMIx_Process_monitor_nb(const pmix_info_t *monitor, pmix_status_t error,
522+
const pmix_info_t directives[], size_t ndirs,
523+
pmix_info_cbfunc_t cbfunc, void *cbdata);
524+
525+
/* define a special macro to simplify sending of a heartbeat */
526+
/* PMIx_Process_monitor_nb is declared with six parameters (monitor, error,
 * directives, ndirs, cbfunc, cbdata). The expansion must therefore supply a
 * pmix_status_t for "error"; PMIX_SUCCESS is passed as a placeholder status,
 * with no directives and no callback.
 * NOTE(review): PMIX_SEND_HEARTBEAT is passed where a const pmix_info_t * is
 * expected - confirm its definition matches that parameter type. */
#define PMIx_Heartbeat() \
    PMIx_Process_monitor_nb(PMIX_SEND_HEARTBEAT, PMIX_SUCCESS, NULL, 0, NULL, NULL)
528+
476529
#if defined(c_plusplus) || defined(__cplusplus)
477530
}
478531
#endif

0 commit comments

Comments
 (0)