Skip to content

Commit d645557

Browse files
author
Ralph Castain
committed
Update to include the PMIx 2.0 APIs for monitoring and job control. Include required integration, but leave the monitors off for now. Move the sensor framework out of ORTE as it is being absorbed into PMIx
Fix typo and silence warnings Signed-off-by: Ralph Castain <[email protected]>
1 parent 20bf0dd commit d645557

File tree

83 files changed

+2709
-2975
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

83 files changed

+2709
-2975
lines changed

opal/include/opal/constants.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
* Copyright (c) 2004-2005 The Regents of the University of California.
1111
* All rights reserved.
1212
* Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved.
13-
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
13+
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
1414
* $COPYRIGHT$
1515
*
1616
* Additional copyrights may follow
@@ -94,7 +94,9 @@ enum {
9494
OPAL_ERR_PROC_RESTART = (OPAL_ERR_BASE - 63),
9595
OPAL_ERR_PROC_CHECKPOINT = (OPAL_ERR_BASE - 64),
9696
OPAL_ERR_PROC_MIGRATE = (OPAL_ERR_BASE - 65),
97-
OPAL_ERR_EVENT_REGISTRATION = (OPAL_ERR_BASE - 66)
97+
OPAL_ERR_EVENT_REGISTRATION = (OPAL_ERR_BASE - 66),
98+
OPAL_ERR_HEARTBEAT_ALERT = (OPAL_ERR_BASE - 67),
99+
OPAL_ERR_FILE_ALERT = (OPAL_ERR_BASE - 68)
98100
};
99101

100102
#define OPAL_ERR_MAX (OPAL_ERR_BASE - 100)

opal/mca/pmix/ext2x/pmix2x.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
22
/*
3-
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
3+
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
44
* Copyright (c) 2014-2017 Research Organization for Information Science
55
* and Technology (RIST). All rights reserved.
66
* Copyright (c) 2014-2015 Mellanox Technologies, Inc.
@@ -352,7 +352,7 @@ static void _event_hdlr(int sd, short args, void *cbdata)
352352
if (NULL != chain->final_cbfunc) {
353353
chain->final_cbfunc(PMIX_SUCCESS, chain->final_cbdata);
354354
}
355-
355+
356356
OBJ_RELEASE(chain);
357357

358358
return;

opal/mca/pmix/pmix2x/pmix/include/pmix.h

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,59 @@ pmix_status_t PMIx_Allocation_request_nb(pmix_alloc_directive_t directive,
473473
pmix_info_t *info, size_t ninfo,
474474
pmix_info_cbfunc_t cbfunc, void *cbdata);
475475

476+
/* Request a job control action. The targets array identifies the
477+
* processes to which the requested job control action is to be applied.
478+
* A NULL value can be used to indicate all processes in the caller's
479+
* nspace. PMIX_RANK_WILDCARD can also be used as the rank to indicate
480+
* that all processes in the given nspace are to be included.
481+
*
482+
* The directives are provided as pmix_info_t structs in the directives
483+
* array. The callback function provides a status to indicate whether or
484+
* not the request was granted, and to provide some information as to
485+
* the reason for any denial in the pmix_info_cbfunc_t array of pmix_info_t
486+
* structures. If non-NULL, then the specified release_fn must be called
487+
* when the callback function completes - this will be used to release
488+
* any provided pmix_info_t array.
489+
*/
490+
pmix_status_t PMIx_Job_control_nb(const pmix_proc_t targets[], size_t ntargets,
491+
const pmix_info_t directives[], size_t ndirs,
492+
pmix_info_cbfunc_t cbfunc, void *cbdata);
493+
494+
/* Request that something be monitored - e.g., that the server monitor
495+
* this process for periodic heartbeats as an indication that the process
496+
* has not become "wedged". When a monitor detects the specified alarm
497+
* condition, it will generate an event notification using the provided
498+
* error code and passing along any available relevant information. It is
499+
* up to the caller to register a corresponding event handler.
500+
*
501+
* Params:
502+
*
503+
* monitor: attribute indicating the type of monitor being requested - e.g.,
504+
* PMIX_MONITOR_FILE to indicate that the requestor is asking that
505+
* a file be monitored.
506+
*
507+
* error: the status code to be used when generating an event notification
508+
* alerting that the monitor has been triggered. The range of the
509+
* notification defaults to PMIX_RANGE_NAMESPACE - this can be
510+
* changed by providing a PMIX_RANGE directive
511+
*
512+
* directives: characterize the monitoring request (e.g., monitor file size)
513+
* and frequency of checking to be done
514+
*
515+
* cbfunc: provides a status to indicate whether or not the request was granted,
516+
* and to provide some information as to the reason for any denial in
517+
* the pmix_info_cbfunc_t array of pmix_info_t structures.
518+
*
519+
* Note: a process can send a heartbeat to the server using the PMIx_Heartbeat
520+
* macro provided below. */
521+
pmix_status_t PMIx_Process_monitor_nb(const pmix_info_t *monitor, pmix_status_t error,
522+
const pmix_info_t directives[], size_t ndirs,
523+
pmix_info_cbfunc_t cbfunc, void *cbdata);
524+
525+
/* define a special macro to simplify sending of a heartbeat - all six arguments of PMIx_Process_monitor_nb are supplied: PMIX_SUCCESS fills the error (status) argument, and no directives or callback are passed */
526+
#define PMIx_Heartbeat() \
527+
PMIx_Process_monitor_nb(PMIX_SEND_HEARTBEAT, PMIX_SUCCESS, NULL, 0, NULL, NULL)
528+
476529
#if defined(c_plusplus) || defined(__cplusplus)
477530
}
478531
#endif

0 commit comments

Comments
 (0)