Skip to content

Commit 9eab9a1

Browse files
author
Ralph Castain
committed
Remove stale global variables
Revamp the event notification integration to rely on the PMIx event chaining and remove the duplicate chaining in OPAL. This ensures we get system-level events that target non-default handlers. Restore the hostname entries for MPI-level error messages, but provide an MCA param (orte_hostname_cutoff) to remove them for large clusters where the memory footprint is problematic. Set the default at 1000 nodes in the job (not the allocation). Begin first cut at memory profiler. Some minor cleanups of memprobe. Signed-off-by: Ralph Castain <[email protected]>
1 parent 5f68d65 commit 9eab9a1

25 files changed

+556
-670
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ contrib/platform/intel/bend/*orcm*
111111
contrib/scaling/orte_no_op
112112
contrib/scaling/mpi_no_op
113113
contrib/scaling/mpi_barrier
114+
contrib/scaling/mpi_memprobe
114115

115116
examples/hello_c
116117
examples/hello_cxx

contrib/scaling/Makefile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
PROGS = orte_no_op mpi_no_op
1+
PROGS = orte_no_op mpi_no_op mpi_memprobe
22

33
all: $(PROGS)
44

@@ -10,5 +10,8 @@ orte_no_op:
1010
mpi_no_op:
1111
mpicc -o mpi_no_op mpi_no_op.c
1212

13+
mpi_memprobe:
14+
mpicc -o mpi_memprobe mpi_memprobe.c -lopen-pal
15+
1316
clean:
1417
rm -f $(PROGS) *~

contrib/scaling/mpi_memprobe.c

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
/* -*- C -*-
2+
*
3+
* $HEADER$
4+
*
5+
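* MPI memory-probe application: pauses after MPI_Init and again after MPI_Barrier until a release event arrives, so memory usage can be sampled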
6+
*/
7+
8+
#include "orte_config.h"
9+
10+
#include <stdio.h>
11+
#include "mpi.h"
12+
#include "opal/mca/pmix/pmix.h"
13+
#include "orte/runtime/runtime.h"
14+
#include "orte/util/proc_info.h"
15+
#include "orte/util/name_fns.h"
16+
#include "orte/runtime/orte_globals.h"
17+
#include "orte/mca/errmgr/errmgr.h"
18+
19+
/* Registration progress flag: main() sets it to -1 before requesting the
 * event handler registration and polls until _register_fn() overwrites it
 * with the registration status. */
static volatile int active;
20+
/* True while main() should keep waiting; cleared by _release_fn() when the
 * release event is delivered, and re-armed by main() between sample points. */
static volatile bool wait_for_release = true;
21+
/* Event code that main() registers _release_fn() for. */
#define MEMPROBE_RELEASE 12345
22+
23+
static void _release_fn(int status,
24+
const opal_process_name_t *source,
25+
opal_list_t *info, opal_list_t *results,
26+
opal_pmix_notification_complete_fn_t cbfunc,
27+
void *cbdata)
28+
{
29+
/* must let the notifier know we are done */
30+
if (NULL != cbfunc) {
31+
cbfunc(0, NULL, NULL, NULL, cbdata);
32+
}
33+
/* flag that the debugger is complete so we can exit */
34+
wait_for_release = false;
35+
}
36+
37+
/*
 * Completion callback for the event-handler registration request.
 *
 * status        - result of the registration; 0 is treated as success and
 *                 any other value is reported on stderr
 * evhandler_ref - reference assigned to the handler (only used in the
 *                 failure message)
 * cbdata        - pointer to the caller's volatile int completion flag; the
 *                 registration status is stored through it. May be NULL, in
 *                 which case nothing is stored.
 *
 * NOTE(review): main() waits for its flag to change from -1, so a status of
 * exactly -1 would be indistinguishable from "still pending" - confirm that
 * the registration status can never be -1.
 */
static void _register_fn(int status,
                         size_t evhandler_ref,
                         void *cbdata)
{
    /* deliberately not called "active" so the file-scope flag is not shadowed */
    volatile int *flag = (volatile int *)cbdata;

    if (0 != status) {
        fprintf(stderr, "Client EVENT HANDLER REGISTRATION FAILED WITH STATUS %d, ref=%lu\n",
                status, (unsigned long)evhandler_ref);
    }

    /* publish the outcome so the waiting caller can stop polling */
    if (NULL != flag) {
        *flag = status;
    }
}
49+
50+
int main(int argc, char* argv[])
51+
{
52+
int rank, size;
53+
opal_list_t *codes;
54+
opal_value_t *kv;
55+
56+
MPI_Init(&argc, &argv);
57+
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
58+
MPI_Comm_size(MPI_COMM_WORLD, &size);
59+
60+
if (0 == rank) {
61+
fprintf(stderr, "Sampling memory usage after MPI_Init\n");
62+
}
63+
64+
codes = OBJ_NEW(opal_list_t);
65+
kv = OBJ_NEW(opal_value_t);
66+
kv->key = strdup("errorcode");
67+
kv->type = OPAL_INT;
68+
kv->data.integer = MEMPROBE_RELEASE;
69+
opal_list_append(codes, &kv->super);
70+
71+
active = -1;
72+
opal_pmix.register_evhandler(codes, NULL, _release_fn, _register_fn, (void*)&active);
73+
while (-1 == active) {
74+
usleep(10);
75+
}
76+
77+
/* now wait for notification */
78+
while (wait_for_release) {
79+
usleep(10);
80+
}
81+
wait_for_release = true;
82+
83+
/* perform a barrier so some communication will occur, thus
84+
* requiring exchange of endpoint info */
85+
MPI_Barrier(MPI_COMM_WORLD);
86+
87+
if (0 == rank) {
88+
fprintf(stderr, "\n\nSampling memory usage after MPI_Barrier\n");
89+
}
90+
91+
/* wait again while memory is sampled */
92+
while (wait_for_release) {
93+
usleep(10);
94+
}
95+
96+
MPI_Finalize();
97+
return 0;
98+
}

opal/mca/pmix/pmix2x/pmix/include/pmix_common.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
22
/*
3-
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved
3+
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
44
* Copyright (c) 2016 Research Organization for Information Science
55
* and Technology (RIST). All rights reserved.
66
* Copyright (c) 2016 IBM Corporation. All rights reserved.
@@ -157,6 +157,7 @@ typedef uint32_t pmix_rank_t;
157157
#define PMIX_TDIR_RMCLEAN "pmix.tdir.rmclean" // (bool) Resource Manager will clean session directories
158158

159159
/* information about relative ranks as assigned by the RM */
160+
#define PMIX_PROCID "pmix.procid" // (pmix_proc_t) process identifier
160161
#define PMIX_NSPACE "pmix.nspace" // (char*) nspace of a job
161162
#define PMIX_JOBID "pmix.jobid" // (char*) jobid assigned by scheduler
162163
#define PMIX_APPNUM "pmix.appnum" // (uint32_t) app number within the job
@@ -282,6 +283,8 @@ typedef uint32_t pmix_rank_t;
282283
#define PMIX_QUERY_AUTHORIZATIONS "pmix.qry.auths" // return operations tool is authorized to perform
283284
#define PMIX_QUERY_SPAWN_SUPPORT "pmix.qry.spawn" // return a comma-delimited list of supported spawn attributes
284285
#define PMIX_QUERY_DEBUG_SUPPORT "pmix.qry.debug" // return a comma-delimited list of supported debug attributes
286+
#define PMIX_QUERY_MEMORY_USAGE "pmix.qry.mem" // return info on memory usage for the procs indicated in the qualifiers
287+
#define PMIX_QUERY_LOCAL_ONLY "pmix.qry.local" // constrain the query to local information only
285288

286289
/* log attributes */
287290
#define PMIX_LOG_STDERR "pmix.log.stderr" // (bool) log data to stderr

opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
22
/*
3-
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
3+
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
44
* Copyright (c) 2014-2016 Research Organization for Information Science
55
* and Technology (RIST). All rights reserved.
66
* Copyright (c) 2014 Artem Y. Polyakov <[email protected]>.
@@ -553,11 +553,18 @@ static void _putfn(int sd, short args, void *cbdata)
553553
{
554554
pmix_cb_t *cb = (pmix_cb_t*)cbdata;
555555
pmix_status_t rc;
556-
pmix_kval_t *kv;
556+
pmix_kval_t *kv = NULL;
557557
pmix_nspace_t *ns;
558558
uint8_t *tmp;
559559
size_t len;
560560

561+
/* no need to push info that starts with "pmix" as that is
562+
* info we would have been provided at startup */
563+
if (0 == strncmp(cb->key, "pmix", 4)) {
564+
rc = PMIX_SUCCESS;
565+
goto done;
566+
}
567+
561568
/* setup to xfer the data */
562569
kv = PMIX_NEW(pmix_kval_t);
563570
kv->key = strdup(cb->key); // need to copy as the input belongs to the user
@@ -622,7 +629,9 @@ static void _putfn(int sd, short args, void *cbdata)
622629
}
623630

624631
done:
625-
PMIX_RELEASE(kv); // maintain accounting
632+
if (NULL != kv) {
633+
PMIX_RELEASE(kv); // maintain accounting
634+
}
626635
cb->pstatus = rc;
627636
cb->active = false;
628637
}

opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
22
/*
3-
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
3+
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
44
* Copyright (c) 2014-2016 Research Organization for Information Science
55
* and Technology (RIST). All rights reserved.
66
* Copyright (c) 2014 Artem Y. Polyakov <[email protected]>.
@@ -763,7 +763,7 @@ static void _getnbfn(int fd, short flags, void *cbdata)
763763
* us to attempt to retrieve it from the server */
764764
for (n=0; n < cb->ninfo; n++) {
765765
if (0 == strcmp(cb->info[n].key, PMIX_OPTIONAL) &&
766-
cb->info[n].value.data.flag) {
766+
(PMIX_UNDEF == cb->info[n].value.type || cb->info[n].value.data.flag)) {
767767
/* they don't want us to try and retrieve it */
768768
pmix_output_verbose(2, pmix_globals.debug_output,
769769
"PMIx_Get key=%s for rank = %d, namespace = %s was not found - request was optional",

opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
22
/*
3-
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
3+
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
44
* $COPYRIGHT$
55
*
66
* Additional copyrights may follow
@@ -356,6 +356,7 @@ static void reg_event_hdlr(int sd, short args, void *cbdata)
356356
sing->code = cd->codes[0];
357357
index = pmix_globals.events.nhdlrs;
358358
sing->index = index;
359+
sing->evhdlr = cd->evhdlr;
359360
++pmix_globals.events.nhdlrs;
360361
sing->cbobject = cbobject;
361362
rc = _add_hdlr(&pmix_globals.events.single_events, &sing->super,
@@ -365,17 +366,17 @@ static void reg_event_hdlr(int sd, short args, void *cbdata)
365366
PMIX_ERR_WOULD_BLOCK != rc) {
366367
/* unable to register */
367368
--pmix_globals.events.nhdlrs;
368-
rc = PMIX_ERR_EVENT_REGISTRATION;
369-
index = UINT_MAX;
369+
rc = PMIX_ERR_EVENT_REGISTRATION;
370+
index = UINT_MAX;
371+
goto ack;
372+
}
373+
if (PMIX_ERR_WOULD_BLOCK == rc) {
374+
/* the callback will provide our response */
375+
PMIX_RELEASE(cd);
376+
return;
377+
}
370378
goto ack;
371379
}
372-
if (PMIX_ERR_WOULD_BLOCK == rc) {
373-
/* the callback will provide our response */
374-
PMIX_RELEASE(cd);
375-
return;
376-
}
377-
goto ack;
378-
}
379380

380381
/* must be a multi-code registration */
381382
multi = PMIX_NEW(pmix_multi_event_t);
@@ -387,6 +388,7 @@ static void reg_event_hdlr(int sd, short args, void *cbdata)
387388
memcpy(multi->codes, cd->codes, cd->ncodes * sizeof(pmix_status_t));
388389
index = pmix_globals.events.nhdlrs;
389390
multi->index = index;
391+
multi->evhdlr = cd->evhdlr;
390392
++pmix_globals.events.nhdlrs;
391393
multi->cbobject = cbobject;
392394
rc = _add_hdlr(&pmix_globals.events.multi_events, &multi->super,
@@ -396,9 +398,9 @@ static void reg_event_hdlr(int sd, short args, void *cbdata)
396398
PMIX_ERR_WOULD_BLOCK != rc) {
397399
/* unable to register */
398400
--pmix_globals.events.nhdlrs;
399-
rc = PMIX_ERR_EVENT_REGISTRATION;
400-
index = UINT_MAX;
401-
goto ack;
401+
rc = PMIX_ERR_EVENT_REGISTRATION;
402+
index = UINT_MAX;
403+
goto ack;
402404
}
403405
if (PMIX_ERR_WOULD_BLOCK == rc) {
404406
/* the callback will provide our response */

0 commit comments

Comments
 (0)