Skip to content

Commit b343df4

Browse files
author
Ralph Castain
authored
Merge pull request #2669 from rhc54/topic/memprobe
Complete the memprobe support.
2 parents b4088c3 + 6509f60 commit b343df4

File tree

19 files changed

+468
-175
lines changed

19 files changed

+468
-175
lines changed

contrib/scaling/Makefile

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -4,14 +4,14 @@ all: $(PROGS)
44

55
CFLAGS = -O
66

7-
orte_no_op:
7+
orte_no_op: orte_no_op.c
88
ortecc -o orte_no_op orte_no_op.c
99

10-
mpi_no_op:
10+
mpi_no_op: mpi_no_op.c
1111
mpicc -o mpi_no_op mpi_no_op.c
1212

13-
mpi_memprobe:
14-
mpicc -o mpi_memprobe mpi_memprobe.c -lopen-pal
13+
mpi_memprobe: mpi_memprobe.c
14+
mpicc -o mpi_memprobe mpi_memprobe.c -lopen-pal -lopen-rte
1515

1616
clean:
1717
rm -f $(PROGS) *~

contrib/scaling/mpi_memprobe.c

Lines changed: 125 additions & 96 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,6 @@
1717
#include "orte/mca/errmgr/errmgr.h"
1818

1919
static int rank, size;
20-
static volatile int active;
2120
static volatile bool wait_for_release = true;
2221
#define MEMPROBE_RELEASE 12345
2322

@@ -27,7 +26,6 @@ static void _release_fn(int status,
2726
opal_pmix_notification_complete_fn_t cbfunc,
2827
void *cbdata)
2928
{
30-
fprintf(stderr, "Rank %d: Release recvd\n", rank);
3129
/* must let the notifier know we are done */
3230
if (NULL != cbfunc) {
3331
cbfunc(OPAL_ERR_HANDLERS_COMPLETE, NULL, NULL, NULL, cbdata);
@@ -58,7 +56,6 @@ static void qcbfunc(int status,
5856
opal_list_t *results = (opal_list_t*)cbdata;
5957
opal_value_t *kv;
6058

61-
fprintf(stderr, "Rank %d: Query returned status %d\n", rank, status);
6259
if (NULL != info) {
6360
while (NULL != (kv = (opal_value_t*)opal_list_remove_first(info))) {
6461
opal_list_append(results, &kv->super);
@@ -70,61 +67,90 @@ static void qcbfunc(int status,
7067
wait_for_release = false;
7168
}
7269

73-
int main(int argc, char* argv[])
70+
static void notifycbfunc(int status, void *cbdata)
7471
{
75-
opal_list_t *codes;
76-
opal_value_t *kv;
77-
opal_pmix_query_t *q;
78-
opal_list_t query, response;
79-
80-
MPI_Init(&argc, &argv);
81-
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
82-
MPI_Comm_size(MPI_COMM_WORLD, &size);
72+
volatile int *active = (volatile int*)cbdata;
73+
*active = status;
74+
}
8375

84-
/* everyone registers their event handler */
85-
codes = OBJ_NEW(opal_list_t);
76+
static void sample(void)
77+
{
78+
opal_value_t *kv, *ival;
79+
opal_pmix_query_t *q;
80+
opal_list_t query, response, *lt;
81+
volatile int active;
82+
char **answer = NULL, *tmp, *msg;
83+
84+
OBJ_CONSTRUCT(&query, opal_list_t);
85+
OBJ_CONSTRUCT(&response, opal_list_t);
86+
q = OBJ_NEW(opal_pmix_query_t);
87+
opal_list_append(&query, &q->super);
88+
opal_argv_append_nosize(&q->keys, OPAL_PMIX_QUERY_MEMORY_USAGE);
89+
/* qualify that we just want local avg, min/max values reported */
8690
kv = OBJ_NEW(opal_value_t);
87-
kv->key = strdup("errorcode");
88-
kv->type = OPAL_INT;
89-
kv->data.integer = MEMPROBE_RELEASE;
90-
opal_list_append(codes, &kv->super);
91+
kv->key = strdup(OPAL_PMIX_QUERY_LOCAL_ONLY);
92+
kv->type = OPAL_BOOL;
93+
kv->data.flag = true;
94+
opal_list_append(&q->qualifiers, &kv->super);
95+
kv = OBJ_NEW(opal_value_t);
96+
kv->key = strdup(OPAL_PMIX_QUERY_REPORT_AVG);
97+
kv->type = OPAL_BOOL;
98+
kv->data.flag = true;
99+
opal_list_append(&q->qualifiers, &kv->super);
100+
kv = OBJ_NEW(opal_value_t);
101+
kv->key = strdup(OPAL_PMIX_QUERY_REPORT_MINMAX);
102+
kv->type = OPAL_BOOL;
103+
kv->data.flag = true;
104+
opal_list_append(&q->qualifiers, &kv->super);
105+
/* issue the request */
106+
wait_for_release = true;
107+
opal_pmix.query(&query, qcbfunc, (void*)&response);
108+
/* wait for the query to complete */
109+
while (wait_for_release) {
110+
usleep(10);
111+
}
112+
wait_for_release = true;
113+
/* log my own results as a single string so the output
114+
* doesn't get garbled on the other end */
115+
asprintf(&tmp, "Data for node %s", orte_process_info.nodename);
116+
opal_argv_append_nosize(&answer, tmp);
117+
free(tmp);
118+
OPAL_LIST_FOREACH(kv, &response, opal_value_t) {
119+
lt = (opal_list_t*)kv->data.ptr;
120+
OPAL_LIST_FOREACH(ival, lt, opal_value_t) {
121+
if (0 == strcmp(ival->key, OPAL_PMIX_DAEMON_MEMORY)) {
122+
asprintf(&tmp, "\tDaemon: %f", ival->data.fval);
123+
opal_argv_append_nosize(&answer, tmp);
124+
free(tmp);
125+
} else if (0 == strcmp(ival->key, OPAL_PMIX_CLIENT_AVG_MEMORY)) {
126+
asprintf(&tmp, "\tClient: %f", ival->data.fval);
127+
opal_argv_append_nosize(&answer, tmp);
128+
free(tmp);
129+
} else {
130+
fprintf(stderr, "\tUnknown key: %s", ival->key);
131+
}
132+
}
133+
}
134+
opal_argv_append_nosize(&answer, "\n");
135+
OPAL_LIST_DESTRUCT(&response);
91136

137+
/* construct the log output */
138+
OBJ_CONSTRUCT(&response, opal_list_t);
139+
kv = OBJ_NEW(opal_value_t);
140+
kv->key = strdup(OPAL_PMIX_LOG_STDOUT);
141+
kv->type = OPAL_STRING;
142+
kv->data.string = opal_argv_join(answer, '\n');
143+
opal_list_append(&response, &kv->super);
144+
opal_argv_free(answer);
92145
active = -1;
93-
opal_pmix.register_evhandler(codes, NULL, _release_fn, _register_fn, (void*)&active);
146+
opal_pmix.log(&response, notifycbfunc, (void*)&active);
94147
while (-1 == active) {
95148
usleep(10);
96149
}
150+
OPAL_LIST_DESTRUCT(&response);
151+
97152

98-
/* rank 0 asks for memory to be sampled, while everyone else waits */
99153
if (0 == rank) {
100-
fprintf(stderr, "Sampling memory usage after MPI_Init\n");
101-
OBJ_CONSTRUCT(&query, opal_list_t);
102-
OBJ_CONSTRUCT(&response, opal_list_t);
103-
q = OBJ_NEW(opal_pmix_query_t);
104-
opal_list_append(&query, &q->super);
105-
opal_argv_append_nosize(&q->keys, OPAL_PMIX_QUERY_MEMORY_USAGE);
106-
/* qualify that we just want avg, min/max values reported */
107-
kv = OBJ_NEW(opal_value_t);
108-
kv->key = strdup(OPAL_PMIX_QUERY_REPORT_AVG);
109-
kv->type = OPAL_BOOL;
110-
kv->data.flag = true;
111-
opal_list_append(&q->qualifiers, &kv->super);
112-
kv = OBJ_NEW(opal_value_t);
113-
kv->key = strdup(OPAL_PMIX_QUERY_REPORT_MINMAX);
114-
kv->type = OPAL_BOOL;
115-
kv->data.flag = true;
116-
opal_list_append(&q->qualifiers, &kv->super);
117-
/* issue the request */
118-
wait_for_release = true;
119-
opal_pmix.query(&query, qcbfunc, (void*)&response);
120-
while (wait_for_release) {
121-
usleep(10);
122-
}
123-
/* output the results */
124-
OPAL_LIST_FOREACH(kv, &response, opal_value_t) {
125-
fprintf(stderr, "\tResults: %s\n", kv->key);
126-
}
127-
OPAL_LIST_DESTRUCT(&response);
128154
/* send the notification to release the other procs */
129155
wait_for_release = true;
130156
OBJ_CONSTRUCT(&response, opal_list_t);
@@ -133,16 +159,58 @@ int main(int argc, char* argv[])
133159
kv->type = OPAL_BOOL;
134160
kv->data.flag = true;
135161
opal_list_append(&response, &kv->super);
162+
active = -1;
136163
if (OPAL_SUCCESS != opal_pmix.notify_event(MEMPROBE_RELEASE, NULL,
137164
OPAL_PMIX_RANGE_GLOBAL, &response,
138-
NULL, NULL)) {
165+
notifycbfunc, (void*)&active)) {
139166
fprintf(stderr, "Notify event failed\n");
140167
exit(1);
141168
}
142-
while (wait_for_release) {
169+
while (-1 == active) {
143170
usleep(10);
144171
}
145172
OPAL_LIST_DESTRUCT(&response);
173+
}
174+
175+
/* now wait for notification */
176+
while (wait_for_release) {
177+
usleep(10);
178+
}
179+
}
180+
181+
int main(int argc, char* argv[])
182+
{
183+
opal_list_t *codes;
184+
opal_value_t *kv;
185+
volatile int active;
186+
187+
MPI_Init(&argc, &argv);
188+
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
189+
MPI_Comm_size(MPI_COMM_WORLD, &size);
190+
191+
if (0 == rank) {
192+
fprintf(stderr, "Sampling memory usage after MPI_Init\n");
193+
}
194+
195+
/* everyone registers their event handler */
196+
codes = OBJ_NEW(opal_list_t);
197+
kv = OBJ_NEW(opal_value_t);
198+
kv->key = strdup("errorcode");
199+
kv->type = OPAL_INT;
200+
kv->data.integer = MEMPROBE_RELEASE;
201+
opal_list_append(codes, &kv->super);
202+
203+
active = -1;
204+
opal_pmix.register_evhandler(codes, NULL, _release_fn, _register_fn, (void*)&active);
205+
while (-1 == active) {
206+
usleep(10);
207+
}
208+
209+
/* if I am the local leader (i.e., local_rank=0), then I ask
210+
* my daemon to report the local memory usage, and send it
211+
* to rank=0 */
212+
if (0 == orte_process_info.my_local_rank) {
213+
sample();
146214
} else {
147215
/* now wait for notification */
148216
while (wait_for_release) {
@@ -157,60 +225,21 @@ int main(int argc, char* argv[])
157225

158226
if (0 == rank) {
159227
fprintf(stderr, "\n\nSampling memory usage after MPI_Barrier\n");
160-
OBJ_CONSTRUCT(&query, opal_list_t);
161-
OBJ_CONSTRUCT(&response, opal_list_t);
162-
q = OBJ_NEW(opal_pmix_query_t);
163-
opal_list_append(&query, &q->super);
164-
opal_argv_append_nosize(&q->keys, OPAL_PMIX_QUERY_MEMORY_USAGE);
165-
/* qualify that we just want avg, min/max values reported */
166-
kv = OBJ_NEW(opal_value_t);
167-
kv->key = strdup(OPAL_PMIX_QUERY_REPORT_AVG);
168-
kv->type = OPAL_BOOL;
169-
kv->data.flag = true;
170-
opal_list_append(&q->qualifiers, &kv->super);
171-
kv = OBJ_NEW(opal_value_t);
172-
kv->key = strdup(OPAL_PMIX_QUERY_REPORT_MINMAX);
173-
kv->type = OPAL_BOOL;
174-
kv->data.flag = true;
175-
opal_list_append(&q->qualifiers, &kv->super);
176-
/* issue the request */
177-
wait_for_release = true;
178-
opal_pmix.query(&query, qcbfunc, (void*)&response);
179-
while (wait_for_release) {
180-
usleep(10);
181-
}
182-
/* output the results */
183-
OPAL_LIST_FOREACH(kv, &response, opal_value_t) {
184-
fprintf(stderr, "\tResults: %s\n", kv->key);
185-
}
186-
OPAL_LIST_DESTRUCT(&response);
187-
/* send the notification to release the other procs */
188-
wait_for_release = true;
189-
OBJ_CONSTRUCT(&response, opal_list_t);
190-
kv = OBJ_NEW(opal_value_t);
191-
kv->key = strdup(OPAL_PMIX_EVENT_NON_DEFAULT);
192-
kv->type = OPAL_BOOL;
193-
kv->data.flag = true;
194-
opal_list_append(&response, &kv->super);
195-
if (OPAL_SUCCESS != opal_pmix.notify_event(MEMPROBE_RELEASE, NULL,
196-
OPAL_PMIX_RANGE_GLOBAL, &response,
197-
NULL, NULL)) {
198-
fprintf(stderr, "Notify event failed\n");
199-
exit(1);
200-
}
201-
while (wait_for_release) {
202-
usleep(10);
228+
}
229+
230+
if (0 == orte_process_info.my_local_rank) {
231+
if (0 != rank) {
232+
/* wait a little */
233+
usleep(1000);
203234
}
204-
OPAL_LIST_DESTRUCT(&response);
235+
sample();
205236
} else {
206237
/* wait again while memory is sampled */
207238
while (wait_for_release) {
208239
usleep(10);
209240
}
210241
}
211242

212-
fprintf(stderr, "%d: FINALIZING\n", rank);
213-
fflush(stderr);
214243
MPI_Finalize();
215244
return 0;
216245
}

opal/mca/pmix/pmix2x/pmix/src/buffer_ops/unpack.c

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
* Copyright (c) 2004-2005 The Regents of the University of California.
1111
* All rights reserved.
1212
* Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved.
13-
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
13+
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
1414
* Copyright (c) 2015 Research Organization for Information Science
1515
* and Technology (RIST). All rights reserved.
1616
* Copyright (c) 2016 Mellanox Technologies, Inc.
@@ -688,6 +688,7 @@ pmix_status_t pmix_bfrop_unpack_status(pmix_buffer_t *buffer, void *dest,
688688
return PMIX_ERR_NOMEM;
689689
}
690690
if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, val->data.darray, &m, PMIX_DATA_ARRAY))) {
691+
PMIX_ERROR_LOG(ret);
691692
return ret;
692693
}
693694
break;
@@ -1274,6 +1275,9 @@ pmix_status_t pmix_bfrop_unpack_darray(pmix_buffer_t *buffer, void *dest,
12741275
case PMIX_COMPRESSED_STRING:
12751276
nbytes = sizeof(pmix_byte_object_t);
12761277
break;
1278+
case PMIX_INFO:
1279+
nbytes = sizeof(pmix_info_t);
1280+
break;
12771281
case PMIX_PERSIST:
12781282
nbytes = sizeof(pmix_persistence_t);
12791283
break;

opal/mca/pmix/pmix2x/pmix/src/common/pmix_query.c

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
22
/*
3-
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
3+
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
44
* Copyright (c) 2016 Mellanox Technologies, Inc.
55
* All rights reserved.
66
* Copyright (c) 2016 IBM Corporation. All rights reserved.
@@ -78,7 +78,11 @@ static void query_cbfunc(struct pmix_peer_t *peer,
7878
cnt = results->ninfo;
7979
if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, results->info, &cnt, PMIX_INFO))) {
8080
PMIX_ERROR_LOG(rc);
81-
goto complete;
81+
pmix_output(0, "TYPE: %d", results->info[0].value.type);
82+
results->status = rc;
83+
PMIX_INFO_FREE(results->info, results->ninfo);
84+
results->info = NULL;
85+
results->ninfo = 0;
8286
}
8387
}
8488

0 commit comments

Comments
 (0)