Skip to content

Commit c1050bc

Browse files
author
Ralph Castain
committed
Provide a mechanism for obtaining memory profiles of daemons and application processes for use in studying our memory footprint. Setting OMPI_MEMPROFILE=N causes mpirun to set a timer for N seconds. When the timer fires, mpirun will query each daemon in the job to report its own memory usage plus the average memory usage of its child processes. The Proportional Set Size (PSS) is used for this purpose.
1 parent ed58460 commit c1050bc

File tree

11 files changed

+230
-2
lines changed

11 files changed

+230
-2
lines changed

opal/dss/dss_copy.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,7 @@ int opal_dss_copy_pstat(opal_pstats_t **dest, opal_pstats_t *src,
219219
p->time = src->time;
220220
p->priority = src->priority;
221221
p->num_threads = src->num_threads;
222+
p->pss = src->pss;
222223
p->vsize = src->vsize;
223224
p->rss = src->rss;
224225
p->peak_vsize = src->peak_vsize;

opal/dss/dss_open_close.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ static void opal_pstat_construct(opal_pstats_t *obj)
156156
obj->time.tv_usec = 0;
157157
obj->priority = -1;
158158
obj->num_threads = -1;
159+
obj->pss = 0.0;
159160
obj->vsize = 0.0;
160161
obj->rss = 0.0;
161162
obj->peak_vsize = 0.0;

opal/dss/dss_pack.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -499,6 +499,9 @@ int opal_dss_pack_pstat(opal_buffer_t *buffer, const void *src,
499499
if (OPAL_SUCCESS != (ret = opal_dss_pack_buffer(buffer, &ptr[i]->num_threads, 1, OPAL_INT16))) {
500500
return ret;
501501
}
502+
if (OPAL_SUCCESS != (ret = opal_dss_pack_float(buffer, &ptr[i]->pss, 1, OPAL_FLOAT))) {
503+
return ret;
504+
}
502505
if (OPAL_SUCCESS != (ret = opal_dss_pack_float(buffer, &ptr[i]->vsize, 1, OPAL_FLOAT))) {
503506
return ret;
504507
}

opal/dss/dss_print.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -654,10 +654,10 @@ int opal_dss_print_pstat(char **output, char *prefix, opal_pstats_t *src, opal_d
654654
return OPAL_SUCCESS;
655655
}
656656
asprintf(output, "%sOPAL_PSTATS SAMPLED AT: %ld.%06ld\n%snode: %s rank: %d pid: %d cmd: %s state: %c pri: %d #threads: %d Processor: %d\n"
657-
"%s\ttime: %ld.%06ld cpu: %5.2f VMsize: %8.2f PeakVMSize: %8.2f RSS: %8.2f\n",
657+
"%s\ttime: %ld.%06ld cpu: %5.2f PSS: %8.2f VMsize: %8.2f PeakVMSize: %8.2f RSS: %8.2f\n",
658658
prefx, (long)src->sample_time.tv_sec, (long)src->sample_time.tv_usec,
659659
prefx, src->node, src->rank, src->pid, src->cmd, src->state[0], src->priority, src->num_threads, src->processor,
660-
prefx, (long)src->time.tv_sec, (long)src->time.tv_usec, src->percent_cpu, src->vsize, src->peak_vsize, src->rss);
660+
prefx, (long)src->time.tv_sec, (long)src->time.tv_usec, src->percent_cpu, src->pss, src->vsize, src->peak_vsize, src->rss);
661661
if (prefx != prefix) {
662662
free(prefx);
663663
}

opal/dss/dss_types.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,7 @@ typedef struct {
182182
float percent_cpu;
183183
int32_t priority;
184184
int16_t num_threads;
185+
float pss; /* in MBytes */
185186
float vsize; /* in MBytes */
186187
float rss; /* in MBytes */
187188
float peak_vsize; /* in MBytes */

opal/dss/dss_unpack.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -643,6 +643,11 @@ int opal_dss_unpack_pstat(opal_buffer_t *buffer, void *dest,
643643
return ret;
644644
}
645645
m=1;
646+
if (OPAL_SUCCESS != (ret = opal_dss_unpack_float(buffer, &ptr[i]->pss, &m, OPAL_FLOAT))) {
647+
OPAL_ERROR_LOG(ret);
648+
return ret;
649+
}
650+
m=1;
646651
if (OPAL_SUCCESS != (ret = opal_dss_unpack_float(buffer, &ptr[i]->vsize, &m, OPAL_FLOAT))) {
647652
OPAL_ERROR_LOG(ret);
648653
return ret;

opal/mca/pstat/linux/pstat_linux_module.c

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,31 @@ static int query(pid_t pid,
310310
}
311311
}
312312
fclose(fp);
313+
314+
/* now create the smaps filename for this proc */
315+
memset(data, 0, sizeof(data));
316+
numchars = snprintf(data, sizeof(data), "/proc/%d/smaps", pid);
317+
if (numchars >= sizeof(data)) {
318+
return OPAL_ERR_VALUE_OUT_OF_BOUNDS;
319+
}
320+
321+
if (NULL == (fp = fopen(data, "r"))) {
322+
/* ignore this */
323+
return OPAL_SUCCESS;
324+
}
325+
326+
/* parse it to find lines that start with "Pss" */
327+
while (NULL != (dptr = local_getline(fp))) {
328+
if (NULL == (value = local_stripper(dptr))) {
329+
/* cannot process */
330+
continue;
331+
}
332+
/* look for Pss */
333+
if (0 == strncmp(dptr, "Pss", strlen("Pss"))) {
334+
stats->pss += convert_value(value);
335+
}
336+
}
337+
fclose(fp);
313338
}
314339

315340
if (NULL != nstats) {

orte/mca/odls/odls_types.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,9 @@ typedef uint8_t orte_daemon_cmd_flag_t;
8383
/* for debug purposes, get stack traces from all application procs */
8484
#define ORTE_DAEMON_GET_STACK_TRACES (orte_daemon_cmd_flag_t) 31
8585

86+
/* for memory profiling */
87+
#define ORTE_DAEMON_GET_MEMPROFILE (orte_daemon_cmd_flag_t) 32
88+
8689
/*
8790
* Struct written up the pipe from the child to the parent.
8891
*/

orte/mca/rml/rml_types.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,9 @@ BEGIN_C_DECLS
166166
/* stacktrace for debug */
167167
#define ORTE_RML_TAG_STACK_TRACE 60
168168

169+
/* memory profile */
170+
#define ORTE_RML_TAG_MEMPROFILE 61
171+
169172
#define ORTE_RML_TAG_MAX 100
170173

171174
#define ORTE_RML_TAG_NTOH(t) ntohl(t)

orte/orted/orted_comm.c

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545

4646
#include "opal/mca/event/event.h"
4747
#include "opal/mca/base/base.h"
48+
#include "opal/mca/pstat/pstat.h"
4849
#include "opal/util/output.h"
4950
#include "opal/util/opal_environ.h"
5051
#include "opal/util/path.h"
@@ -115,6 +116,8 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
115116
FILE *fp;
116117
char gscmd[256], path[1035], *pathptr;
117118
char string[256], *string_ptr = string;
119+
float pss;
120+
opal_pstats_t pstat;
118121

119122
/* unpack the command */
120123
n = 1;
@@ -1151,6 +1154,44 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
11511154
}
11521155
break;
11531156

1157+
case ORTE_DAEMON_GET_MEMPROFILE:
1158+
answer = OBJ_NEW(opal_buffer_t);
1159+
/* pack our hostname so they know where it came from */
1160+
opal_dss.pack(answer, &orte_process_info.nodename, 1, OPAL_STRING);
1161+
/* collect my memory usage */
1162+
OBJ_CONSTRUCT(&pstat, opal_pstats_t);
1163+
opal_pstat.query(orte_process_info.pid, &pstat, NULL);
1164+
opal_dss.pack(answer, &pstat.pss, 1, OPAL_FLOAT);
1165+
OBJ_DESTRUCT(&pstat);
1166+
/* collect the memory usage of all my children */
1167+
pss = 0.0;
1168+
num_replies = 0;
1169+
for (i=0; i < orte_local_children->size; i++) {
1170+
if (NULL != (proct = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i)) &&
1171+
ORTE_FLAG_TEST(proct, ORTE_PROC_FLAG_ALIVE)) {
1172+
/* collect the stats on this proc */
1173+
OBJ_CONSTRUCT(&pstat, opal_pstats_t);
1174+
if (OPAL_SUCCESS == opal_pstat.query(proct->pid, &pstat, NULL)) {
1175+
pss += pstat.pss;
1176+
++num_replies;
1177+
}
1178+
OBJ_DESTRUCT(&pstat);
1179+
}
1180+
}
1181+
/* compute the average value */
1182+
if (0 < num_replies) {
1183+
pss /= (float)num_replies;
1184+
}
1185+
opal_dss.pack(answer, &pss, 1, OPAL_FLOAT);
1186+
/* send it back */
1187+
if (0 > (ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, answer,
1188+
ORTE_RML_TAG_MEMPROFILE,
1189+
orte_rml_send_callback, NULL))) {
1190+
ORTE_ERROR_LOG(ret);
1191+
OBJ_RELEASE(answer);
1192+
}
1193+
break;
1194+
11541195
default:
11551196
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
11561197
}
@@ -1222,6 +1263,9 @@ static char *get_orted_comm_cmd_str(int command)
12221263
case ORTE_DAEMON_GET_STACK_TRACES:
12231264
return strdup("ORTE_DAEMON_GET_STACK_TRACES");
12241265

1266+
case ORTE_DAEMON_GET_MEMPROFILE:
1267+
return strdup("ORTE_DAEMON_GET_MEMPROFILE");
1268+
12251269
default:
12261270
return strdup("Unknown Command!");
12271271
}

0 commit comments

Comments
 (0)