Skip to content

Commit 3a157f0

Browse files
author
Ralph Castain
committed
One more time - we "push" IOF for stdout, stderr, and stddiag with separate calls. However, we were creating the sinks for all three of them each time, which caused them to leak. Create the sinks only once for each channel.
Signed-off-by: Ralph Castain <[email protected]>
1 parent d9fc88c commit 3a157f0

File tree

4 files changed

+51
-68
lines changed

4 files changed

+51
-68
lines changed

orte/mca/iof/base/iof_base_setup.c

Lines changed: 36 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
* Copyright (c) 2004-2005 The Regents of the University of California.
1111
* All rights reserved.
1212
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
13-
* Copyright (c) 2016 Intel, Inc. All rights reserved.
13+
* Copyright (c) 2016-2017 Intel, Inc. All rights reserved.
1414
* $COPYRIGHT$
1515
*
1616
* Additional copyrights may follow
@@ -244,10 +244,7 @@ orte_iof_base_setup_parent(const orte_process_name_t* name,
244244

245245
int orte_iof_base_setup_output_files(const orte_process_name_t* dst_name,
246246
orte_job_t *jobdat,
247-
orte_iof_proc_t *proct,
248-
orte_iof_sink_t **stdoutsink,
249-
orte_iof_sink_t **stderrsink,
250-
orte_iof_sink_t **stddiagsink)
247+
orte_iof_proc_t *proct)
251248
{
252249
int rc;
253250
char *dirname, *outdir, *outfile;
@@ -289,19 +286,15 @@ int orte_iof_base_setup_output_files(const orte_process_name_t* dst_name,
289286
} else {
290287
asprintf(&outdir, "%s/rank.%0*lu", dirname,
291288
numdigs, (unsigned long)proct->name.vpid);
292-
293289
}
294290
/* ensure the directory exists */
295291
if (OPAL_SUCCESS != (rc = opal_os_dirpath_create(outdir, S_IRWXU|S_IRGRP|S_IXGRP))) {
296292
ORTE_ERROR_LOG(rc);
297293
free(outdir);
298294
return rc;
299295
}
300-
/* if they asked for stderr to be combined with stdout, then we
301-
* only create one file and tell the IOF to put both streams
302-
* into it. Otherwise, we create separate files for each stream */
303-
if (orte_get_attribute(&jobdat->attributes, ORTE_JOB_MERGE_STDERR_STDOUT, NULL, OPAL_BOOL)) {
304-
/* create the output file */
296+
if (NULL != proct->revstdout && NULL == proct->revstdout->sink) {
297+
/* setup the stdout sink */
305298
asprintf(&outfile, "%s/stdout", outdir);
306299
fdout = open(outfile, O_CREAT|O_RDWR|O_TRUNC, 0644);
307300
free(outfile);
@@ -311,40 +304,42 @@ int orte_iof_base_setup_output_files(const orte_process_name_t* dst_name,
311304
return ORTE_ERR_FILE_OPEN_FAILURE;
312305
}
313306
/* define a sink to that file descriptor */
314-
ORTE_IOF_SINK_DEFINE(stdoutsink, dst_name, fdout, ORTE_IOF_STDMERGE,
315-
orte_iof_base_write_handler);
316-
/* point the stderr read event to it as well */
317-
OBJ_RETAIN(*stdoutsink);
318-
*stderrsink = *stdoutsink;
319-
} else {
320-
/* create separate files for stderr and stdout */
321-
asprintf(&outfile, "%s/stdout", outdir);
322-
fdout = open(outfile, O_CREAT|O_RDWR|O_TRUNC, 0644);
323-
free(outfile);
324-
if (fdout < 0) {
325-
/* couldn't be opened */
326-
ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE);
327-
return ORTE_ERR_FILE_OPEN_FAILURE;
328-
}
329-
/* define a sink to that file descriptor */
330-
ORTE_IOF_SINK_DEFINE(stdoutsink, dst_name, fdout, ORTE_IOF_STDOUT,
307+
ORTE_IOF_SINK_DEFINE(&proct->revstdout->sink, dst_name,
308+
proct->revstdout->fd, ORTE_IOF_STDOUT,
331309
orte_iof_base_write_handler);
310+
}
332311

333-
asprintf(&outfile, "%s/stderr", outdir);
334-
fdout = open(outfile, O_CREAT|O_RDWR|O_TRUNC, 0644);
335-
free(outfile);
336-
if (fdout < 0) {
337-
/* couldn't be opened */
338-
ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE);
339-
return ORTE_ERR_FILE_OPEN_FAILURE;
312+
if (NULL != proct->revstderr && NULL == proct->revstderr->sink) {
313+
/* if they asked for stderr to be combined with stdout, then we
314+
* only create one file and tell the IOF to put both streams
315+
* into it. Otherwise, we create separate files for each stream */
316+
if (orte_get_attribute(&jobdat->attributes, ORTE_JOB_MERGE_STDERR_STDOUT, NULL, OPAL_BOOL)) {
317+
/* just use the stdout sink */
318+
OBJ_RETAIN(proct->revstdout->sink);
319+
proct->revstdout->sink->tag = ORTE_IOF_STDMERGE; // show that it is merged
320+
proct->revstderr->sink = proct->revstdout->sink;
321+
} else {
322+
asprintf(&outfile, "%s/stderr", outdir);
323+
fdout = open(outfile, O_CREAT|O_RDWR|O_TRUNC, 0644);
324+
free(outfile);
325+
if (fdout < 0) {
326+
/* couldn't be opened */
327+
ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE);
328+
return ORTE_ERR_FILE_OPEN_FAILURE;
329+
}
330+
/* define a sink to that file descriptor */
331+
ORTE_IOF_SINK_DEFINE(&proct->revstderr->sink, dst_name,
332+
proct->revstderr->fd, ORTE_IOF_STDERR,
333+
orte_iof_base_write_handler);
340334
}
341-
/* define a sink to that file descriptor */
342-
ORTE_IOF_SINK_DEFINE(stderrsink, dst_name, fdout, ORTE_IOF_STDERR,
343-
orte_iof_base_write_handler);
344335
}
345-
/* always tie the sink for stddiag to stderr */
346-
OBJ_RETAIN(*stderrsink);
347-
*stddiagsink = *stderrsink;
336+
337+
if (NULL != proct->revstddiag && NULL == proct->revstddiag->sink) {
338+
/* always tie the sink for stddiag to stderr */
339+
OBJ_RETAIN(proct->revstderr->sink);
340+
proct->revstddiag->sink = proct->revstderr->sink;
341+
}
348342
}
343+
349344
return ORTE_SUCCESS;
350345
}

orte/mca/iof/base/iof_base_setup.h

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
* Copyright (c) 2004-2005 The Regents of the University of California.
1111
* All rights reserved.
1212
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
13-
* Copyright (c) 2016 Intel, Inc. All rights reserved.
13+
* Copyright (c) 2016-2017 Intel, Inc. All rights reserved.
1414
* $COPYRIGHT$
1515
*
1616
* Additional copyrights may follow
@@ -57,9 +57,6 @@ ORTE_DECLSPEC int orte_iof_base_setup_parent(const orte_process_name_t* name,
5757
/* setup output files */
5858
ORTE_DECLSPEC int orte_iof_base_setup_output_files(const orte_process_name_t* dst_name,
5959
orte_job_t *jobdat,
60-
orte_iof_proc_t *proct,
61-
orte_iof_sink_t **stdoutsink,
62-
orte_iof_sink_t **stderrsink,
63-
orte_iof_sink_t **stddiagsink);
60+
orte_iof_proc_t *proct);
6461

6562
#endif

orte/mca/iof/hnp/iof_hnp.c

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,6 @@ static int hnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag,
136136
orte_iof_proc_t *proct, *pptr;
137137
int flags, rc;
138138
orte_ns_cmp_bitmask_t mask = ORTE_NS_CMP_ALL;
139-
orte_iof_sink_t *stdoutsink=NULL, *stderrsink=NULL, *stddiagsink=NULL;
140139

141140
/* don't do this if the dst vpid is invalid or the fd is negative! */
142141
if (ORTE_VPID_INVALID == dst_name->vpid || fd < 0) {
@@ -178,27 +177,23 @@ static int hnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag,
178177
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
179178
return ORTE_ERR_NOT_FOUND;
180179
}
181-
/* setup any requested output files */
182-
if (ORTE_SUCCESS != (rc = orte_iof_base_setup_output_files(dst_name, jdata, proct,
183-
&stdoutsink, &stderrsink, &stddiagsink))) {
184-
ORTE_ERROR_LOG(rc);
185-
return rc;
186-
}
187-
188180
/* define a read event and activate it */
189181
if (src_tag & ORTE_IOF_STDOUT) {
190182
ORTE_IOF_READ_EVENT(&proct->revstdout, proct, fd, ORTE_IOF_STDOUT,
191183
orte_iof_hnp_read_local_handler, false);
192-
proct->revstdout->sink = stdoutsink;
193184
} else if (src_tag & ORTE_IOF_STDERR) {
194185
ORTE_IOF_READ_EVENT(&proct->revstderr, proct, fd, ORTE_IOF_STDERR,
195186
orte_iof_hnp_read_local_handler, false);
196-
proct->revstderr->sink = stderrsink;
197187
} else if (src_tag & ORTE_IOF_STDDIAG) {
198188
ORTE_IOF_READ_EVENT(&proct->revstddiag, proct, fd, ORTE_IOF_STDDIAG,
199189
orte_iof_hnp_read_local_handler, false);
200-
proct->revstddiag->sink = stddiagsink;
201190
}
191+
/* setup any requested output files */
192+
if (ORTE_SUCCESS != (rc = orte_iof_base_setup_output_files(dst_name, jdata, proct))) {
193+
ORTE_ERROR_LOG(rc);
194+
return rc;
195+
}
196+
202197
/* if -all- of the readevents for this proc have been defined, then
203198
* activate them. Otherwise, we can think that the proc is complete
204199
* because one of the readevents fires -prior- to all of them having

orte/mca/iof/orted/iof_orted.c

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -120,11 +120,11 @@ static int init(void)
120120
* to the HNP
121121
*/
122122

123-
static int orted_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag, int fd)
123+
static int orted_push(const orte_process_name_t* dst_name,
124+
orte_iof_tag_t src_tag, int fd)
124125
{
125126
int flags;
126127
orte_iof_proc_t *proct;
127-
orte_iof_sink_t *stdoutsink=NULL, *stderrsink=NULL, *stddiagsink=NULL;
128128
int rc;
129129
orte_job_t *jobdat=NULL;
130130
orte_ns_cmp_bitmask_t mask;
@@ -166,25 +166,21 @@ static int orted_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_ta
166166
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
167167
return ORTE_ERR_NOT_FOUND;
168168
}
169-
/* setup any requested output files */
170-
if (ORTE_SUCCESS != (rc = orte_iof_base_setup_output_files(dst_name, jobdat, proct,
171-
&stdoutsink, &stderrsink, &stddiagsink))) {
172-
ORTE_ERROR_LOG(rc);
173-
return rc;
174-
}
175169
/* define a read event and activate it */
176170
if (src_tag & ORTE_IOF_STDOUT) {
177171
ORTE_IOF_READ_EVENT(&proct->revstdout, proct, fd, ORTE_IOF_STDOUT,
178172
orte_iof_orted_read_handler, false);
179-
proct->revstdout->sink = stdoutsink;
180173
} else if (src_tag & ORTE_IOF_STDERR) {
181174
ORTE_IOF_READ_EVENT(&proct->revstderr, proct, fd, ORTE_IOF_STDERR,
182175
orte_iof_orted_read_handler, false);
183-
proct->revstderr->sink = stderrsink;
184176
} else if (src_tag & ORTE_IOF_STDDIAG) {
185177
ORTE_IOF_READ_EVENT(&proct->revstddiag, proct, fd, ORTE_IOF_STDDIAG,
186178
orte_iof_orted_read_handler, false);
187-
proct->revstddiag->sink = stddiagsink;
179+
}
180+
/* setup any requested output files */
181+
if (ORTE_SUCCESS != (rc = orte_iof_base_setup_output_files(dst_name, jobdat, proct))) {
182+
ORTE_ERROR_LOG(rc);
183+
return rc;
188184
}
189185

190186
/* if -all- of the readevents for this proc have been defined, then

0 commit comments

Comments
 (0)