@@ -144,11 +144,7 @@ static int orte_odls_alps_restart_proc(orte_proc_t *child);
144
144
static void send_error_show_help (int fd , int exit_status ,
145
145
const char * file , const char * topic , ...)
146
146
__opal_attribute_noreturn__ ;
147
- static int do_child (orte_proc_t * child ,
148
- char * app , char * * argv ,
149
- char * * environ_copy ,
150
- orte_job_t * jobdat , int write_fd ,
151
- orte_iof_base_io_conf_t opts )
147
+ static int do_child (orte_odls_spawn_caddy_t * cd , int write_fd )
152
148
__opal_attribute_noreturn__ ;
153
149
154
150
@@ -342,20 +338,15 @@ static int close_open_file_descriptors(int write_fd, orte_iof_base_io_conf_t opt
342
338
return ORTE_SUCCESS ;
343
339
}
344
340
345
- static int do_child ( orte_proc_t * child ,
346
- char * app , char * * argv ,
347
- char * * environ_copy ,
348
- orte_job_t * jobdat , int write_fd ,
349
- orte_iof_base_io_conf_t opts )
341
+ static int do_child (orte_odls_spawn_caddy_t * cd , int write_fd )
350
342
{
351
- int i , rc ;
343
+ int i ;
352
344
sigset_t sigs ;
353
- char * param , * msg ;
354
345
355
346
/* Setup the pipe to be close-on-exec */
356
347
opal_fd_set_cloexec (write_fd );
357
348
358
- if (NULL != child ) {
349
+ if (NULL != cd -> child ) {
359
350
/* setup stdout/stderr so that any error messages that we
360
351
may print out will get displayed back at orterun.
361
352
@@ -369,20 +360,19 @@ static int do_child( orte_proc_t *child,
369
360
always outputs a nice, single message indicating what
370
361
happened
371
362
*/
372
- if (ORTE_SUCCESS != (i = orte_iof_base_setup_child (& opts ,
373
- & environ_copy ))) {
363
+ if (ORTE_SUCCESS != (i = orte_iof_base_setup_child (& cd -> opts , & cd -> env ))) {
374
364
ORTE_ERROR_LOG (i );
375
365
send_error_show_help (write_fd , 1 ,
376
366
"help-orte-odls-alps.txt" ,
377
367
"iof setup failed" ,
378
- orte_process_info .nodename , app );
368
+ orte_process_info .nodename , cd -> app -> app );
379
369
/* Does not return */
380
370
}
381
371
382
372
/* now set any child-level controls such as binding */
383
- orte_rtc .set (jobdat , child , & environ_copy , write_fd );
373
+ orte_rtc .set (cd -> jdata , cd -> child , & cd -> env , write_fd );
384
374
385
- } else if (!ORTE_FLAG_TEST (jobdat , ORTE_JOB_FLAG_FORWARD_OUTPUT )) {
375
+ } else if (!ORTE_FLAG_TEST (cd -> jdata , ORTE_JOB_FLAG_FORWARD_OUTPUT )) {
386
376
/* tie stdin/out/err/internal to /dev/null */
387
377
int fdnull ;
388
378
for (i = 0 ; i < 3 ; i ++ ) {
@@ -393,24 +383,24 @@ static int do_child( orte_proc_t *child,
393
383
close (fdnull );
394
384
}
395
385
fdnull = open ("/dev/null" , O_RDONLY , 0 );
396
- if (fdnull > opts .p_internal [1 ]) {
397
- dup2 (fdnull , opts .p_internal [1 ]);
386
+ if (fdnull > cd -> opts .p_internal [1 ]) {
387
+ dup2 (fdnull , cd -> opts .p_internal [1 ]);
398
388
}
399
389
close (fdnull );
400
390
}
401
391
402
- if (ORTE_SUCCESS != close_open_file_descriptors (write_fd , opts )) {
392
+ if (ORTE_SUCCESS != close_open_file_descriptors (write_fd , cd -> opts )) {
403
393
send_error_show_help (write_fd , 1 , "help-orte-odls-alps.txt" ,
404
394
"close fds" ,
405
- orte_process_info .nodename , app ,
395
+ orte_process_info .nodename , cd -> app -> app ,
406
396
__FILE__ , __LINE__ );
407
397
}
408
398
409
399
410
- if (argv == NULL ) {
411
- argv = malloc (sizeof (char * )* 2 );
412
- argv [0 ] = strdup (app );
413
- argv [1 ] = NULL ;
400
+ if (cd -> argv == NULL ) {
401
+ cd -> argv = malloc (sizeof (char * )* 2 );
402
+ cd -> argv [0 ] = strdup (cd -> app -> app );
403
+ cd -> argv [1 ] = NULL ;
414
404
}
415
405
416
406
/* Set signal handlers back to the default. Do this close to
@@ -437,37 +427,33 @@ static int do_child( orte_proc_t *child,
437
427
438
428
if (10 < opal_output_get_verbosity (orte_odls_base_framework .framework_output )) {
439
429
int jout ;
440
- opal_output (0 , "%s STARTING %s" , ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ), app );
441
- for (jout = 0 ; NULL != argv [jout ]; jout ++ ) {
442
- opal_output (0 , "%s\tARGV[%d]: %s" , ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ), jout , argv [jout ]);
430
+ opal_output (0 , "%s STARTING %s" , ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ), cd -> app -> app );
431
+ for (jout = 0 ; NULL != cd -> argv [jout ]; jout ++ ) {
432
+ opal_output (0 , "%s\tARGV[%d]: %s" , ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ), jout , cd -> argv [jout ]);
443
433
}
444
- for (jout = 0 ; NULL != environ_copy [jout ]; jout ++ ) {
445
- opal_output (0 , "%s\tENVIRON[%d]: %s" , ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ), jout , environ_copy [jout ]);
434
+ for (jout = 0 ; NULL != cd -> env [jout ]; jout ++ ) {
435
+ opal_output (0 , "%s\tENVIRON[%d]: %s" , ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ), jout , cd -> env [jout ]);
446
436
}
447
437
}
448
438
449
- execve (app , argv , environ_copy );
439
+ execve (cd -> app -> app , cd -> argv , cd -> env );
450
440
send_error_show_help (write_fd , 1 ,
451
441
"help-orte-odls-alps.txt" , "execve error" ,
452
- orte_process_info .nodename , app , strerror (errno ));
442
+ orte_process_info .nodename , cd -> app -> app , strerror (errno ));
453
443
/* Does not return */
454
444
}
455
445
456
446
457
- static int do_parent (orte_proc_t * child ,
458
- char * app , char * * argv ,
459
- char * * environ_copy ,
460
- orte_job_t * jobdat , int read_fd ,
461
- orte_iof_base_io_conf_t opts )
447
+ static int do_parent (orte_odls_spawn_caddy_t * cd , int read_fd )
462
448
{
463
449
int rc ;
464
450
orte_odls_pipe_err_msg_t msg ;
465
451
char file [ORTE_ODLS_MAX_FILE_LEN + 1 ], topic [ORTE_ODLS_MAX_TOPIC_LEN + 1 ], * str = NULL ;
466
452
467
- close (opts .p_stdin [0 ]);
468
- close (opts .p_stdout [1 ]);
469
- close (opts .p_stderr [1 ]);
470
- close (opts .p_internal [1 ]);
453
+ close (cd -> opts .p_stdin [0 ]);
454
+ close (cd -> opts .p_stdout [1 ]);
455
+ close (cd -> opts .p_stderr [1 ]);
456
+ close (cd -> opts .p_internal [1 ]);
471
457
472
458
/* Block reading a message from the pipe */
473
459
while (1 ) {
@@ -483,18 +469,18 @@ static int do_parent(orte_proc_t *child,
483
469
ORTE_ERROR_LOG (rc );
484
470
close (read_fd );
485
471
486
- if (NULL != child ) {
487
- child -> state = ORTE_PROC_STATE_UNDEF ;
472
+ if (NULL != cd -> child ) {
473
+ cd -> child -> state = ORTE_PROC_STATE_UNDEF ;
488
474
}
489
475
return rc ;
490
476
}
491
477
492
478
/* Otherwise, we got a warning or error message from the child */
493
- if (NULL != child ) {
479
+ if (NULL != cd -> child ) {
494
480
if (msg .fatal ) {
495
- ORTE_FLAG_UNSET (child , ORTE_PROC_FLAG_ALIVE );
481
+ ORTE_FLAG_UNSET (cd -> child , ORTE_PROC_FLAG_ALIVE );
496
482
} else {
497
- ORTE_FLAG_SET (child , ORTE_PROC_FLAG_ALIVE );
483
+ ORTE_FLAG_SET (cd -> child , ORTE_PROC_FLAG_ALIVE );
498
484
}
499
485
}
500
486
@@ -504,10 +490,10 @@ static int do_parent(orte_proc_t *child,
504
490
if (OPAL_SUCCESS != rc ) {
505
491
orte_show_help ("help-orte-odls-alps.txt" , "syscall fail" ,
506
492
true,
507
- orte_process_info .nodename , app ,
493
+ orte_process_info .nodename , cd -> app -> app ,
508
494
"opal_fd_read" , __FILE__ , __LINE__ );
509
- if (NULL != child ) {
510
- child -> state = ORTE_PROC_STATE_UNDEF ;
495
+ if (NULL != cd -> child ) {
496
+ cd -> child -> state = ORTE_PROC_STATE_UNDEF ;
511
497
}
512
498
return rc ;
513
499
}
@@ -518,10 +504,10 @@ static int do_parent(orte_proc_t *child,
518
504
if (OPAL_SUCCESS != rc ) {
519
505
orte_show_help ("help-orte-odls-alps.txt" , "syscall fail" ,
520
506
true,
521
- orte_process_info .nodename , app ,
507
+ orte_process_info .nodename , cd -> app -> app ,
522
508
"opal_fd_read" , __FILE__ , __LINE__ );
523
- if (NULL != child ) {
524
- child -> state = ORTE_PROC_STATE_UNDEF ;
509
+ if (NULL != cd -> child ) {
510
+ cd -> child -> state = ORTE_PROC_STATE_UNDEF ;
525
511
}
526
512
return rc ;
527
513
}
@@ -532,10 +518,10 @@ static int do_parent(orte_proc_t *child,
532
518
if (NULL == str ) {
533
519
orte_show_help ("help-orte-odls-alps.txt" , "syscall fail" ,
534
520
true,
535
- orte_process_info .nodename , app ,
521
+ orte_process_info .nodename , cd -> app -> app ,
536
522
"opal_fd_read" , __FILE__ , __LINE__ );
537
- if (NULL != child ) {
538
- child -> state = ORTE_PROC_STATE_UNDEF ;
523
+ if (NULL != cd -> child ) {
524
+ cd -> child -> state = ORTE_PROC_STATE_UNDEF ;
539
525
}
540
526
return rc ;
541
527
}
@@ -556,9 +542,9 @@ static int do_parent(orte_proc_t *child,
556
542
closed, indicating that the child launched
557
543
successfully). */
558
544
if (msg .fatal ) {
559
- if (NULL != child ) {
560
- child -> state = ORTE_PROC_STATE_FAILED_TO_START ;
561
- ORTE_FLAG_UNSET (child , ORTE_PROC_FLAG_ALIVE );
545
+ if (NULL != cd -> child ) {
546
+ cd -> child -> state = ORTE_PROC_STATE_FAILED_TO_START ;
547
+ ORTE_FLAG_UNSET (cd -> child , ORTE_PROC_FLAG_ALIVE );
562
548
}
563
549
close (read_fd );
564
550
return ORTE_ERR_FAILED_TO_START ;
@@ -568,9 +554,9 @@ static int do_parent(orte_proc_t *child,
568
554
/* If we got here, it means that the pipe closed without
569
555
indication of a fatal error, meaning that the child process
570
556
launched successfully. */
571
- if (NULL != child ) {
572
- child -> state = ORTE_PROC_STATE_RUNNING ;
573
- ORTE_FLAG_SET (child , ORTE_PROC_FLAG_ALIVE );
557
+ if (NULL != cd -> child ) {
558
+ cd -> child -> state = ORTE_PROC_STATE_RUNNING ;
559
+ ORTE_FLAG_SET (cd -> child , ORTE_PROC_FLAG_ALIVE );
574
560
}
575
561
close (read_fd );
576
562
@@ -581,14 +567,10 @@ static int do_parent(orte_proc_t *child,
581
567
/**
582
568
* Fork/exec the specified processes
583
569
*/
584
- static int odls_alps_fork_local_proc (orte_proc_t * child ,
585
- char * app ,
586
- char * * argv ,
587
- char * * environ_copy ,
588
- orte_job_t * jobdat ,
589
- orte_iof_base_io_conf_t opts )
570
+ static int odls_alps_fork_local_proc (void * cdptr )
590
571
{
591
- int rc , p [2 ];
572
+ orte_odls_spawn_caddy_t * cd = (orte_odls_spawn_caddy_t * )cdptr ;
573
+ int p [2 ];
592
574
pid_t pid ;
593
575
594
576
/* A pipe is used to communicate between the parent and child to
@@ -601,24 +583,24 @@ static int odls_alps_fork_local_proc(orte_proc_t *child,
601
583
the pipe, then the child was letting us know why it failed. */
602
584
if (pipe (p ) < 0 ) {
603
585
ORTE_ERROR_LOG (ORTE_ERR_SYS_LIMITS_PIPES );
604
- if (NULL != child ) {
605
- child -> state = ORTE_PROC_STATE_FAILED_TO_START ;
606
- child -> exit_code = ORTE_ERR_SYS_LIMITS_PIPES ;
586
+ if (NULL != cd -> child ) {
587
+ cd -> child -> state = ORTE_PROC_STATE_FAILED_TO_START ;
588
+ cd -> child -> exit_code = ORTE_ERR_SYS_LIMITS_PIPES ;
607
589
}
608
590
return ORTE_ERR_SYS_LIMITS_PIPES ;
609
591
}
610
592
611
593
/* Fork off the child */
612
594
pid = fork ();
613
- if (NULL != child ) {
614
- child -> pid = pid ;
595
+ if (NULL != cd -> child ) {
596
+ cd -> child -> pid = pid ;
615
597
}
616
598
617
599
if (pid < 0 ) {
618
600
ORTE_ERROR_LOG (ORTE_ERR_SYS_LIMITS_CHILDREN );
619
- if (NULL != child ) {
620
- child -> state = ORTE_PROC_STATE_FAILED_TO_START ;
621
- child -> exit_code = ORTE_ERR_SYS_LIMITS_CHILDREN ;
601
+ if (NULL != cd -> child ) {
602
+ cd -> child -> state = ORTE_PROC_STATE_FAILED_TO_START ;
603
+ cd -> child -> exit_code = ORTE_ERR_SYS_LIMITS_CHILDREN ;
622
604
}
623
605
return ORTE_ERR_SYS_LIMITS_CHILDREN ;
624
606
}
@@ -628,12 +610,12 @@ static int odls_alps_fork_local_proc(orte_proc_t *child,
628
610
#if HAVE_SETPGID
629
611
setpgid (0 , 0 );
630
612
#endif
631
- do_child (child , app , argv , environ_copy , jobdat , p [1 ], opts );
613
+ do_child (cd , p [1 ]);
632
614
/* Does not return */
633
615
}
634
616
635
617
close (p [1 ]);
636
- return do_parent (child , app , argv , environ_copy , jobdat , p [0 ], opts );
618
+ return do_parent (cd , p [0 ]);
637
619
}
638
620
639
621
@@ -643,8 +625,8 @@ static int odls_alps_fork_local_proc(orte_proc_t *child,
643
625
644
626
int orte_odls_alps_launch_local_procs (opal_buffer_t * data )
645
627
{
646
- int rc ;
647
628
orte_jobid_t job ;
629
+ int rc ;
648
630
649
631
/* construct the list of children we are to launch */
650
632
if (ORTE_SUCCESS != (rc = orte_odls_base_default_construct_child_list (data , & job ))) {
@@ -729,4 +711,3 @@ static int orte_odls_alps_restart_proc(orte_proc_t *child)
729
711
}
730
712
return rc ;
731
713
}
732
-
0 commit comments