@@ -164,6 +164,7 @@ static const char *orte_plm_rsh_shell_name[7] = {
164164 */ 
165165static  void  set_handler_default (int  sig );
166166static  orte_plm_rsh_shell_t  find_shell (char  * shell );
167+ static  int  launch_agent_setup (const  char  * agent , char  * path );
167168static  void  ssh_child (int  argc , char  * * argv ) __opal_attribute_noreturn__ ;
168169static  int  rsh_probe (char  * nodename ,
169170                     orte_plm_rsh_shell_t  * shell );
@@ -177,14 +178,59 @@ static void process_launch_list(int fd, short args, void *cbdata);
177178static  int  num_in_progress = 0 ;
178179static  opal_list_t  launch_list ;
179180static  opal_event_t  launch_event ;
181+ static  char  * rsh_agent_path = NULL ;
182+ static  char  * * rsh_agent_argv = NULL ;
180183
181184/** 
182185 * Init the module 
183186 */ 
184187static  int  rsh_init (void )
185188{
189+     char  * tmp ;
186190    int  rc ;
187191
192+     /* we were selected, so setup the launch agent */ 
193+     if  (mca_plm_rsh_component .using_qrsh ) {
194+         /* perform base setup for qrsh */ 
195+         asprintf (& tmp , "%s/bin/%s" , getenv ("SGE_ROOT" ), getenv ("ARC" ));
196+         if  (ORTE_SUCCESS  !=  (rc  =  launch_agent_setup ("qrsh" , tmp ))) {
197+             ORTE_ERROR_LOG (rc );
198+             free (tmp );
199+             return  rc ;
200+         }
201+         free (tmp );
202+         /* automatically add -inherit and grid engine PE related flags */ 
203+         opal_argv_append_nosize (& rsh_agent_argv , "-inherit" );
204+         /* Don't use the "-noshell" flag as qrsh would have a problem 
205+          * swallowing a long command */ 
206+         opal_argv_append_nosize (& rsh_agent_argv , "-nostdin" );
207+         opal_argv_append_nosize (& rsh_agent_argv , "-V" );
208+         if  (0  <  opal_output_get_verbosity (orte_plm_base_framework .framework_output )) {
209+             opal_argv_append_nosize (& rsh_agent_argv , "-verbose" );
210+             tmp  =  opal_argv_join (rsh_agent_argv , ' ' );
211+             opal_output_verbose (1 , orte_plm_base_framework .framework_output ,
212+                                 "%s plm:rsh: using \"%s\" for launching\n" ,
213+                                 ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ), tmp );
214+             free (tmp );
215+         }
216+     } else  if (mca_plm_rsh_component .using_llspawn ) {
217+         /* perform base setup for llspawn */ 
218+         if  (ORTE_SUCCESS  !=  (rc  =  launch_agent_setup ("llspawn" , NULL ))) {
219+             ORTE_ERROR_LOG (rc );
220+             return  rc ;
221+         }
222+         opal_output_verbose (1 , orte_plm_base_framework .framework_output ,
223+                             "%s plm:rsh: using \"%s\" for launching\n" ,
224+                             ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ),
225+                             rsh_agent_path );
226+     } else  {
227+         /* not using qrsh or llspawn - use MCA-specified agent */ 
228+         if  (ORTE_SUCCESS  !=  (rc  =  launch_agent_setup (mca_plm_rsh_component .agent , NULL ))) {
229+             ORTE_ERROR_LOG (rc );
230+             return  rc ;
231+         }
232+     }
233+ 
188234    /* point to our launch command */ 
189235    if  (ORTE_SUCCESS  !=  (rc  =  orte_state .add_job_state (ORTE_JOB_STATE_LAUNCH_DAEMONS ,
190236                                                       launch_daemons , ORTE_SYS_PRI ))) {
@@ -325,8 +371,8 @@ static int setup_launch(int *argcptr, char ***argvptr,
325371    /* 
326372     * Build argv array 
327373     */ 
328-     argv  =  opal_argv_copy (mca_plm_rsh_component . agent_argv );
329-     argc  =  opal_argv_count (mca_plm_rsh_component . agent_argv );
374+     argv  =  opal_argv_copy (rsh_agent_argv );
375+     argc  =  opal_argv_count (argv );
330376    /* if any ssh args were provided, now is the time to add them */ 
331377    if  (NULL  !=  mca_plm_rsh_component .ssh_args ) {
332378        char  * * ssh_argv ;
@@ -676,7 +722,7 @@ static void ssh_child(int argc, char **argv)
676722     * about remote launches here 
677723     */ 
678724    exec_argv  =  argv ;
679-     exec_path  =  mca_plm_rsh_component . agent_path ;
725+     exec_path  =  strdup ( rsh_agent_path ) ;
680726
681727    /* Don't let ssh slurp all of our stdin! */ 
682728    fdin  =  open ("/dev/null" , O_RDWR );
@@ -1331,6 +1377,63 @@ static orte_plm_rsh_shell_t find_shell(char *shell)
13311377    return  ORTE_PLM_RSH_SHELL_UNKNOWN ;
13321378}
13331379
1380+ static  int  launch_agent_setup (const  char  * agent , char  * path )
1381+ {
1382+     char  * bname ;
1383+     int  i ;
1384+ 
1385+     /* if no agent was provided, then report not found */ 
1386+     if  (NULL  ==  mca_plm_rsh_component .agent  &&  NULL  ==  agent ) {
1387+         return  ORTE_ERR_NOT_FOUND ;
1388+     }
1389+ 
1390+     /* search for the argv */ 
1391+     OPAL_OUTPUT_VERBOSE ((5 , orte_plm_base_framework .framework_output ,
1392+                          "%s plm:rsh_setup on agent %s path %s" ,
1393+                          ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ),
1394+                          (NULL  ==  agent ) ? mca_plm_rsh_component .agent  : agent ,
1395+                          (NULL  ==  path ) ? "NULL"  : path ));
1396+     rsh_agent_argv  =  orte_plm_rsh_search (agent , path );
1397+ 
1398+     if  (0  ==  opal_argv_count (rsh_agent_argv )) {
1399+         /* nothing was found */ 
1400+         return  ORTE_ERR_NOT_FOUND ;
1401+     }
1402+ 
1403+     /* see if we can find the agent in the path */ 
1404+     rsh_agent_path  =  opal_path_findv (rsh_agent_argv [0 ], X_OK , environ , path );
1405+ 
1406+     if  (NULL  ==  rsh_agent_path ) {
1407+         /* not an error - just report not found */ 
1408+         opal_argv_free (rsh_agent_argv );
1409+         return  ORTE_ERR_NOT_FOUND ;
1410+     }
1411+ 
1412+     bname  =  opal_basename (rsh_agent_argv [0 ]);
1413+     if  (NULL  !=  bname  &&  0  ==  strcmp (bname , "ssh" )) {
1414+         /* if xterm option was given, add '-X', ensuring we don't do it twice */ 
1415+         if  (NULL  !=  orte_xterm ) {
1416+             opal_argv_append_unique_nosize (& rsh_agent_argv , "-X" , false);
1417+         } else  if  (0  >= opal_output_get_verbosity (orte_plm_base_framework .framework_output )) {
1418+             /* if debug was not specified, and the user didn't explicitly 
1419+              * specify X11 forwarding/non-forwarding, add "-x" if it 
1420+              * isn't already there (check either case) 
1421+              */ 
1422+             for  (i  =  1 ; NULL  !=  rsh_agent_argv [i ]; ++ i ) {
1423+                 if  (0  ==  strcasecmp ("-x" , rsh_agent_argv [i ])) {
1424+                     break ;
1425+                 }
1426+             }
1427+             if  (NULL  ==  rsh_agent_argv [i ]) {
1428+                 opal_argv_append_nosize (& rsh_agent_argv , "-x" );
1429+             }
1430+         }
1431+     }
1432+ 
1433+     /* the caller can append any additional argv's they desire */ 
1434+     return  ORTE_SUCCESS ;
1435+ }
1436+ 
13341437/** 
13351438 * Check the Shell variable and system type on the specified node 
13361439 */ 
@@ -1454,15 +1557,10 @@ static int setup_shell(orte_plm_rsh_shell_t *rshell,
14541557        struct  passwd  * p ;
14551558
14561559        p  =  getpwuid (getuid ());
1457-         if ( NULL  ==  p  ) {
1458-             /* This user is unknown to the system. Therefore, there is no reason we 
1459-              * spawn whatsoever in his name. Give up with a HUGE error message. 
1460-              */ 
1461-             orte_show_help ( "help-plm-rsh.txt" , "unknown-user" , true, (int )getuid () );
1462-             return  ORTE_ERR_FATAL ;
1560+         if ( NULL  !=  p  ) {
1561+             param  =  p -> pw_shell ;
1562+             local_shell  =  find_shell (p -> pw_shell );
14631563        }
1464-         param  =  p -> pw_shell ;
1465-         local_shell  =  find_shell (p -> pw_shell );
14661564    }
14671565#endif 
14681566
0 commit comments