@@ -164,6 +164,7 @@ static const char *orte_plm_rsh_shell_name[7] = {
164164 */
165165static void set_handler_default (int sig );
166166static orte_plm_rsh_shell_t find_shell (char * shell );
167+ static int launch_agent_setup (const char * agent , char * path );
167168static void ssh_child (int argc , char * * argv ) __opal_attribute_noreturn__ ;
168169static int rsh_probe (char * nodename ,
169170 orte_plm_rsh_shell_t * shell );
@@ -177,14 +178,59 @@ static void process_launch_list(int fd, short args, void *cbdata);
177178static int num_in_progress = 0 ;
178179static opal_list_t launch_list ;
179180static opal_event_t launch_event ;
181+ static char * rsh_agent_path = NULL ;
182+ static char * * rsh_agent_argv = NULL ;
180183
181184/**
182185 * Init the module
183186 */
184187static int rsh_init (void )
185188{
189+ char * tmp ;
186190 int rc ;
187191
192+ /* we were selected, so setup the launch agent */
193+ if (mca_plm_rsh_component .using_qrsh ) {
194+ /* perform base setup for qrsh */
195+ asprintf (& tmp , "%s/bin/%s" , getenv ("SGE_ROOT" ), getenv ("ARC" ));
196+ if (ORTE_SUCCESS != (rc = launch_agent_setup ("qrsh" , tmp ))) {
197+ ORTE_ERROR_LOG (rc );
198+ free (tmp );
199+ return rc ;
200+ }
201+ free (tmp );
202+ /* automatically add -inherit and grid engine PE related flags */
203+ opal_argv_append_nosize (& rsh_agent_argv , "-inherit" );
204+ /* Don't use the "-noshell" flag as qrsh would have a problem
205+ * swallowing a long command */
206+ opal_argv_append_nosize (& rsh_agent_argv , "-nostdin" );
207+ opal_argv_append_nosize (& rsh_agent_argv , "-V" );
208+ if (0 < opal_output_get_verbosity (orte_plm_base_framework .framework_output )) {
209+ opal_argv_append_nosize (& rsh_agent_argv , "-verbose" );
210+ tmp = opal_argv_join (rsh_agent_argv , ' ' );
211+ opal_output_verbose (1 , orte_plm_base_framework .framework_output ,
212+ "%s plm:rsh: using \"%s\" for launching\n" ,
213+ ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ), tmp );
214+ free (tmp );
215+ }
216+ } else if (mca_plm_rsh_component .using_llspawn ) {
217+ /* perform base setup for llspawn */
218+ if (ORTE_SUCCESS != (rc = launch_agent_setup ("llspawn" , NULL ))) {
219+ ORTE_ERROR_LOG (rc );
220+ return rc ;
221+ }
222+ opal_output_verbose (1 , orte_plm_base_framework .framework_output ,
223+ "%s plm:rsh: using \"%s\" for launching\n" ,
224+ ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ),
225+ rsh_agent_path );
226+ } else {
227+ /* not using qrsh or llspawn - use MCA-specified agent */
228+ if (ORTE_SUCCESS != (rc = launch_agent_setup (mca_plm_rsh_component .agent , NULL ))) {
229+ ORTE_ERROR_LOG (rc );
230+ return rc ;
231+ }
232+ }
233+
188234 /* point to our launch command */
189235 if (ORTE_SUCCESS != (rc = orte_state .add_job_state (ORTE_JOB_STATE_LAUNCH_DAEMONS ,
190236 launch_daemons , ORTE_SYS_PRI ))) {
@@ -325,8 +371,8 @@ static int setup_launch(int *argcptr, char ***argvptr,
325371 /*
326372 * Build argv array
327373 */
328- argv = opal_argv_copy (mca_plm_rsh_component . agent_argv );
329- argc = opal_argv_count (mca_plm_rsh_component . agent_argv );
374+ argv = opal_argv_copy (rsh_agent_argv );
375+ argc = opal_argv_count (argv );
330376 /* if any ssh args were provided, now is the time to add them */
331377 if (NULL != mca_plm_rsh_component .ssh_args ) {
332378 char * * ssh_argv ;
@@ -676,7 +722,7 @@ static void ssh_child(int argc, char **argv)
676722 * about remote launches here
677723 */
678724 exec_argv = argv ;
679- exec_path = mca_plm_rsh_component . agent_path ;
725+ exec_path = strdup ( rsh_agent_path ) ;
680726
681727 /* Don't let ssh slurp all of our stdin! */
682728 fdin = open ("/dev/null" , O_RDWR );
@@ -1331,6 +1377,63 @@ static orte_plm_rsh_shell_t find_shell(char *shell)
13311377 return ORTE_PLM_RSH_SHELL_UNKNOWN ;
13321378}
13331379
1380+ static int launch_agent_setup (const char * agent , char * path )
1381+ {
1382+ char * bname ;
1383+ int i ;
1384+
1385+ /* if no agent was provided, then report not found */
1386+ if (NULL == mca_plm_rsh_component .agent && NULL == agent ) {
1387+ return ORTE_ERR_NOT_FOUND ;
1388+ }
1389+
1390+ /* search for the argv */
1391+ OPAL_OUTPUT_VERBOSE ((5 , orte_plm_base_framework .framework_output ,
1392+ "%s plm:rsh_setup on agent %s path %s" ,
1393+ ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ),
1394+ (NULL == agent ) ? mca_plm_rsh_component .agent : agent ,
1395+ (NULL == path ) ? "NULL" : path ));
1396+ rsh_agent_argv = orte_plm_rsh_search (agent , path );
1397+
1398+ if (0 == opal_argv_count (rsh_agent_argv )) {
1399+ /* nothing was found */
1400+ return ORTE_ERR_NOT_FOUND ;
1401+ }
1402+
1403+ /* see if we can find the agent in the path */
1404+ rsh_agent_path = opal_path_findv (rsh_agent_argv [0 ], X_OK , environ , path );
1405+
1406+ if (NULL == rsh_agent_path ) {
1407+ /* not an error - just report not found */
1408+ opal_argv_free (rsh_agent_argv );
1409+ return ORTE_ERR_NOT_FOUND ;
1410+ }
1411+
1412+ bname = opal_basename (rsh_agent_argv [0 ]);
1413+ if (NULL != bname && 0 == strcmp (bname , "ssh" )) {
1414+ /* if xterm option was given, add '-X', ensuring we don't do it twice */
1415+ if (NULL != orte_xterm ) {
1416+ opal_argv_append_unique_nosize (& rsh_agent_argv , "-X" , false);
1417+ } else if (0 >= opal_output_get_verbosity (orte_plm_base_framework .framework_output )) {
1418+ /* if debug was not specified, and the user didn't explicitly
1419+ * specify X11 forwarding/non-forwarding, add "-x" if it
1420+ * isn't already there (check either case)
1421+ */
1422+ for (i = 1 ; NULL != rsh_agent_argv [i ]; ++ i ) {
1423+ if (0 == strcasecmp ("-x" , rsh_agent_argv [i ])) {
1424+ break ;
1425+ }
1426+ }
1427+ if (NULL == rsh_agent_argv [i ]) {
1428+ opal_argv_append_nosize (& rsh_agent_argv , "-x" );
1429+ }
1430+ }
1431+ }
1432+
1433+ /* the caller can append any additional argv's they desire */
1434+ return ORTE_SUCCESS ;
1435+ }
1436+
13341437/**
13351438 * Check the Shell variable and system type on the specified node
13361439 */
@@ -1454,15 +1557,10 @@ static int setup_shell(orte_plm_rsh_shell_t *rshell,
14541557 struct passwd * p ;
14551558
14561559 p = getpwuid (getuid ());
1457- if ( NULL == p ) {
1458- /* This user is unknown to the system. Therefore, there is no reason we
1459- * spawn whatsoever in his name. Give up with a HUGE error message.
1460- */
1461- orte_show_help ( "help-plm-rsh.txt" , "unknown-user" , true, (int )getuid () );
1462- return ORTE_ERR_FATAL ;
1560+ if ( NULL != p ) {
1561+ param = p -> pw_shell ;
1562+ local_shell = find_shell (p -> pw_shell );
14631563 }
1464- param = p -> pw_shell ;
1465- local_shell = find_shell (p -> pw_shell );
14661564 }
14671565#endif
14681566
0 commit comments