Skip to content
This repository was archived by the owner on Sep 30, 2022. It is now read-only.

Commit a85789d

Browse files
authored
Merge pull request #1318 from rhc54/cmr20/rsh
If the ssh agent hasn't been given, then check for qrsh and friends
2 parents a3d9ed7 + 42799f4 commit a85789d

File tree

2 files changed

+113
-13
lines changed

2 files changed

+113
-13
lines changed

orte/mca/plm/rsh/plm_rsh_component.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -258,8 +258,10 @@ static int rsh_component_query(mca_base_module_t **module, int *priority)
258258
char *tmp;
259259

260260
/* Check if we are under Grid Engine parallel environment by looking at several
261-
* environment variables. If so, setup the path and argv[0]. */
262-
if (NULL == mca_plm_rsh_component.agent) {
261+
* environment variables. If so, setup the path and argv[0].
262+
* Note that we allow the user to specify the launch agent
263+
* even if they are in a Grid Engine environment */
264+
if (0 == strcmp(mca_plm_rsh_component.agent, "ssh : rsh")) {
263265
if (!mca_plm_rsh_component.disable_qrsh &&
264266
NULL != getenv("SGE_ROOT") && NULL != getenv("ARC") &&
265267
NULL != getenv("PE_HOSTFILE") && NULL != getenv("JOB_ID")) {

orte/mca/plm/rsh/plm_rsh_module.c

Lines changed: 109 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,7 @@ static const char *orte_plm_rsh_shell_name[7] = {
164164
*/
165165
static void set_handler_default(int sig);
166166
static orte_plm_rsh_shell_t find_shell(char *shell);
167+
static int launch_agent_setup(const char *agent, char *path);
167168
static void ssh_child(int argc, char **argv) __opal_attribute_noreturn__;
168169
static int rsh_probe(char *nodename,
169170
orte_plm_rsh_shell_t *shell);
@@ -177,14 +178,59 @@ static void process_launch_list(int fd, short args, void *cbdata);
177178
static int num_in_progress=0;
178179
static opal_list_t launch_list;
179180
static opal_event_t launch_event;
181+
static char *rsh_agent_path=NULL;
182+
static char **rsh_agent_argv=NULL;
180183

181184
/**
182185
* Init the module
183186
*/
184187
static int rsh_init(void)
185188
{
189+
char *tmp;
186190
int rc;
187191

192+
/* we were selected, so setup the launch agent */
193+
if (mca_plm_rsh_component.using_qrsh) {
194+
/* perform base setup for qrsh */
195+
asprintf(&tmp, "%s/bin/%s", getenv("SGE_ROOT"), getenv("ARC"));
196+
if (ORTE_SUCCESS != (rc = launch_agent_setup("qrsh", tmp))) {
197+
ORTE_ERROR_LOG(rc);
198+
free(tmp);
199+
return rc;
200+
}
201+
free(tmp);
202+
/* automatically add -inherit and grid engine PE related flags */
203+
opal_argv_append_nosize(&rsh_agent_argv, "-inherit");
204+
/* Don't use the "-noshell" flag as qrsh would have a problem
205+
* swallowing a long command */
206+
opal_argv_append_nosize(&rsh_agent_argv, "-nostdin");
207+
opal_argv_append_nosize(&rsh_agent_argv, "-V");
208+
if (0 < opal_output_get_verbosity(orte_plm_base_framework.framework_output)) {
209+
opal_argv_append_nosize(&rsh_agent_argv, "-verbose");
210+
tmp = opal_argv_join(rsh_agent_argv, ' ');
211+
opal_output_verbose(1, orte_plm_base_framework.framework_output,
212+
"%s plm:rsh: using \"%s\" for launching\n",
213+
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tmp);
214+
free(tmp);
215+
}
216+
} else if(mca_plm_rsh_component.using_llspawn) {
217+
/* perform base setup for llspawn */
218+
if (ORTE_SUCCESS != (rc = launch_agent_setup("llspawn", NULL))) {
219+
ORTE_ERROR_LOG(rc);
220+
return rc;
221+
}
222+
opal_output_verbose(1, orte_plm_base_framework.framework_output,
223+
"%s plm:rsh: using \"%s\" for launching\n",
224+
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
225+
rsh_agent_path);
226+
} else {
227+
/* not using qrsh or llspawn - use MCA-specified agent */
228+
if (ORTE_SUCCESS != (rc = launch_agent_setup(mca_plm_rsh_component.agent, NULL))) {
229+
ORTE_ERROR_LOG(rc);
230+
return rc;
231+
}
232+
}
233+
188234
/* point to our launch command */
189235
if (ORTE_SUCCESS != (rc = orte_state.add_job_state(ORTE_JOB_STATE_LAUNCH_DAEMONS,
190236
launch_daemons, ORTE_SYS_PRI))) {
@@ -325,8 +371,8 @@ static int setup_launch(int *argcptr, char ***argvptr,
325371
/*
326372
* Build argv array
327373
*/
328-
argv = opal_argv_copy(mca_plm_rsh_component.agent_argv);
329-
argc = opal_argv_count(mca_plm_rsh_component.agent_argv);
374+
argv = opal_argv_copy(rsh_agent_argv);
375+
argc = opal_argv_count(argv);
330376
/* if any ssh args were provided, now is the time to add them */
331377
if (NULL != mca_plm_rsh_component.ssh_args) {
332378
char **ssh_argv;
@@ -676,7 +722,7 @@ static void ssh_child(int argc, char **argv)
676722
* about remote launches here
677723
*/
678724
exec_argv = argv;
679-
exec_path = mca_plm_rsh_component.agent_path;
725+
exec_path = strdup(rsh_agent_path);
680726

681727
/* Don't let ssh slurp all of our stdin! */
682728
fdin = open("/dev/null", O_RDWR);
@@ -1331,6 +1377,63 @@ static orte_plm_rsh_shell_t find_shell(char *shell)
13311377
return ORTE_PLM_RSH_SHELL_UNKNOWN;
13321378
}
13331379

1380+
static int launch_agent_setup(const char *agent, char *path)
1381+
{
1382+
char *bname;
1383+
int i;
1384+
1385+
/* if no agent was provided, then report not found */
1386+
if (NULL == mca_plm_rsh_component.agent && NULL == agent) {
1387+
return ORTE_ERR_NOT_FOUND;
1388+
}
1389+
1390+
/* search for the argv */
1391+
OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
1392+
"%s plm:rsh_setup on agent %s path %s",
1393+
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1394+
(NULL == agent) ? mca_plm_rsh_component.agent : agent,
1395+
(NULL == path) ? "NULL" : path));
1396+
rsh_agent_argv = orte_plm_rsh_search(agent, path);
1397+
1398+
if (0 == opal_argv_count(rsh_agent_argv)) {
1399+
/* nothing was found */
1400+
return ORTE_ERR_NOT_FOUND;
1401+
}
1402+
1403+
/* see if we can find the agent in the path */
1404+
rsh_agent_path = opal_path_findv(rsh_agent_argv[0], X_OK, environ, path);
1405+
1406+
if (NULL == rsh_agent_path) {
1407+
/* not an error - just report not found */
1408+
opal_argv_free(rsh_agent_argv);
1409+
return ORTE_ERR_NOT_FOUND;
1410+
}
1411+
1412+
bname = opal_basename(rsh_agent_argv[0]);
1413+
if (NULL != bname && 0 == strcmp(bname, "ssh")) {
1414+
/* if xterm option was given, add '-X', ensuring we don't do it twice */
1415+
if (NULL != orte_xterm) {
1416+
opal_argv_append_unique_nosize(&rsh_agent_argv, "-X", false);
1417+
} else if (0 >= opal_output_get_verbosity(orte_plm_base_framework.framework_output)) {
1418+
/* if debug was not specified, and the user didn't explicitly
1419+
* specify X11 forwarding/non-forwarding, add "-x" if it
1420+
* isn't already there (check either case)
1421+
*/
1422+
for (i = 1; NULL != rsh_agent_argv[i]; ++i) {
1423+
if (0 == strcasecmp("-x", rsh_agent_argv[i])) {
1424+
break;
1425+
}
1426+
}
1427+
if (NULL == rsh_agent_argv[i]) {
1428+
opal_argv_append_nosize(&rsh_agent_argv, "-x");
1429+
}
1430+
}
1431+
}
1432+
1433+
/* the caller can append any additional argv's they desire */
1434+
return ORTE_SUCCESS;
1435+
}
1436+
13341437
/**
13351438
* Check the Shell variable and system type on the specified node
13361439
*/
@@ -1454,15 +1557,10 @@ static int setup_shell(orte_plm_rsh_shell_t *rshell,
14541557
struct passwd *p;
14551558

14561559
p = getpwuid(getuid());
1457-
if( NULL == p ) {
1458-
/* This user is unknown to the system. Therefore, there is no reason we
1459-
* spawn whatsoever in his name. Give up with a HUGE error message.
1460-
*/
1461-
orte_show_help( "help-plm-rsh.txt", "unknown-user", true, (int)getuid() );
1462-
return ORTE_ERR_FATAL;
1560+
if( NULL != p ) {
1561+
param = p->pw_shell;
1562+
local_shell = find_shell(p->pw_shell);
14631563
}
1464-
param = p->pw_shell;
1465-
local_shell = find_shell(p->pw_shell);
14661564
}
14671565
#endif
14681566

0 commit comments

Comments
 (0)