Skip to content
This repository was archived by the owner on Sep 30, 2022. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions orte/mca/plm/rsh/plm_rsh_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -258,8 +258,10 @@ static int rsh_component_query(mca_base_module_t **module, int *priority)
char *tmp;

/* Check if we are under Grid Engine parallel environment by looking at several
* environment variables. If so, setup the path and argv[0]. */
if (NULL == mca_plm_rsh_component.agent) {
* environment variables. If so, setup the path and argv[0].
* Note that we allow the user to specify the launch agent
* even if they are in a Grid Engine environment */
if (0 == strcmp(mca_plm_rsh_component.agent, "ssh : rsh")) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor nit: open-mpi/ompi#1972 might be a more robust way to check to see if we're not using the default value...?

if (!mca_plm_rsh_component.disable_qrsh &&
NULL != getenv("SGE_ROOT") && NULL != getenv("ARC") &&
NULL != getenv("PE_HOSTFILE") && NULL != getenv("JOB_ID")) {
Expand Down
120 changes: 109 additions & 11 deletions orte/mca/plm/rsh/plm_rsh_module.c
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ static const char *orte_plm_rsh_shell_name[7] = {
*/
static void set_handler_default(int sig);
static orte_plm_rsh_shell_t find_shell(char *shell);
static int launch_agent_setup(const char *agent, char *path);
static void ssh_child(int argc, char **argv) __opal_attribute_noreturn__;
static int rsh_probe(char *nodename,
orte_plm_rsh_shell_t *shell);
Expand All @@ -177,14 +178,59 @@ static void process_launch_list(int fd, short args, void *cbdata);
static int num_in_progress=0;
static opal_list_t launch_list;
static opal_event_t launch_event;
static char *rsh_agent_path=NULL;
static char **rsh_agent_argv=NULL;

/**
* Init the module
*/
static int rsh_init(void)
{
char *tmp;
int rc;

/* we were selected, so setup the launch agent */
if (mca_plm_rsh_component.using_qrsh) {
/* perform base setup for qrsh */
asprintf(&tmp, "%s/bin/%s", getenv("SGE_ROOT"), getenv("ARC"));
if (ORTE_SUCCESS != (rc = launch_agent_setup("qrsh", tmp))) {
ORTE_ERROR_LOG(rc);
free(tmp);
return rc;
}
free(tmp);
/* automatically add -inherit and grid engine PE related flags */
opal_argv_append_nosize(&rsh_agent_argv, "-inherit");
/* Don't use the "-noshell" flag as qrsh would have a problem
* swallowing a long command */
opal_argv_append_nosize(&rsh_agent_argv, "-nostdin");
opal_argv_append_nosize(&rsh_agent_argv, "-V");
if (0 < opal_output_get_verbosity(orte_plm_base_framework.framework_output)) {
opal_argv_append_nosize(&rsh_agent_argv, "-verbose");
tmp = opal_argv_join(rsh_agent_argv, ' ');
opal_output_verbose(1, orte_plm_base_framework.framework_output,
"%s plm:rsh: using \"%s\" for launching\n",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tmp);
free(tmp);
}
} else if(mca_plm_rsh_component.using_llspawn) {
/* perform base setup for llspawn */
if (ORTE_SUCCESS != (rc = launch_agent_setup("llspawn", NULL))) {
ORTE_ERROR_LOG(rc);
return rc;
}
opal_output_verbose(1, orte_plm_base_framework.framework_output,
"%s plm:rsh: using \"%s\" for launching\n",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
rsh_agent_path);
} else {
/* not using qrsh or llspawn - use MCA-specified agent */
if (ORTE_SUCCESS != (rc = launch_agent_setup(mca_plm_rsh_component.agent, NULL))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}

/* point to our launch command */
if (ORTE_SUCCESS != (rc = orte_state.add_job_state(ORTE_JOB_STATE_LAUNCH_DAEMONS,
launch_daemons, ORTE_SYS_PRI))) {
Expand Down Expand Up @@ -325,8 +371,8 @@ static int setup_launch(int *argcptr, char ***argvptr,
/*
* Build argv array
*/
argv = opal_argv_copy(mca_plm_rsh_component.agent_argv);
argc = opal_argv_count(mca_plm_rsh_component.agent_argv);
argv = opal_argv_copy(rsh_agent_argv);
argc = opal_argv_count(argv);
/* if any ssh args were provided, now is the time to add them */
if (NULL != mca_plm_rsh_component.ssh_args) {
char **ssh_argv;
Expand Down Expand Up @@ -676,7 +722,7 @@ static void ssh_child(int argc, char **argv)
* about remote launches here
*/
exec_argv = argv;
exec_path = mca_plm_rsh_component.agent_path;
exec_path = strdup(rsh_agent_path);

/* Don't let ssh slurp all of our stdin! */
fdin = open("/dev/null", O_RDWR);
Expand Down Expand Up @@ -1331,6 +1377,63 @@ static orte_plm_rsh_shell_t find_shell(char *shell)
return ORTE_PLM_RSH_SHELL_UNKNOWN;
}

static int launch_agent_setup(const char *agent, char *path)
{
char *bname;
int i;

/* if no agent was provided, then report not found */
if (NULL == mca_plm_rsh_component.agent && NULL == agent) {
return ORTE_ERR_NOT_FOUND;
}

/* search for the argv */
OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
"%s plm:rsh_setup on agent %s path %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(NULL == agent) ? mca_plm_rsh_component.agent : agent,
(NULL == path) ? "NULL" : path));
rsh_agent_argv = orte_plm_rsh_search(agent, path);

if (0 == opal_argv_count(rsh_agent_argv)) {
/* nothing was found */
return ORTE_ERR_NOT_FOUND;
}

/* see if we can find the agent in the path */
rsh_agent_path = opal_path_findv(rsh_agent_argv[0], X_OK, environ, path);

if (NULL == rsh_agent_path) {
/* not an error - just report not found */
opal_argv_free(rsh_agent_argv);
return ORTE_ERR_NOT_FOUND;
}

bname = opal_basename(rsh_agent_argv[0]);
if (NULL != bname && 0 == strcmp(bname, "ssh")) {
/* if xterm option was given, add '-X', ensuring we don't do it twice */
if (NULL != orte_xterm) {
opal_argv_append_unique_nosize(&rsh_agent_argv, "-X", false);
} else if (0 >= opal_output_get_verbosity(orte_plm_base_framework.framework_output)) {
/* if debug was not specified, and the user didn't explicitly
* specify X11 forwarding/non-forwarding, add "-x" if it
* isn't already there (check either case)
*/
for (i = 1; NULL != rsh_agent_argv[i]; ++i) {
if (0 == strcasecmp("-x", rsh_agent_argv[i])) {
break;
}
}
if (NULL == rsh_agent_argv[i]) {
opal_argv_append_nosize(&rsh_agent_argv, "-x");
}
}
}

/* the caller can append any additional argv's they desire */
return ORTE_SUCCESS;
}

/**
* Check the Shell variable and system type on the specified node
*/
Expand Down Expand Up @@ -1454,15 +1557,10 @@ static int setup_shell(orte_plm_rsh_shell_t *rshell,
struct passwd *p;

p = getpwuid(getuid());
if( NULL == p ) {
/* This user is unknown to the system. Therefore, there is no reason we
* spawn whatsoever in his name. Give up with a HUGE error message.
*/
orte_show_help( "help-plm-rsh.txt", "unknown-user", true, (int)getuid() );
return ORTE_ERR_FATAL;
if( NULL != p ) {
param = p->pw_shell;
local_shell = find_shell(p->pw_shell);
}
param = p->pw_shell;
local_shell = find_shell(p->pw_shell);
}
#endif

Expand Down