Skip to content

Commit 2fa8b6c

Browse files
author
rhc54
committed
Merge pull request #1525 from rhc54/topic/schizo
Extend the schizo framework
2 parents c239ef5 + 6ac7929 commit 2fa8b6c

File tree

35 files changed

+2124
-4859
lines changed

35 files changed

+2124
-4859
lines changed

config/orte_config_files.m4

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
# Corporation. All rights reserved.
77
# Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
88
# reserved.
9-
# Copyright (c) 2015 Intel, Inc. All rights reserved
9+
# Copyright (c) 2015-2016 Intel, Inc. All rights reserved
1010
# $COPYRIGHT$
1111
#
1212
# Additional copyrights may follow
@@ -33,7 +33,6 @@ AC_DEFUN([ORTE_CONFIG_FILES],[
3333
orte/tools/orte-migrate/Makefile
3434
orte/tools/orte-info/Makefile
3535
orte/tools/orte-server/Makefile
36-
orte/tools/orte-submit/Makefile
3736
orte/tools/orte-dvm/Makefile
3837
])
3938
])

ompi/mca/rte/orte/Makefile.am

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
#
22
# Copyright (c) 2012 Los Alamos National Security, LLC.
33
# All rights reserved.
4-
# Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
4+
# Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
5+
# Copyright (c) 2016 Intel, Inc. All rights reserved.
56
# $COPYRIGHT$
67
#
78
# Additional copyrights may follow
@@ -27,7 +28,7 @@ libmca_rte_orte_la_SOURCES =$(sources) $(headers)
2728
libmca_rte_orte_la_LDFLAGS = -module -avoid-version
2829
libmca_rte_orte_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la
2930

30-
man_pages = mpirun.1 mpiexec.1 ompi-ps.1 ompi-clean.1 ompi-top.1 ompi-server.1 ompi-dvm.1 ompi-submit.1
31+
man_pages = mpirun.1 mpiexec.1 ompi-ps.1 ompi-clean.1 ompi-top.1 ompi-server.1 ompi-dvm.1
3132

3233
if WANT_FT
3334
man_pages += ompi-checkpoint.1 ompi-restart.1
@@ -44,7 +45,6 @@ install-exec-hook:
4445
(cd $(DESTDIR)$(bindir); rm -f ompi-top$(EXEEXT); $(LN_S) orte-top$(EXEEXT) ompi-top$(EXEEXT))
4546
(cd $(DESTDIR)$(bindir); rm -f ompi-server$(EXEEXT); $(LN_S) orte-server$(EXEEXT) ompi-server$(EXEEXT))
4647
(cd $(DESTDIR)$(bindir); rm -f ompi-dvm$(EXEEXT); $(LN_S) orte-dvm$(EXEEXT) ompi-dvm$(EXEEXT))
47-
(cd $(DESTDIR)$(bindir); rm -f ompi-submit$(EXEEXT); $(LN_S) orte-submit$(EXEEXT) ompi-submit$(EXEEXT))
4848
if WANT_FT
4949
(cd $(DESTDIR)$(bindir); rm -f ompi-checkpoint$(EXEEXT); $(LN_S) orte-checkpoint$(EXEEXT) ompi-checkpoint$(EXEEXT))
5050
(cd $(DESTDIR)$(bindir); rm -f ompi-restart$(EXEEXT); $(LN_S) orte-restart$(EXEEXT) ompi-restart$(EXEEXT))
@@ -58,8 +58,7 @@ uninstall-local:
5858
$(DESTDIR)$(bindir)/ompi-clean$(EXEEXT) \
5959
$(DESTDIR)$(bindir)/ompi-top$(EXEEXT) \
6060
$(DESTDIR)$(bindir)/ompi-server$(EXEEXT) \
61-
$(DESTDIR)$(bindir)/ompi-dvm$(EXEEXT) \
62-
$(DESTDIR)$(bindir)/ompi-submit$(EXEEXT)
61+
$(DESTDIR)$(bindir)/ompi-dvm$(EXEEXT)
6362
if WANT_FT
6463
rm -f $(DESTDIR)$(bindir)/ompi-checkpoint$(EXEEXT) \
6564
$(DESTDIR)$(bindir)/ompi-restart$(EXEEXT) \
@@ -122,8 +121,5 @@ ompi-server.1: $(top_builddir)/orte/tools/orte-server/orte-server.1
122121
ompi-dvm.1: $(top_builddir)/orte/tools/orte-dvm/orte-dvm.1
123122
cp -f $(top_builddir)/orte/tools/orte-dvm/orte-dvm.1 ompi-dvm.1
124123

125-
ompi-submit.1: $(top_builddir)/orte/tools/orte-submit/orte-submit.1
126-
cp -f $(top_builddir)/orte/tools/orte-submit/orte-submit.1 ompi-submit.1
127-
128124
clean-local:
129125
rm -f $(man_pages)

opal/mca/base/base.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ OPAL_DECLSPEC int mca_base_is_component_required(opal_list_t *components_availab
156156
/* mca_base_cmd_line.c */
157157

158158
OPAL_DECLSPEC int mca_base_cmd_line_setup(opal_cmd_line_t *cmd);
159-
OPAL_DECLSPEC int mca_base_cmd_line_process_args(opal_cmd_line_t *cmd,
159+
OPAL_DECLSPEC int mca_base_cmd_line_process_args(char **argv,
160160
char ***app_env,
161161
char ***global_env);
162162
OPAL_DECLSPEC void mca_base_cmd_line_wrap_args(char **args);

opal/mca/base/mca_base_cmd_line.c

Lines changed: 24 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -94,29 +94,25 @@ int mca_base_cmd_line_setup(opal_cmd_line_t *cmd)
9494
/*
9595
* Look for and handle any -mca options on the command line
9696
*/
97-
int mca_base_cmd_line_process_args(opal_cmd_line_t *cmd,
97+
int mca_base_cmd_line_process_args(char **argv,
9898
char ***context_env, char ***global_env)
9999
{
100-
int i, num_insts, rc;
100+
int i, rc;
101101
char **params;
102102
char **values;
103103

104-
/* If no relevant parameters were given, just return */
105-
106-
if (!opal_cmd_line_is_taken(cmd, OPAL_MCA_CMD_LINE_ID) &&
107-
!opal_cmd_line_is_taken(cmd, "g"OPAL_MCA_CMD_LINE_ID)) {
108-
return OPAL_SUCCESS;
109-
}
110-
111-
/* Handle app context-specific parameters */
112-
113-
num_insts = opal_cmd_line_get_ninsts(cmd, OPAL_MCA_CMD_LINE_ID);
114104
params = values = NULL;
115-
for (i = 0; i < num_insts; ++i) {
116-
if (OPAL_SUCCESS != (rc = process_arg(opal_cmd_line_get_param(cmd, OPAL_MCA_CMD_LINE_ID, i, 0),
117-
opal_cmd_line_get_param(cmd, OPAL_MCA_CMD_LINE_ID, i, 1),
118-
&params, &values))) {
119-
return rc;
105+
for (i = 0; NULL != argv[i]; ++i) {
106+
if (0 == strcmp("-"OPAL_MCA_CMD_LINE_ID, argv[i]) ||
107+
0 == strcmp("--"OPAL_MCA_CMD_LINE_ID, argv[i])) {
108+
if (NULL == argv[i+1] || NULL == argv[i+2]) {
109+
return OPAL_ERR_BAD_PARAM;
110+
}
111+
if (OPAL_SUCCESS != (rc = process_arg(argv[i+1], argv[i+2],
112+
&params, &values))) {
113+
return rc;
114+
}
115+
i += 2;
120116
}
121117
}
122118
if (NULL != params) {
@@ -125,15 +121,19 @@ int mca_base_cmd_line_process_args(opal_cmd_line_t *cmd,
125121
opal_argv_free(values);
126122
}
127123

128-
/* Handle global parameters */
129124

130-
num_insts = opal_cmd_line_get_ninsts(cmd, "g"OPAL_MCA_CMD_LINE_ID);
131125
params = values = NULL;
132-
for (i = 0; i < num_insts; ++i) {
133-
if (OPAL_SUCCESS != (rc = process_arg(opal_cmd_line_get_param(cmd, "g"OPAL_MCA_CMD_LINE_ID, i, 0),
134-
opal_cmd_line_get_param(cmd, "g"OPAL_MCA_CMD_LINE_ID, i, 1),
135-
&params, &values))) {
136-
return rc;
126+
for (i = 0; NULL != argv[i]; ++i) {
127+
if (0 == strcmp("-g"OPAL_MCA_CMD_LINE_ID, argv[i]) ||
128+
0 == strcmp("--g"OPAL_MCA_CMD_LINE_ID, argv[i])) {
129+
if (NULL == argv[i+1] || NULL == argv[i+2]) {
130+
return OPAL_ERR_BAD_PARAM;
131+
}
132+
if (OPAL_SUCCESS != (rc = process_arg(argv[i+1], argv[i+2],
133+
&params, &values))) {
134+
return rc;
135+
}
136+
i += 2;
137137
}
138138
}
139139
if (NULL != params) {
@@ -190,7 +190,6 @@ static int process_arg(const char *param, const char *value,
190190

191191
/* If we didn't already have a value for the same param, save
192192
this one away */
193-
194193
opal_argv_append_nosize(params, param);
195194
opal_argv_append_nosize(values, p1);
196195
free(p1);

opal/runtime/opal_info_support.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,7 @@ int opal_info_init(int argc, char **argv,
207207
exit(cmd_error ? 1 : 0);
208208
}
209209

210-
mca_base_cmd_line_process_args(opal_info_cmd_line, &app_env, &global_env);
210+
mca_base_cmd_line_process_args(argv, &app_env, &global_env);
211211

212212

213213
/* set the flags */

opal/util/cmd_line.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,9 @@ int opal_cmd_line_create(opal_cmd_line_t *cmd,
156156
}
157157
OBJ_CONSTRUCT(cmd, opal_cmd_line_t);
158158

159-
ret = opal_cmd_line_add(cmd, table);
159+
if (NULL != table) {
160+
ret = opal_cmd_line_add(cmd, table);
161+
}
160162
return ret;
161163
}
162164

orte/mca/ess/hnp/ess_hnp_module.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@
7676
#include "orte/mca/state/base/base.h"
7777
#include "orte/mca/state/state.h"
7878

79+
#include "orte/orted/orted_submit.h"
7980
#include "orte/orted/pmix/pmix_server.h"
8081

8182
#include "orte/util/show_help.h"
@@ -713,6 +714,14 @@ static int rte_init(void)
713714
goto error;
714715
}
715716

717+
/* setup to support debugging */
718+
orte_state.add_job_state(ORTE_JOB_STATE_READY_FOR_DEBUGGERS,
719+
orte_debugger_init_after_spawn,
720+
ORTE_SYS_PRI);
721+
orte_state.add_job_state(ORTE_JOB_STATE_DEBUGGER_DETACH,
722+
orte_debugger_detached,
723+
ORTE_SYS_PRI);
724+
716725
/* if a tool has launched us and is requesting event reports,
717726
* then set its contact info into the comm system
718727
*/

orte/mca/grpcomm/base/grpcomm_base_stubs.c

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -324,7 +324,7 @@ static int create_dmns(orte_grpcomm_signature_t *sig,
324324
*dmns = NULL;
325325
return ORTE_ERR_NOT_FOUND;
326326
}
327-
if (NULL == jdata->map) {
327+
if (0 == jdata->map->num_nodes) {
328328
/* we haven't generated a job map yet - if we are the HNP,
329329
* then we should only involve ourselves. Otherwise, we have
330330
* no choice but to abort to avoid hangs */
@@ -340,12 +340,6 @@ static int create_dmns(orte_grpcomm_signature_t *sig,
340340
*dmns = NULL;
341341
return ORTE_ERR_NOT_FOUND;
342342
}
343-
/* get the array */
344-
if (0 == jdata->map->num_nodes) {
345-
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
346-
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
347-
return ORTE_ERR_SILENT;
348-
}
349343
dns = (orte_vpid_t*)malloc(jdata->map->num_nodes * sizeof(vpid));
350344
nds = 0;
351345
for (i=0; i < jdata->map->nodes->size && (int)nds < jdata->map->num_nodes; i++) {

orte/mca/plm/base/plm_base_launch_support.c

Lines changed: 11 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1537,6 +1537,9 @@ int orte_plm_base_setup_virtual_machine(orte_job_t *jdata)
15371537
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
15381538
return ORTE_ERR_NOT_FOUND;
15391539
}
1540+
if (NULL == daemons->map) {
1541+
daemons->map = OBJ_NEW(orte_job_map_t);
1542+
}
15401543
map = daemons->map;
15411544

15421545
/* if this job is being launched against a fixed DVM, then there is
@@ -1552,8 +1555,7 @@ int orte_plm_base_setup_virtual_machine(orte_job_t *jdata)
15521555
* the virtual machine unless specifically requested to do so
15531556
*/
15541557
if (ORTE_JOBID_INVALID != jdata->originator.jobid) {
1555-
OBJ_CONSTRUCT(&nodes, opal_list_t);
1556-
if (NULL == daemons->map) {
1558+
if (0 == map->num_nodes) {
15571559
OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
15581560
"%s plm:base:setup_vm creating map",
15591561
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
@@ -1562,16 +1564,15 @@ int orte_plm_base_setup_virtual_machine(orte_job_t *jdata)
15621564
* are obviously already here! The ess will already
15631565
* have assigned our node to us.
15641566
*/
1565-
daemons->map = OBJ_NEW(orte_job_map_t);
15661567
node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0);
1567-
opal_pointer_array_add(daemons->map->nodes, (void*)node);
1568-
++(daemons->map->num_nodes);
1568+
opal_pointer_array_add(map->nodes, (void*)node);
1569+
++(map->num_nodes);
15691570
/* maintain accounting */
15701571
OBJ_RETAIN(node);
15711572
/* mark that this is from a singleton */
15721573
singleton = true;
15731574
}
1574-
map = daemons->map;
1575+
OBJ_CONSTRUCT(&nodes, opal_list_t);
15751576
for (i=1; i < orte_node_pool->size; i++) {
15761577
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
15771578
continue;
@@ -1618,16 +1619,6 @@ int orte_plm_base_setup_virtual_machine(orte_job_t *jdata)
16181619
*/
16191620
if (orte_get_attribute(&daemons->attributes, ORTE_JOB_NO_VM, NULL, OPAL_BOOL)) {
16201621
OBJ_CONSTRUCT(&nodes, opal_list_t);
1621-
if (NULL == daemons->map) {
1622-
OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
1623-
"%s plm:base:setup_vm creating map",
1624-
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
1625-
/* this is the first time thru, so the vm is just getting
1626-
* defined - create a map for it
1627-
*/
1628-
daemons->map = OBJ_NEW(orte_job_map_t);
1629-
}
1630-
map = daemons->map;
16311622
/* loop across all nodes and include those that have
16321623
* num_procs > 0 && no daemon already on them
16331624
*/
@@ -1685,23 +1676,21 @@ int orte_plm_base_setup_virtual_machine(orte_job_t *jdata)
16851676
goto process;
16861677
}
16871678

1688-
if (NULL == daemons->map) {
1679+
if (0 == map->num_nodes) {
16891680
OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
16901681
"%s plm:base:setup_vm creating map",
16911682
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
16921683
/* this is the first time thru, so the vm is just getting
1693-
* defined - create a map for it and put us in as we
1684+
* defined - put us in as we
16941685
* are obviously already here! The ess will already
16951686
* have assigned our node to us.
16961687
*/
1697-
daemons->map = OBJ_NEW(orte_job_map_t);
16981688
node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0);
1699-
opal_pointer_array_add(daemons->map->nodes, (void*)node);
1700-
++(daemons->map->num_nodes);
1689+
opal_pointer_array_add(map->nodes, (void*)node);
1690+
++(map->num_nodes);
17011691
/* maintain accounting */
17021692
OBJ_RETAIN(node);
17031693
}
1704-
map = daemons->map;
17051694

17061695
/* zero-out the number of new daemons as we will compute this
17071696
* each time we are called

orte/mca/rmaps/ppr/rmaps_ppr.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,8 +110,10 @@ static int ppr_mapper(orte_job_t *jdata)
110110
ORTE_MAPPING_PPR != ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
111111
/* not for us */
112112
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
113-
"mca:rmaps:ppr: job %s not using ppr mapper",
114-
ORTE_JOBID_PRINT(jdata->jobid));
113+
"mca:rmaps:ppr: job %s not using ppr mapper PPR %s policy %s",
114+
ORTE_JOBID_PRINT(jdata->jobid),
115+
(NULL == jdata->map->ppr) ? "NULL" : jdata->map->ppr,
116+
(ORTE_MAPPING_PPR == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) ? "PPRSET" : "PPR NOTSET");
115117
return ORTE_ERR_TAKE_NEXT_OPTION;
116118
}
117119

0 commit comments

Comments
 (0)