Skip to content

Commit 8762574

Browse files
author
rhc54
committed
Merge pull request #1728 from rhc54/topic/sim
Enable simulation of large-scale clusters
2 parents a93c01d + 3913595 commit 8762574

File tree

10 files changed

+84
-10
lines changed

10 files changed

+84
-10
lines changed

orte/mca/ess/base/ess_base_std_orted.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,17 @@ int orte_ess_base_orted_setup(char **hosts)
245245
error = "orte_session_dir define";
246246
goto error;
247247
}
248+
/* if we have multiple daemons/node, then add our pid to the name */
249+
if (NULL != (param = getenv("OMPI_MCA_ras_base_multiplier")) &&
250+
1 < strtol(param, NULL, 10)) {
251+
if (0 > asprintf(&param, "%s.%lu", orte_process_info.top_session_dir, (unsigned long)orte_process_info.pid)) {
252+
ret = ORTE_ERR_OUT_OF_RESOURCE;
253+
error = "create top session dir";
254+
goto error;
255+
}
256+
free(orte_process_info.top_session_dir);
257+
orte_process_info.top_session_dir = param;
258+
}
248259
/* clear the session directory just in case there are
249260
* stale directories lying around
250261
*/

orte/mca/plm/base/plm_base_launch_support.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1528,6 +1528,7 @@ int orte_plm_base_setup_virtual_machine(orte_job_t *jdata)
15281528
bool default_hostfile_used;
15291529
char *hosts;
15301530
bool singleton=false;
1531+
bool multi_sim = false;
15311532

15321533
OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
15331534
"%s plm:base:setup_vm",
@@ -1617,7 +1618,8 @@ int orte_plm_base_setup_virtual_machine(orte_job_t *jdata)
16171618
* look across all jobs and ensure that the "VM" contains
16181619
* all nodes with application procs on them
16191620
*/
1620-
if (orte_get_attribute(&daemons->attributes, ORTE_JOB_NO_VM, NULL, OPAL_BOOL)) {
1621+
multi_sim = orte_get_attribute(&jdata->attributes, ORTE_JOB_MULTI_DAEMON_SIM, NULL, OPAL_BOOL);
1622+
if (orte_get_attribute(&daemons->attributes, ORTE_JOB_NO_VM, NULL, OPAL_BOOL) || multi_sim) {
16211623
OBJ_CONSTRUCT(&nodes, opal_list_t);
16221624
/* loop across all nodes and include those that have
16231625
* num_procs > 0 && no daemon already on them
@@ -1645,14 +1647,17 @@ int orte_plm_base_setup_virtual_machine(orte_job_t *jdata)
16451647
/* not to be used */
16461648
continue;
16471649
}
1648-
if (0 < node->num_procs) {
1650+
if (0 < node->num_procs || multi_sim) {
16491651
/* retain a copy for our use in case the item gets
16501652
* destructed along the way
16511653
*/
16521654
OBJ_RETAIN(node);
16531655
opal_list_append(&nodes, &node->super);
16541656
}
16551657
}
1658+
if (multi_sim) {
1659+
goto process;
1660+
}
16561661
/* see if anybody had procs */
16571662
if (0 == opal_list_get_size(&nodes)) {
16581663
/* if the HNP has some procs, then we are still good */

orte/mca/ras/base/base.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
* All rights reserved.
1212
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
1313
* reserved.
14+
* Copyright (c) 2016 Intel, Inc. All rights reserved.
1415
* $COPYRIGHT$
1516
*
1617
* Additional copyrights may follow
@@ -49,6 +50,7 @@ typedef struct orte_ras_base_t {
4950
bool allocation_read;
5051
orte_ras_base_module_t *active_module;
5152
int total_slots_alloc;
53+
int multiplier;
5254
} orte_ras_base_t;
5355

5456
ORTE_DECLSPEC extern orte_ras_base_t orte_ras_base;

orte/mca/ras/base/ras_base_frame.c

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,18 @@
5050
*/
5151
orte_ras_base_t orte_ras_base = {0};
5252

53+
/**
 * Register the MCA variables owned by the ras base framework.
 *
 * The only variable is "multiplier": the number of daemons to launch per
 * node when simulating a larger cluster. It defaults to 1 and is stored in
 * orte_ras_base.multiplier.
 *
 * @param flags  registration flags supplied by the MCA framework (unused)
 *
 * @return ORTE_SUCCESS in all cases
 */
static int ras_register(mca_base_register_flag_t flags)
{
    (void) flags;

    /* default of one daemon per node */
    orte_ras_base.multiplier = 1;

    /* the registration result is deliberately not acted upon; if the
     * variable could not be registered we simply keep the default */
    (void) mca_base_var_register("orte", "ras", "base", "multiplier",
                                 "Simulate a larger cluster by launching N daemons/node",
                                 MCA_BASE_VAR_TYPE_INT,
                                 NULL, 0, 0,
                                 OPAL_INFO_LVL_9,
                                 MCA_BASE_VAR_SCOPE_READONLY, &orte_ras_base.multiplier);

    /* NOTE(review): presumably registration may overwrite the storage with a
     * user-supplied value - confirm. The multiplier is used as a factor when
     * sizing the node pool and as a loop bound when cloning nodes, so a zero
     * or negative value is meaningless; fall back to one daemon per node */
    if (orte_ras_base.multiplier < 1) {
        orte_ras_base.multiplier = 1;
    }

    return ORTE_SUCCESS;
}
64+
5365
static int orte_ras_base_close(void)
5466
{
5567
/* Close selected component */
@@ -76,5 +88,5 @@ static int orte_ras_base_open(mca_base_open_flag_t flags)
7688
}
7789

7890
MCA_BASE_FRAMEWORK_DECLARE(orte, ras, "ORTE Resource Allocation Subsystem",
79-
NULL, orte_ras_base_open, orte_ras_base_close,
91+
ras_register, orte_ras_base_open, orte_ras_base_close,
8092
mca_ras_base_static_components, 0);

orte/mca/ras/base/ras_base_node.c

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
4444
opal_list_item_t* item;
4545
orte_std_cntr_t num_nodes;
4646
int rc, i;
47-
orte_node_t *node, *hnp_node;
47+
orte_node_t *node, *hnp_node, *nptr;
4848
char *ptr;
4949
bool hnp_alone = true;
5050
orte_attribute_t *kv;
@@ -61,10 +61,16 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
6161
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
6262
(long)num_nodes));
6363

64+
/* mark the job as being a large-cluster sim if that was requested */
65+
if (1 < orte_ras_base.multiplier) {
66+
orte_set_attribute(&jdata->attributes, ORTE_JOB_MULTI_DAEMON_SIM,
67+
ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);
68+
}
69+
6470
/* set the size of the global array - this helps minimize time
6571
* spent doing realloc's
6672
*/
67-
if (ORTE_SUCCESS != (rc = opal_pointer_array_set_size(orte_node_pool, num_nodes))) {
73+
if (ORTE_SUCCESS != (rc = opal_pointer_array_set_size(orte_node_pool, num_nodes * orte_ras_base.multiplier))) {
6874
ORTE_ERROR_LOG(rc);
6975
return rc;
7076
}
@@ -139,6 +145,12 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
139145
}
140146
/* don't keep duplicate copy */
141147
OBJ_RELEASE(node);
148+
/* create copies, if required */
149+
for (i=1; i < orte_ras_base.multiplier; i++) {
150+
opal_dss.copy((void**)&node, hnp_node, ORTE_NODE);
151+
ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_DAEMON_LAUNCHED);
152+
node->index = opal_pointer_array_add(orte_node_pool, node);
153+
}
142154
} else {
143155
/* insert the object onto the orte_nodes global array */
144156
OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
@@ -166,7 +178,11 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
166178
}
167179
/* indicate the HNP is not alone */
168180
hnp_alone = false;
169-
}
181+
for (i=1; i < orte_ras_base.multiplier; i++) {
182+
opal_dss.copy((void**)&nptr, node, ORTE_NODE);
183+
nptr->index = opal_pointer_array_add(orte_node_pool, nptr);
184+
}
185+
}
170186
}
171187

172188
/* if we didn't find any fqdn names in the allocation, then

orte/mca/schizo/ompi/schizo_ompi.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -941,8 +941,9 @@ static int setup_fork(orte_job_t *jdata,
941941
opal_setenv("OMPI_COMM_WORLD_LOCAL_SIZE", param, true, &app->env);
942942
free(param);
943943

944-
/* forcibly set the local tmpdir base to match ours */
944+
/* forcibly set the local tmpdir base and top session dir to match ours */
945945
opal_setenv("OMPI_MCA_orte_tmpdir_base", orte_process_info.tmpdir_base, true, &app->env);
946+
opal_setenv("OMPI_MCA_orte_top_session_dir", orte_process_info.top_session_dir, true, &app->env);
946947

947948
/* MPI-3 requires we provide some further info to the procs,
948949
* so we pass them as envars to avoid introducing further

orte/runtime/data_type_support/orte_dt_copy_fns.c

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
1313
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
1414
* All rights reserved.
15-
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
15+
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
1616
* $COPYRIGHT$
1717
*
1818
* Additional copyrights may follow
@@ -66,8 +66,18 @@ int orte_dt_copy_job(orte_job_t **dest, orte_job_t *src, opal_data_type_t type)
6666
*/
6767
/**
 * Create a new node object that duplicates selected fields of src.
 *
 * The fields carried over are: name (duplicated string), state, slots,
 * slots_inuse, slots_max, topology and flags. The topology pointer is
 * copied as-is, so the new node refers to the same topology object as
 * src. All other fields keep whatever values OBJ_NEW gave them.
 *
 * @param dest  receives the newly created node on success; left untouched
 *              on failure
 * @param src   node to copy from
 * @param type  data type identifier (unused)
 *
 * @return ORTE_SUCCESS on success, ORTE_ERR_OUT_OF_RESOURCE if the node
 *         object or its name could not be allocated
 */
int orte_dt_copy_node(orte_node_t **dest, orte_node_t *src, opal_data_type_t type)
{
    orte_node_t *node;

    (void) type;

    node = OBJ_NEW(orte_node_t);
    if (NULL == node) {
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    /* strdup(NULL) is undefined, so only duplicate a name that exists */
    if (NULL != src->name) {
        node->name = strdup(src->name);
        if (NULL == node->name) {
            OBJ_RELEASE(node);
            return ORTE_ERR_OUT_OF_RESOURCE;
        }
    }

    node->state = src->state;
    node->slots = src->slots;
    node->slots_inuse = src->slots_inuse;
    node->slots_max = src->slots_max;
    /* shared, not duplicated: both nodes point at the same topology */
    node->topology = src->topology;
    node->flags = src->flags;

    (*dest) = node;

    return ORTE_SUCCESS;
}
7383

orte/runtime/orte_mca_params.c

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ static char *orte_fork_agent_string = NULL;
5050
static char *orte_tmpdir_base = NULL;
5151
static char *orte_local_tmpdir_base = NULL;
5252
static char *orte_remote_tmpdir_base = NULL;
53+
static char *orte_top_session_dir = NULL;
5354

5455
int orte_register_params(void)
5556
{
@@ -150,6 +151,20 @@ int orte_register_params(void)
150151
orte_process_info.tmpdir_base = strdup (orte_remote_tmpdir_base);
151152
}
152153

154+
orte_top_session_dir = NULL;
155+
(void) mca_base_var_register ("orte", "orte", NULL, "top_session_dir",
156+
"Top of the session directory tree for applications",
157+
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
158+
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL_EQ,
159+
&orte_top_session_dir);
160+
161+
if (NULL != orte_top_session_dir) {
162+
if (NULL != orte_process_info.top_session_dir) {
163+
free(orte_process_info.top_session_dir);
164+
}
165+
orte_process_info.top_session_dir = strdup(orte_top_session_dir);
166+
}
167+
153168
orte_prohibited_session_dirs = NULL;
154169
(void) mca_base_var_register ("orte", "orte", NULL, "no_session_dirs",
155170
"Prohibited locations for session directories (multiple locations separated by ',', default=NULL)",

orte/tools/orterun/orterun.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,7 @@ int orterun(int argc, char *argv[])
290290
DONE:
291291
/* cleanup and leave */
292292
orte_submit_finalize();
293+
orte_finalize();
293294

294295
if (orte_debug_flag) {
295296
fprintf(stderr, "exiting with status %d\n", orte_exit_status);

orte/util/attr.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ typedef uint16_t orte_job_flags_t;
138138
#define ORTE_JOB_MERGE_STDERR_STDOUT (ORTE_JOB_START_KEY + 46) // bool - merge stderr into stdout stream
139139
#define ORTE_JOB_TAG_OUTPUT (ORTE_JOB_START_KEY + 47) // bool - tag stdout/stderr
140140
#define ORTE_JOB_TIMESTAMP_OUTPUT (ORTE_JOB_START_KEY + 48) // bool - timestamp stdout/stderr
141+
#define ORTE_JOB_MULTI_DAEMON_SIM (ORTE_JOB_START_KEY + 49) // bool - multiple daemons/node to simulate large cluster
141142

142143
#define ORTE_JOB_MAX_KEY 300
143144

0 commit comments

Comments
 (0)