Skip to content

Commit ed04d83

Browse files
committed
Merge pull request open-mpi#562 from rhc54/cmr2.0/pmix
Integrate PMIx 1.0 with OMPI.
2 parents 3560d02 + 22c912f commit ed04d83

File tree

390 files changed

+54219
-19266
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

390 files changed

+54219
-19266
lines changed

.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,11 @@ opal/mca/hwloc/hwloc*/hwloc/include/private/autogen/config.h
299299

300300
opal/mca/installdirs/config/install_dirs.h
301301

302+
opal/mca/pmix/pmix1xx/pmix/include/pmix/autogen/config.h
303+
opal/mca/pmix/pmix1xx/pmix/include/pmix/pmix_common.h
304+
opal/mca/pmix/pmix1xx/pmix/include/private/autogen/config.h
305+
opal/mca/pmix/pmix1xx/pmix/include/private/autogen/config.h.in
306+
302307
opal/tools/opal-checkpoint/opal-checkpoint
303308
opal/tools/opal-checkpoint/opal-checkpoint.1
304309
opal/tools/opal-restart/opal-restart

contrib/platform/intel/bend/linux.conf

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -58,26 +58,8 @@
5858
# parameters available and their default values.
5959
#
6060

61-
#default hostfile
62-
#orte_default_hostfile = /home/common/hosts
63-
#ras_slurm_enable_dyn_alloc = 1
64-
#ras_slurm_config_file = /home/common/slurm/conf/slurm.conf
65-
6661
# Basic behavior to smooth startup
6762
mca_base_component_show_load_errors = 1
68-
mpi_param_check = 0
6963
orte_abort_timeout = 10
7064
hwloc_base_mem_bind_failure_action = silent
7165

72-
## Protect the shared file systems
73-
74-
## Add the interface for out-of-band communication
75-
## and set it up
76-
oob_tcp_peer_retries = 120
77-
#oob_tcp_connect_timeout=600
78-
79-
## Define the MPI interconnects
80-
btl = sm,tcp,self
81-
82-
## Setup shared memory
83-
btl_sm_free_list_max = 768

contrib/platform/intel/bend/mac-orcm.conf

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -60,19 +60,6 @@
6060

6161
# Basic behavior to smooth startup
6262
mca_base_component_show_load_errors = 1
63-
mpi_param_check = 0
6463
orte_abort_timeout = 10
6564
hwloc_base_mem_bind_failure_action = silent
6665

67-
## Protect the shared file systems
68-
69-
## Add the interface for out-of-band communication
70-
## and set it up
71-
oob_tcp_peer_retries = 120
72-
#oob_tcp_connect_timeout=600
73-
74-
## Define the MPI interconnects
75-
btl = sm,tcp,self
76-
77-
## Setup shared memory
78-
btl_sm_free_list_max = 768

contrib/platform/intel/bend/mac.conf

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -60,19 +60,5 @@
6060

6161
# Basic behavior to smooth startup
6262
mca_base_component_show_load_errors = 1
63-
mpi_param_check = 0
6463
orte_abort_timeout = 10
6564
hwloc_base_mem_bind_failure_action = silent
66-
67-
## Protect the shared file systems
68-
69-
## Add the interface for out-of-band communication
70-
## and set it up
71-
oob_tcp_peer_retries = 120
72-
#oob_tcp_connect_timeout=600
73-
74-
## Define the MPI interconnects
75-
btl = sm,tcp,self
76-
77-
## Setup shared memory
78-
btl_sm_free_list_max = 768

ompi/Makefile.am

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
# Copyright (c) 2010-2011 Sandia National Laboratories. All rights reserved.
1515
# Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights
1616
# reserved.
17+
# Copyright (c) 2015 Intel, Inc. All rights reserved.
1718
# $COPYRIGHT$
1819
#
1920
# Additional copyrights may follow
@@ -172,6 +173,7 @@ endif
172173
include class/Makefile.am
173174
include attribute/Makefile.am
174175
include communicator/Makefile.am
176+
include dpm/Makefile.am
175177
include errhandler/Makefile.am
176178
include file/Makefile.am
177179
include group/Makefile.am

ompi/communicator/comm.c

Lines changed: 16 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
* All rights reserved.
2121
* Copyright (c) 2014 Research Organization for Information Science
2222
* and Technology (RIST). All rights reserved.
23-
* Copyright (c) 2014 Intel, Inc. All rights reserved.
23+
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
2424
* $COPYRIGHT$
2525
*
2626
* Additional copyrights may follow
@@ -34,16 +34,16 @@
3434

3535
#include "ompi/constants.h"
3636
#include "opal/mca/hwloc/base/base.h"
37-
#include "opal/mca/dstore/dstore.h"
3837
#include "opal/dss/dss.h"
38+
#include "opal/mca/pmix/pmix.h"
3939

4040
#include "ompi/proc/proc.h"
4141
#include "opal/threads/mutex.h"
4242
#include "opal/util/bit_ops.h"
4343
#include "opal/util/output.h"
4444
#include "ompi/mca/topo/topo.h"
4545
#include "ompi/mca/topo/base/base.h"
46-
#include "ompi/mca/dpm/dpm.h"
46+
#include "ompi/dpm/dpm.h"
4747

4848
#include "ompi/attribute/attribute.h"
4949
#include "ompi/communicator/communicator.h"
@@ -202,7 +202,7 @@ int ompi_comm_set_nb ( ompi_communicator_t **ncomm,
202202
Necessary for the disconnect of dynamic communicators. */
203203

204204
if ( 0 < local_size && (OMPI_COMM_IS_INTRA(newcomm) || 0 <remote_size) ) {
205-
ompi_dpm.mark_dyncomm (newcomm);
205+
ompi_dpm_mark_dyncomm (newcomm);
206206
}
207207

208208
/* Set error handler */
@@ -1766,8 +1766,6 @@ ompi_proc_t **ompi_comm_get_rprocs ( ompi_communicator_t *local_comm,
17661766
char *recvbuf;
17671767
ompi_proc_t **proc_list=NULL;
17681768
int i;
1769-
opal_list_t myvals;
1770-
opal_value_t *kv;
17711769

17721770
local_rank = ompi_comm_rank (local_comm);
17731771
local_size = ompi_comm_size (local_comm);
@@ -1780,15 +1778,15 @@ ompi_proc_t **ompi_comm_get_rprocs ( ompi_communicator_t *local_comm,
17801778
}
17811779
if(OMPI_GROUP_IS_DENSE(local_comm->c_local_group)) {
17821780
rc = ompi_proc_pack(local_comm->c_local_group->grp_proc_pointers,
1783-
local_size, true, sbuf);
1781+
local_size, sbuf);
17841782
}
17851783
/* get the proc list for the sparse implementations */
17861784
else {
17871785
proc_list = (ompi_proc_t **) calloc (local_comm->c_local_group->grp_proc_count,
17881786
sizeof (ompi_proc_t *));
17891787
for(i=0 ; i<local_comm->c_local_group->grp_proc_count ; i++)
17901788
proc_list[i] = ompi_group_peer_lookup(local_comm->c_local_group,i);
1791-
rc = ompi_proc_pack (proc_list, local_size, true, sbuf);
1789+
rc = ompi_proc_pack (proc_list, local_size, sbuf);
17921790
}
17931791
if ( OMPI_SUCCESS != rc ) {
17941792
goto err_exit;
@@ -1867,7 +1865,7 @@ ompi_proc_t **ompi_comm_get_rprocs ( ompi_communicator_t *local_comm,
18671865
}
18681866

18691867
/* decode the names into a proc-list */
1870-
rc = ompi_proc_unpack(rbuf, rsize, &rprocs, true, NULL, NULL);
1868+
rc = ompi_proc_unpack(rbuf, rsize, &rprocs, NULL, NULL);
18711869
OBJ_RELEASE(rbuf);
18721870
if (OMPI_SUCCESS != rc) {
18731871
OMPI_ERROR_LOG(rc);
@@ -1876,22 +1874,16 @@ ompi_proc_t **ompi_comm_get_rprocs ( ompi_communicator_t *local_comm,
18761874

18771875
/* set the locality of the remote procs */
18781876
for (i=0; i < rsize; i++) {
1879-
/* get the locality information - do not use modex recv for
1880-
* this request as that will automatically cause the hostname
1881-
* to be loaded as well. All RTEs are required to provide this
1882-
* information at startup for procs on our node. Thus, not
1883-
* finding the info indicates that the proc is non-local.
1884-
*/
1885-
OBJ_CONSTRUCT(&myvals, opal_list_t);
1886-
if (OMPI_SUCCESS != opal_dstore.fetch(opal_dstore_internal,
1887-
&rprocs[i]->super.proc_name,
1888-
OPAL_DSTORE_LOCALITY, &myvals)) {
1889-
rprocs[i]->super.proc_flags = OPAL_PROC_NON_LOCAL;
1877+
/* get the locality information - all RTEs are required
1878+
* to provide this information at startup */
1879+
uint16_t *u16ptr, u16;
1880+
u16ptr = &u16;
1881+
OPAL_MODEX_RECV_VALUE(rc, OPAL_PMIX_LOCALITY, &rprocs[i]->super.proc_name, &u16ptr, OPAL_UINT16);
1882+
if (OPAL_SUCCESS == rc) {
1883+
rprocs[i]->super.proc_flags = u16;
18901884
} else {
1891-
kv = (opal_value_t*)opal_list_get_first(&myvals);
1892-
rprocs[i]->super.proc_flags = kv->data.uint16;
1885+
rprocs[i]->super.proc_flags = OPAL_PROC_NON_LOCAL;
18931886
}
1894-
OPAL_LIST_DESTRUCT(&myvals);
18951887
}
18961888

18971889
/* And now add the information into the database */
@@ -2210,7 +2202,7 @@ static int ompi_comm_fill_rest(ompi_communicator_t *comm,
22102202
if( MPI_UNDEFINED != my_rank ) {
22112203
/* verify whether to set the flag, that this comm
22122204
contains process from more than one jobid. */
2213-
ompi_dpm.mark_dyncomm (comm);
2205+
ompi_dpm_mark_dyncomm (comm);
22142206
}
22152207

22162208
/* set the error handler */

0 commit comments

Comments
 (0)