Skip to content

Commit 06c6325

Browse files
Merge pull request #6822 from ggouaillardet/topic/pmix_refresh
pmix/pmix4x: refresh to the latest PMIx master
2 parents 0df0e5c + 4510711 commit 06c6325

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+2218
-663
lines changed

opal/mca/pmix/pmix4x/pmix/NEWS

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,45 @@ Master (not on release branches yet)
7676
- Fix a bug when registering default event handlers
7777

7878

79+
3.1.0 -- 17 Jan 2019
80+
----------------------
81+
**** THIS RELEASE MARKS THE STARTING POINT FOR FULL COMPLIANCE
82+
**** WITH THE PMIX v3 STANDARD. ALL API BEHAVIORS AND ATTRIBUTE
83+
**** DEFINITIONS MEET THE v3 STANDARD SPECIFICATIONS.
84+
- Add a new, faster dstore GDS component 'ds21'
85+
- Performance optimizations for the dstore GDS components.
86+
- Plug miscellaneous memory leaks
87+
- Silence an unnecessary warning message when checking connection
88+
to a non-supporting server
89+
- Ensure lost-connection events get delivered to default event
90+
handlers
91+
- Correctly handle cache refresh for queries
92+
- Protect against race conditions between host and internal library
93+
when dealing with async requests
94+
- Clean up tool operations and add support for connections to
95+
remote servers. Initial support for debugger direct/indirect
96+
launch verified with PRRTE. Clean up setting of tmpdir options.
97+
Drop rendezvous files when acting as a launcher
98+
- Automatically store the server URI for easy access by client
99+
- Provide MCA parameter to control TCP connect retry/timeout
100+
- Update event notification system to properly evict oldest events
101+
when more space is needed
102+
- Fix a number of error paths
103+
- Update IOF cache code to properly drop oldest message. Provide
104+
MCA parameter for setting cache size.
105+
- Handle setsockopt(SO_RCVTIMEO) not being supported
106+
- Ensure that epilogs get run even when connections unexpectedly
107+
terminate. Properly split epilog strings to process multiple
108+
paths
109+
- Pass the tool's command line to the server so it can be returned
110+
in queries
111+
- Add support for C11 atomics
112+
- Support collection and forwarding of fabric-specific envars
113+
- Improve handling of hwloc configure option
114+
- Fix PMIx_server_generate_regex to preserve node ordering
115+
- Fix a bug when registering default event handlers
116+
117+
79118
3.0.2 -- 18 Sept 2018
80119
----------------------
81120
- Ensure we clean up any active sensors when a peer departs. Allow the

opal/mca/pmix/pmix4x/pmix/VERSION

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ greek=a1
3030
# command, or with the date (if "git describe" fails) in the form of
3131
# "date<date>".
3232

33-
repo_rev=git99971222
33+
repo_rev=git03a8b5da
3434

3535
# If tarball_version is not empty, it is used as the version string in
3636
# the tarball filename, regardless of all other versions listed in
@@ -44,7 +44,7 @@ tarball_version=
4444

4545
# The date when this release was created
4646

47-
date="Jun 27, 2019"
47+
date="Jul 16, 2019"
4848

4949
# The shared library version of each of PMIx's public libraries.
5050
# These versions are maintained in accordance with the "Library

opal/mca/pmix/pmix4x/pmix/config/pmix.m4

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ AC_DEFUN([PMIX_SETUP_CORE],[
191191
[Link the output PMIx library to this extra lib (used in embedded mode)]))
192192
AC_MSG_CHECKING([for extra lib])
193193
AS_IF([test ! -z "$with_pmix_extra_lib"],
194-
[AS_IF([test "$with_pmix_extra_lib" == "yes" || test "$with_pmix_extra_lib" == "no"],
194+
[AS_IF([test "$with_pmix_extra_lib" = "yes" || test "$with_pmix_extra_lib" = "no"],
195195
[AC_MSG_RESULT([ERROR])
196196
AC_MSG_WARN([Invalid value for --with-extra-pmix-lib:])
197197
AC_MSG_WARN([ $with_pmix_extra_lib])
@@ -209,7 +209,7 @@ AC_DEFUN([PMIX_SETUP_CORE],[
209209
[Link any embedded components/tools that require it to the provided libtool lib (used in embedded mode)]))
210210
AC_MSG_CHECKING([for extra ltlib])
211211
AS_IF([test ! -z "$with_pmix_extra_ltlib"],
212-
[AS_IF([test "$with_pmix_extra_ltlib" == "yes" || test "$with_pmix_extra_ltlib" == "no"],
212+
[AS_IF([test "$with_pmix_extra_ltlib" = "yes" || test "$with_pmix_extra_ltlib" = "no"],
213213
[AC_MSG_RESULT([ERROR])
214214
AC_MSG_WARN([Invalid value for --with-pmix-extra-ltlib:])
215215
AC_MSG_WARN([ $with_pmix_extra_ltlib])
@@ -664,7 +664,7 @@ AC_DEFUN([PMIX_SETUP_CORE],[
664664
# -lrt might be needed for clock_gettime
665665
PMIX_SEARCH_LIBS_CORE([clock_gettime], [rt])
666666

667-
AC_CHECK_FUNCS([asprintf snprintf vasprintf vsnprintf strsignal socketpair strncpy_s usleep statfs statvfs getpeereid getpeerucred strnlen posix_fallocate tcgetpgrp setpgid ptsname openpty])
667+
AC_CHECK_FUNCS([asprintf snprintf vasprintf vsnprintf strsignal socketpair strncpy_s usleep statfs statvfs getpeereid getpeerucred strnlen posix_fallocate tcgetpgrp setpgid ptsname openpty setenv])
668668

669669
# On some hosts, htonl is a define, so the AC_CHECK_FUNC will get
670670
# confused. On others, it's in the standard library, but stubbed with
@@ -1245,7 +1245,7 @@ AC_MSG_CHECKING([if want to support dlopen of non-global namespaces])
12451245
AC_ARG_ENABLE([nonglobal-dlopen],
12461246
AC_HELP_STRING([--enable-nonglobal-dlopen],
12471247
[enable non-global dlopen (default: enabled)]))
1248-
if test "$enable_nonglobal_dlopen" == "no"; then
1248+
if test "$enable_nonglobal_dlopen" = "no"; then
12491249
AC_MSG_RESULT([no])
12501250
pmix_need_libpmix=0
12511251
else

opal/mca/pmix/pmix4x/pmix/config/pmix_setup_hwloc.m4

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ AC_DEFUN([_PMIX_HWLOC_EXTERNAL],[
6363
if test "$with_hwloc" != "no"; then
6464
AC_MSG_CHECKING([for hwloc in])
6565
if test ! -z "$with_hwloc" && test "$with_hwloc" != "yes"; then
66-
pmix_hwloc_dir=$with_hwloc
66+
pmix_hwloc_dir=$with_hwloc/include
6767
pmix_hwloc_standard_header_location=no
6868
pmix_hwloc_standard_lib_location=no
6969
AS_IF([test -z "$with_hwloc_libdir" || test "$with_hwloc_libdir" = "yes"],

opal/mca/pmix/pmix4x/pmix/configure.ac

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ LT_PREREQ([2.2.6])
171171

172172
pmix_enable_shared="$enable_shared"
173173
pmix_enable_static="$enable_static"
174-
AS_IF([test ! -z "$enable_static" && test "$enable_static" == "yes"],
174+
AS_IF([test ! -z "$enable_static" && test "$enable_static" = "yes"],
175175
[CFLAGS="$CFLAGS -fPIC"])
176176

177177
AM_ENABLE_SHARED
@@ -201,7 +201,6 @@ AS_IF([test "$pmix_debug" = "1"],
201201

202202
LT_INIT()
203203
LT_LANG([C])
204-
LT_LANG([C++])
205204

206205
############################################################################
207206
# Setup the core

opal/mca/pmix/pmix4x/pmix/contrib/whitespace-purge.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/bin/bash
22
#
3-
# Copyright (c) 2015 Intel, Inc. All rights reserved.
3+
# Copyright (c) 2015-2019 Intel, Inc. All rights reserved.
44
# Copyright (c) 2015 Los Alamos National Security, LLC. All rights
55
# reserved
66
# Copyright (c) 2015 Cisco Systems, Inc.
@@ -18,7 +18,7 @@ for file in $(git ls-files) ; do
1818
# skip sym links, pdfs, etc. If any other file types should be
1919
# skipped add the check here.
2020
type=$(file -b --mime-type -h $file)
21-
if test ${type::4} == "text" ; then
21+
if test ${type::4} = "text" ; then
2222
# Eliminate whitespace at the end of lines
2323
perl -pi -e 's/\s*$/\n/' $file
2424
fi

opal/mca/pmix/pmix4x/pmix/include/pmix_common.h.in

Lines changed: 62 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -268,14 +268,19 @@ typedef uint32_t pmix_rank_t;
268268
#define PMIX_PARENT_ID "pmix.parent" // (pmix_proc_t*) identifier of the process that called PMIx_Spawn
269269
// to launch this proc's application
270270
#define PMIX_EXIT_CODE "pmix.exit.code" // (int) exit code returned when proc terminated
271-
#define PMIX_NETWORK_COORDINATE "pmix.net.coord" // (pmix_coord_t*) Network coordinate of the specified process
272-
#define PMIX_NETWORK_COORD_SYSTEM "pmix.net.coordsys" // (pmix_coord_system_t) Network coordinate system being employed to
273-
// describe the specified network plane
271+
#define PMIX_NETWORK_COORDINATE "pmix.net.coord" // (pmix_coord_t*) Network coordinate of the specified process in
272+
// the given view type (e.g., logical vs physical)
273+
#define PMIX_NETWORK_VIEW "pmix.net.view" // (pmix_coord_view_t) Requested view type (e.g., logical vs physical)
274+
#define PMIX_NETWORK_DIMS "pmix.net.dims" // (uint32_t) Number of dimensions in the specified network plane/view
274275
#define PMIX_NETWORK_PLANE "pmix.net.plane" // (char*) string ID of a network plane
275276
#define PMIX_NETWORK_SWITCH "pmix.net.switch" // (char*) string ID of a network switch
276277
#define PMIX_NETWORK_NIC "pmix.net.nic" // (char*) string ID of a NIC
277278
#define PMIX_NETWORK_ENDPT "pmix.net.endpt" // (assigned) network endpt for process - type assigned by
278279
// fabric provider
280+
#define PMIX_NETWORK_SHAPE "pmix.net.shape" // (pmix_data_array_t*) number of interfaces (uint32_t) on each dimension of the
281+
// specified network plane in the requested view
282+
#define PMIX_NETWORK_SHAPE_STRING "pmix.net.shapestr" // (char*) network shape expressed as a string (e.g., "10x12x2")
283+
279284

280285
/* size info */
281286
#define PMIX_UNIV_SIZE "pmix.univ.size" // (uint32_t) #procs in this nspace
@@ -1079,19 +1084,6 @@ typedef enum {
10791084
} pmix_group_operation_t;
10801085

10811086

1082-
/* define a structure for a proc's network coordinate */
1083-
typedef struct pmix_coord {
1084-
int x;
1085-
int y;
1086-
int z;
1087-
} pmix_coord_t;
1088-
1089-
/* define coordinate system values */
1090-
typedef uint8_t pmix_coord_system_t;
1091-
#define PMIX_COORD_CARTESIAN 0x00
1092-
#define PMIX_COORD_TOROID 0x01
1093-
#define PMIX_COORD_CYLINDRICAL 0x02
1094-
10951087

10961088
/* define some "hooks" external libraries can use to
10971089
* intercept memory allocation/release operations */
@@ -1143,6 +1135,60 @@ static inline void* pmix_calloc(size_t n, size_t m)
11431135
(PMIX_CHECK_NSPACE((a)->nspace, (b)->nspace) && ((a)->rank == (b)->rank || (PMIX_RANK_WILDCARD == (a)->rank || PMIX_RANK_WILDCARD == (b)->rank)))
11441136

11451137

1138+
/**** PMIX COORD ****/
/* define coordinate system views */
typedef uint8_t pmix_coord_view_t;
#define PMIX_COORD_VIEW_UNDEF       0x00
#define PMIX_COORD_LOGICAL_VIEW     0x01
#define PMIX_COORD_PHYSICAL_VIEW    0x02

/* define a structure for a proc's network coordinate */
typedef struct pmix_coord {
    pmix_coord_view_t view;     /* one of the PMIX_COORD_*_VIEW values above */
    int *coord;                 /* array holding "dims" values, or NULL when empty */
    size_t dims;                /* number of entries in the coord array */
} pmix_coord_t;

/* Allocate a zeroed array of "n" pmix_coord_t and store it in "m"
 * ("m" becomes NULL if that allocation fails). Only the FIRST element
 * is given coordinate storage: "d" zero-initialized ints. Any further
 * elements are left zeroed, i.e. PMIX_COORD_VIEW_UNDEF, coord == NULL
 * and dims == 0.
 *
 * If the coordinate storage cannot be allocated, the first element is
 * left with coord == NULL and dims == 0 so that it never advertises
 * entries that do not exist.
 *
 * "m" must be an lvalue and is evaluated more than once; "d" and "n"
 * are each evaluated exactly once. The casts are kept so the macro
 * also compiles when expanded in C++ code. */
#define PMIX_COORD_CREATE(m, d, n)                                      \
    do {                                                                \
        (m) = (pmix_coord_t*)pmix_calloc((n), sizeof(pmix_coord_t));    \
        if (NULL != (m)) {                                              \
            (m)->view = PMIX_COORD_VIEW_UNDEF;                          \
            (m)->dims = (d);                                            \
            (m)->coord = (int*)pmix_calloc((m)->dims, sizeof(int));     \
            if (NULL == (m)->coord) {                                   \
                (m)->dims = 0;                                          \
            }                                                           \
        }                                                               \
    } while(0)

/* Put the pmix_coord_t pointed to by "m" into the empty state
 * (undefined view, no coordinate storage, zero dimensions).
 * The "d" argument is accepted but not used: no storage is allocated
 * here. */
#define PMIX_COORD_CONSTRUCT(m, d)              \
    do {                                        \
        (m)->view = PMIX_COORD_VIEW_UNDEF;      \
        (m)->coord = NULL;                      \
        (m)->dims = 0;                          \
    } while(0)

/* Release the coordinate storage owned by the pmix_coord_t pointed to
 * by "m" and return it to the empty state. The struct itself is not
 * released. dims is reset unconditionally so the struct is consistent
 * even if it arrived with coord == NULL. */
#define PMIX_COORD_DESTRUCT(m)                  \
    do {                                        \
        (m)->view = PMIX_COORD_VIEW_UNDEF;      \
        if (NULL != (m)->coord) {               \
            pmix_free((m)->coord);              \
            (m)->coord = NULL;                  \
        }                                       \
        (m)->dims = 0;                          \
    } while(0)

/* Destruct each of the "n" elements of the array "m", release the
 * array itself and set "m" to NULL. A NULL "m" is a no-op. The array
 * is released with pmix_free so that it goes back through the same
 * allocation hooks that PMIX_COORD_CREATE obtained it from. */
#define PMIX_COORD_FREE(m, n)                       \
    do {                                            \
        size_t _nc_;                                \
        if (NULL != (m)) {                          \
            for (_nc_ = 0; _nc_ < (n); _nc_++) {    \
                PMIX_COORD_DESTRUCT(&(m)[_nc_]);    \
            }                                       \
            pmix_free((m));                         \
            (m) = NULL;                             \
        }                                           \
    } while(0)
1190+
1191+
11461192
/**** PMIX BYTE OBJECT ****/
11471193
typedef struct pmix_byte_object {
11481194
char *bytes;

opal/mca/pmix/pmix4x/pmix/include/pmix_sched.h

Lines changed: 20 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -64,16 +64,20 @@ extern "C" {
6464
typedef struct pmix_fabric_s {
6565
/* user-supplied name for this fabric */
6666
char *name;
67-
/* revision - tracks how many times the
68-
* fabric info has been updated. Used to detect
69-
* that a change has occurred since the last
70-
* time the data was accessed. Restricted to
71-
* PMIx-internal use */
72-
uint64_t revision;
73-
/* PMIx server-defined object for internal use */
67+
/* communication cost array - the number of vertices
68+
* (nverts) equals the number of interfaces in the
69+
* fabric. This equates to the number of columns & rows
70+
* in the commcost array as the matrix is symmetric */
71+
uint16_t **commcost;
72+
uint32_t nverts;
73+
/* object pointer for use by the PMIx server library */
7474
void *module;
7575
} pmix_fabric_t;
7676

77+
/* convenience macros to support pmix_fabric_t */

/* Zero every byte of the pmix_fabric_t that "x" points to. The macro
 * expands to the memset() call itself, so it can be used wherever an
 * expression is allowed; memset() is declared in <string.h>. */
#define PMIX_FABRIC_CONSTRUCT(x) \
    memset((x), 0, sizeof(pmix_fabric_t))
80+
7781
/* Register for access to fabric-related information, including
7882
* communication cost matrix. This call must be made prior to
7983
* requesting information from a fabric.
@@ -83,9 +87,9 @@ typedef struct pmix_fabric_s {
8387
* utilize this field
8488
*
8589
* directives - an optional array of values indicating desired
86-
* behaviors and/or fabric to be accessed via
87-
* the returned struct. If NULL, then the highest
88-
* priority available fabric will return the struct
90+
* behaviors and/or fabric to be accessed. If NULL,
91+
* then the highest priority available fabric will
92+
* be used
8993
*
9094
* ndirs - number of elements in the directives array
9195
*
@@ -106,62 +110,9 @@ PMIX_EXPORT pmix_status_t PMIx_server_register_fabric(pmix_fabric_t *fabric,
106110
*/
107111
PMIX_EXPORT pmix_status_t PMIx_server_deregister_fabric(pmix_fabric_t *fabric);
108112

109-
/* Get the number of vertices in the provided fabric.
110-
* To avoid blocking the caller, this function will
111-
* always return immediately, returning a PMIX_ERR_RESOURCE_BUSY
112-
* status if the matrix is in the process of being updated.
113-
*
114-
* fabric - pointer to the pmix_fabric_t struct provided
115-
* to the registration function
116-
*
117-
* nverts - pointer to the location where the number of
118-
* vertices is to be returned
119-
*
120-
* Return values include:
121-
*
122-
* PMIX_SUCCESS - indicates return of a valid value
123-
* PMIX_ERR_RESOURCE_BUSY - matrix is being updated
124-
* PMIX_ERR_FABRIC_UPDATED - fabric info has been updated since
125-
* last call involving this pmix_fabric_t
126-
*/
127-
PMIX_EXPORT pmix_status_t PMIx_server_get_num_vertices(pmix_fabric_t *fabric,
128-
uint32_t *nverts);
129-
130-
/* Obtain communication cost for messages transmitted from indicated
131-
* source to destination across the provided fabric - i.e.,
132-
* the value of the (src,dest) entry of that fabric's communication
133-
* cost matrix. To avoid blocking the caller, this function will
134-
* always return immediately, returning a PMIX_ERR_RESOURCE_BUSY
135-
* status if the matrix is in the process of being updated.
136-
*
137-
* fabric - pointer to the pmix_fabric_t struct provided to
138-
* the registration function
139-
*
140-
* src - the index of the originating vertex for the communication
141-
*
142-
* dest - the index of the destination vertex for the communication
143-
*
144-
* cost - pointer to the location where the cost is to be returned
145-
*
146-
* Return values include:
147-
*
148-
* PMIX_SUCCESS - indicates return of a valid value
149-
* PMIX_ERR_BAD_PARAM - src and/or dest is out of bounds
150-
* PMIX_ERR_RESOURCE_BUSY - matrix is being updated
151-
* PMIX_ERR_FABRIC_UPDATED - fabric info has been updated since
152-
* last call involving this pmix_fabric_t
153-
*/
154-
PMIX_EXPORT pmix_status_t PMIx_server_get_comm_cost(pmix_fabric_t *fabric,
155-
uint32_t src, uint32_t dest,
156-
uint16_t *cost);
157-
158-
/* Given a communication cost matrix index, return the corresponding
159-
* vertex info in the provided fabric and the name of the node upon
113+
/* Given a communication cost matrix index for a specified fabric,
114+
* return the corresponding vertex info and the name of the node upon
160115
* which it resides.
161-
* If the PMIX_ERR_RESOURCE_BUSY or PMIX_ERR_FABRIC_UPDATED status is
162-
* returned, then the caller should update their cost information
163-
* before re-issuing this request to ensure accurate correlation
164-
* between cost and LID
165116
*
166117
* fabric - pointer to the pmix_fabric_t struct provided to
167118
* the registration function
@@ -179,20 +130,14 @@ PMIX_EXPORT pmix_status_t PMIx_server_get_comm_cost(pmix_fabric_t *fabric,
179130
*
180131
* PMIX_SUCCESS - indicates return of a valid value
181132
* PMIX_ERR_BAD_PARAM - provided index is out of bounds
182-
* PMIX_ERR_RESOURCE_BUSY - matrix is being updated
183-
* PMIX_ERR_FABRIC_UPDATED - fabric info has been updated since
184-
* last call involving this pmix_fabric_t
185133
*/
186134
PMIX_EXPORT pmix_status_t PMIx_server_get_vertex_info(pmix_fabric_t *fabric,
187135
uint32_t i, pmix_value_t *vertex,
188136
char **nodename);
189137

190-
/* Given vertex info, return the corresponding communication cost matrix
191-
* index and the name of the node upon which it resides.
192-
* If the PMIX_ERR_RESOURCE_BUSY or PMIX_ERR_FABRIC_UPDATED status is
193-
* returned, then the caller should update their cost information
194-
* before re-issuing this request to ensure accurate correlation
195-
* between cost and LID
138+
/* Given vertex info and the name of the device upon which that
139+
* vertex resides, return the corresponding communication cost matrix
140+
* index
196141
*
197142
* fabric - pointer to the pmix_fabric_t struct provided to
198143
* the registration function
@@ -201,10 +146,6 @@ PMIX_EXPORT pmix_status_t PMIx_server_get_vertex_info(pmix_fabric_t *fabric,
201146
*
202147
* i - pointer to the location where the index is to be returned
203148
*
204-
* nodename - pointer to the location where the string nodename
205-
* is to be returned. The caller is responsible for
206-
* releasing the string when done
207-
*
208149
* Return values include:
209150
*
210151
* PMIX_SUCCESS - indicates return of a valid value
@@ -214,8 +155,7 @@ PMIX_EXPORT pmix_status_t PMIx_server_get_vertex_info(pmix_fabric_t *fabric,
214155
* last call involving this pmix_fabric_t
215156
*/
216157
PMIX_EXPORT pmix_status_t PMIx_server_get_index(pmix_fabric_t *fabric,
217-
pmix_value_t *vertex, uint32_t *i,
218-
char **nodename);
158+
pmix_value_t *vertex, uint32_t *i);
219159

220160
#if defined(c_plusplus) || defined(__cplusplus)
221161
}

0 commit comments

Comments
 (0)