Skip to content

Commit 0b273c2

Browse files
authored
Merge pull request #2808 from jjhursey/fix/ibm/reduce-local-to-coll
coll: Move reduce_local into the coll framework
2 parents a17b547 + 78006f9 commit 0b273c2

38 files changed

+360
-185
lines changed

ompi/communicator/communicator.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
2121
* Copyright (c) 2015 Research Organization for Information Science
2222
* and Technology (RIST). All rights reserved.
23+
* Copyright (c) 2017 IBM Corporation. All rights reserved.
2324
* $COPYRIGHT$
2425
*
2526
* Additional copyrights may follow
@@ -232,6 +233,28 @@ typedef struct ompi_communicator_t ompi_communicator_t;
232233
* size so when the bitness changes the size of the handle changes.
233234
* This is done so we don't end up needing a structure that is
234235
* incredibly larger than necessary because of the bitness.
236+
*
237+
* This padding mechanism works as a (likely) compile time check for when the
238+
* size of the ompi_communicator_t exceeds the predetermined size of the
239+
* ompi_predefined_communicator_t. It also allows us to change the size of
240+
* the ompi_communicator_t without impacting the size of the
241+
* ompi_predefined_communicator_t structure for some number of additions.
242+
*
243+
* As an example:
244+
* If the size of ompi_communicator_t is less than the size of the _PAD then
245+
* the _PAD ensures that the size of the ompi_predefined_communicator_t is
246+
* whatever size is defined below in the _PAD macro.
247+
* However, if the size of the ompi_communicator_t grows larger than the _PAD
248+
* (say by adding a few more function pointers to the structure) then the
249+
* 'padding' variable will be initialized to a large number, often triggering
250+
* an 'array is too large' compile time error. This signals two things:
251+
* 1) That the _PAD should be increased.
252+
* 2) That users need to be made aware of the size change for the
253+
* ompi_predefined_communicator_t structure.
254+
*
255+
* Q: So you just made a change to the communicator structure, do you need to adjust
256+
* the PREDEFINED_COMMUNICATOR_PAD macro?
257+
* A: Most likely not, but it would be good to check.
235258
*/
236259
#define PREDEFINED_COMMUNICATOR_PAD (sizeof(void*) * 192)
237260

ompi/mca/coll/base/coll_base_comm_select.c

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
* reserved.
2020
* Copyright (c) 2014 Research Organization for Information Science
2121
* and Technology (RIST). All rights reserved.
22-
* Copyright (c) 2016 IBM Corporation. All rights reserved.
22+
* Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
2323
* $COPYRIGHT$
2424
*
2525
* Additional copyrights may follow
@@ -52,7 +52,7 @@ struct avail_coll_t {
5252
opal_list_item_t super;
5353

5454
int ac_priority;
55-
mca_coll_base_module_2_1_0_t *ac_module;
55+
mca_coll_base_module_2_2_0_t *ac_module;
5656
const char * ac_component_name;
5757
};
5858
typedef struct avail_coll_t avail_coll_t;
@@ -65,16 +65,16 @@ static opal_list_t *check_components(opal_list_t * components,
6565
ompi_communicator_t * comm);
6666
static int check_one_component(ompi_communicator_t * comm,
6767
const mca_base_component_t * component,
68-
mca_coll_base_module_2_1_0_t ** module);
68+
mca_coll_base_module_2_2_0_t ** module);
6969

7070
static int query(const mca_base_component_t * component,
7171
ompi_communicator_t * comm, int *priority,
72-
mca_coll_base_module_2_1_0_t ** module);
72+
mca_coll_base_module_2_2_0_t ** module);
7373

7474
static int query_2_0_0(const mca_coll_base_component_2_0_0_t *
7575
coll_component, ompi_communicator_t * comm,
7676
int *priority,
77-
mca_coll_base_module_2_1_0_t ** module);
77+
mca_coll_base_module_2_2_0_t ** module);
7878

7979
/*
8080
* Stuff for the OBJ interface
@@ -203,6 +203,8 @@ int mca_coll_base_comm_select(ompi_communicator_t * comm)
203203
COPY(avail->ac_module, comm, ineighbor_alltoall);
204204
COPY(avail->ac_module, comm, ineighbor_alltoallv);
205205
COPY(avail->ac_module, comm, ineighbor_alltoallw);
206+
207+
COPY(avail->ac_module, comm, reduce_local);
206208
}
207209
/* release the original module reference and the list item */
208210
OBJ_RELEASE(avail->ac_module);
@@ -246,7 +248,8 @@ int mca_coll_base_comm_select(ompi_communicator_t * comm)
246248
CHECK_NULL(which_func, comm, ireduce_scatter) ||
247249
((OMPI_COMM_IS_INTRA(comm)) && CHECK_NULL(which_func, comm, iscan)) ||
248250
CHECK_NULL(which_func, comm, iscatter) ||
249-
CHECK_NULL(which_func, comm, iscatterv)) {
251+
CHECK_NULL(which_func, comm, iscatterv) ||
252+
CHECK_NULL(which_func, comm, reduce_local) ) {
250253
/* TODO -- Once the topology flags are set before coll_select then
251254
* check if neighborhood collectives have been set. */
252255

@@ -285,7 +288,7 @@ static opal_list_t *check_components(opal_list_t * components,
285288
int priority;
286289
const mca_base_component_t *component;
287290
mca_base_component_list_item_t *cli;
288-
mca_coll_base_module_2_1_0_t *module;
291+
mca_coll_base_module_2_2_0_t *module;
289292
opal_list_t *selectable;
290293
avail_coll_t *avail;
291294

@@ -341,7 +344,7 @@ static opal_list_t *check_components(opal_list_t * components,
341344
*/
342345
static int check_one_component(ompi_communicator_t * comm,
343346
const mca_base_component_t * component,
344-
mca_coll_base_module_2_1_0_t ** module)
347+
mca_coll_base_module_2_2_0_t ** module)
345348
{
346349
int err;
347350
int priority = -1;
@@ -375,7 +378,7 @@ static int check_one_component(ompi_communicator_t * comm,
375378
*/
376379
static int query(const mca_base_component_t * component,
377380
ompi_communicator_t * comm,
378-
int *priority, mca_coll_base_module_2_1_0_t ** module)
381+
int *priority, mca_coll_base_module_2_2_0_t ** module)
379382
{
380383
*module = NULL;
381384
if (2 == component->mca_type_major_version &&
@@ -395,9 +398,9 @@ static int query(const mca_base_component_t * component,
395398

396399
static int query_2_0_0(const mca_coll_base_component_2_0_0_t * component,
397400
ompi_communicator_t * comm, int *priority,
398-
mca_coll_base_module_2_1_0_t ** module)
401+
mca_coll_base_module_2_2_0_t ** module)
399402
{
400-
mca_coll_base_module_2_1_0_t *ret;
403+
mca_coll_base_module_2_2_0_t *ret;
401404

402405
/* There's currently no need for conversion */
403406

ompi/mca/coll/base/coll_base_comm_unselect.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
1515
* Copyright (c) 2014 Research Organization for Information Science
1616
* and Technology (RIST). All rights reserved.
17+
* Copyright (c) 2017 IBM Corporation. All rights reserved.
1718
* $COPYRIGHT$
1819
*
1920
* Additional copyrights may follow
@@ -98,6 +99,8 @@ int mca_coll_base_comm_unselect(ompi_communicator_t * comm)
9899
CLOSE(comm, ineighbor_alltoallv);
99100
CLOSE(comm, ineighbor_alltoallw);
100101

102+
CLOSE(comm, reduce_local);
103+
101104
/* All done */
102105
return OMPI_SUCCESS;
103106
}

ompi/mca/coll/base/coll_base_functions.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
* reserved.
1717
* Copyright (c) 2015 Research Organization for Information Science
1818
* and Technology (RIST). All rights reserved.
19+
* Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
1920
* $COPYRIGHT$
2021
*
2122
* Additional copyrights may follow
@@ -169,6 +170,11 @@ int ompi_coll_base_scatter_intra_binomial(SCATTER_ARGS);
169170

170171
/* ScatterV */
171172

173+
/* Reduce_local */
174+
int mca_coll_base_reduce_local(const void *inbuf, void *inoutbuf, int count,
175+
struct ompi_datatype_t * dtype, struct ompi_op_t * op,
176+
mca_coll_base_module_t *module);
177+
172178
END_C_DECLS
173179

174180
#define COLL_BASE_UPDATE_BINTREE( OMPI_COMM, BASE_MODULE, ROOT ) \

ompi/mca/coll/base/coll_base_reduce.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
* reserved.
1515
* Copyright (c) 2015-2016 Research Organization for Information Science
1616
* and Technology (RIST). All rights reserved.
17+
* Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
1718
* $COPYRIGHT$
1819
*
1920
* Additional copyrights may follow
@@ -34,6 +35,15 @@
3435
#include "ompi/mca/coll/base/coll_base_functions.h"
3536
#include "coll_base_topo.h"
3637

38+
/*
 * Base implementation of the reduce_local collective entry point.
 *
 * Hands the reduction straight to ompi_op_reduce() with (op, inbuf,
 * inoutbuf, count, dtype). No communicator is involved in this call.
 *
 * inbuf    - input operand buffer; passed to ompi_op_reduce() with its
 *            const qualifier cast away (see the note in the body)
 * inoutbuf - second buffer handed to ompi_op_reduce(); presumably the
 *            in/out operand that receives the result -- confirm against
 *            ompi/op/op.h
 * count    - element count forwarded unchanged
 * dtype    - datatype forwarded unchanged
 * op       - reduction operation forwarded unchanged
 * module   - not referenced by this implementation
 *
 * Always returns OMPI_SUCCESS; nothing from ompi_op_reduce() is checked here.
 */
int mca_coll_base_reduce_local(const void *inbuf, void *inoutbuf, int count,
39+
struct ompi_datatype_t * dtype, struct ompi_op_t * op,
40+
mca_coll_base_module_t *module)
41+
{
42+
/* XXX -- CONST -- do not cast away const -- update ompi/op/op.h */
/* The cast below is needed only because ompi_op_reduce() currently takes a
 * non-const source pointer; it goes away once ompi/op/op.h is updated. */
43+
ompi_op_reduce(op, (void *)inbuf, inoutbuf, count, dtype);
44+
return OMPI_SUCCESS;
45+
}
46+
3747
/**
3848
* This is a generic implementation of the reduce protocol. It uses the tree
3848
* provided as an argument and executes all operations using a segment of

ompi/mca/coll/basic/coll_basic_module.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
* reserved.
1616
* Copyright (c) 2015 Research Organization for Information Science
1717
* and Technology (RIST). All rights reserved.
18+
* Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
1819
* $COPYRIGHT$
1920
*
2021
* Additional copyrights may follow
@@ -132,6 +133,8 @@ mca_coll_basic_comm_query(struct ompi_communicator_t *comm,
132133
basic_module->super.coll_neighbor_alltoallv = mca_coll_basic_neighbor_alltoallv;
133134
basic_module->super.coll_neighbor_alltoallw = mca_coll_basic_neighbor_alltoallw;
134135

136+
basic_module->super.coll_reduce_local = mca_coll_base_reduce_local;
137+
135138
return &(basic_module->super);
136139
}
137140

0 commit comments

Comments
 (0)