Skip to content
This repository was archived by the owner on Sep 30, 2022. It is now read-only.

Commit c3caf15

Browse files
committed
Merge pull request #794 from tkordenbrock/topic/add.triggered.gather
coll-portals4: add gather and igather implementations that use Portals4 triggered operations
2 parents 2b22d17 + e84e727 commit c3caf15

File tree

5 files changed

+1473
-4
lines changed

5 files changed

+1473
-4
lines changed

ompi/mca/coll/portals4/Makefile.am

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ local_sources = \
1515
coll_portals4_barrier.c \
1616
coll_portals4_bcast.c \
1717
coll_portals4_reduce.c \
18+
coll_portals4_gather.c \
1819
coll_portals4_request.h \
1920
coll_portals4_request.c
2021

ompi/mca/coll/portals4/coll_portals4.h

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,18 @@
2424
#include "ompi/datatype/ompi_datatype_internal.h"
2525
#include "ompi/op/op.h"
2626
#include "ompi/mca/mca.h"
27+
#include "opal/datatype/opal_convertor.h"
2728
#include "ompi/mca/coll/coll.h"
2829
#include "ompi/request/request.h"
2930
#include "ompi/communicator/communicator.h"
3031
#include "ompi/mca/coll/base/base.h"
32+
#include "ompi/datatype/ompi_datatype.h"
33+
#include "ompi/mca/mtl/portals4/mtl_portals4_endpoint.h"
3134

3235
#include "ompi/mca/mtl/portals4/mtl_portals4.h"
3336

37+
#define MAXTREEFANOUT 32 /* capacity of the tree_next[] array in ompi_coll_portals4_tree_t */
38+
3439
BEGIN_C_DECLS
3540

3641
#define COLL_PORTALS4_NO_OP ((ptl_op_t)-1)
@@ -61,10 +66,27 @@ struct mca_coll_portals4_component_t {
6166

6267
ptl_ni_limits_t ni_limits;
6368

69+
int use_binomial_gather_algorithm;
70+
6471
};
6572
typedef struct mca_coll_portals4_component_t mca_coll_portals4_component_t;
6673
OMPI_MODULE_DECLSPEC extern mca_coll_portals4_component_t mca_coll_portals4_component;
6774

75+
76+
/*
77+
* Borrowed with thanks from the coll-tuned component, then modified for Portals4.
*
* Description of a communication tree. The module caches one instance in
* cached_in_order_bmtree; it is produced by
* ompi_coll_portals4_build_in_order_bmtree() and released with
* ompi_coll_portals4_destroy_tree() (see COLL_PORTALS4_UPDATE_IN_ORDER_BMTREE).
*
* NOTE(review): the per-field notes below are inferred from the field names
* only; confirm them against the tree-building code.
78+
*/
79+
typedef struct ompi_coll_portals4_tree_t {
80+
int32_t tree_root; /* presumably the rank of the tree root -- TODO confirm */
81+
int32_t tree_fanout; /* presumably the maximum number of children per node -- TODO confirm */
82+
int32_t tree_bmtree; /* presumably a flag marking a binomial tree -- TODO confirm */
83+
int32_t tree_prev; /* presumably the parent rank of this process -- TODO confirm */
84+
int32_t tree_next[MAXTREEFANOUT]; /* fixed-capacity array of MAXTREEFANOUT entries; presumably child ranks */
85+
int32_t tree_nextsize; /* presumably the number of valid entries in tree_next -- TODO confirm */
86+
int32_t tree_numdescendants; /* presumably the number of ranks below this process in the tree -- TODO confirm */
87+
} ompi_coll_portals4_tree_t;
88+
89+
6890
struct mca_coll_portals4_module_t {
6991
mca_coll_base_module_t super;
7092
size_t coll_count;
@@ -79,6 +101,13 @@ struct mca_coll_portals4_module_t {
79101
mca_coll_base_module_t *previous_allreduce_module;
80102
mca_coll_base_module_iallreduce_fn_t previous_iallreduce;
81103
mca_coll_base_module_t *previous_iallreduce_module;
104+
105+
/* binomial tree */
106+
ompi_coll_portals4_tree_t *cached_in_order_bmtree;
107+
int cached_in_order_bmtree_root;
108+
109+
size_t barrier_count;
110+
size_t gather_count;
82111
};
83112
typedef struct mca_coll_portals4_module_t mca_coll_portals4_module_t;
84113
OBJ_CLASS_DECLARATION(mca_coll_portals4_module_t);
@@ -135,6 +164,22 @@ int
135164
opal_stderr(const char *msg, const char *file,
136165
const int line, const int ret);
137166

167+
/*
168+
* Borrowed with thanks from the coll-tuned component.
*
* Ensure (PORTALS4_MODULE)->cached_in_order_bmtree was built for ROOT.
* When no tree is cached, or the cached root differs from ROOT, any existing
* tree is released with ompi_coll_portals4_destroy_tree() and a new one is
* built with ompi_coll_portals4_build_in_order_bmtree( OMPI_COMM, ROOT );
* ROOT is then recorded in cached_in_order_bmtree_root.
*
* PORTALS4_MODULE and ROOT are expanded more than once, so pass expressions
* without side effects. The result of the build call is not checked here;
* callers that need to detect a failed build must inspect
* cached_in_order_bmtree themselves.
169+
*/
170+
#define COLL_PORTALS4_UPDATE_IN_ORDER_BMTREE( OMPI_COMM, PORTALS4_MODULE, ROOT ) \
171+
do { \
172+
if( !( ((PORTALS4_MODULE)->cached_in_order_bmtree) \
173+
&& ((PORTALS4_MODULE)->cached_in_order_bmtree_root == (ROOT)) ) ) { \
174+
if( (PORTALS4_MODULE)->cached_in_order_bmtree ) { /* destroy previous binomial if defined */ \
175+
ompi_coll_portals4_destroy_tree( &((PORTALS4_MODULE)->cached_in_order_bmtree) ); \
176+
} \
177+
(PORTALS4_MODULE)->cached_in_order_bmtree = ompi_coll_portals4_build_in_order_bmtree( (OMPI_COMM), (ROOT) ); \
178+
(PORTALS4_MODULE)->cached_in_order_bmtree_root = (ROOT); \
179+
} \
180+
} while (0)
181+
182+
138183
int ompi_coll_portals4_barrier_intra(struct ompi_communicator_t *comm,
139184
mca_coll_base_module_t *module);
140185
int ompi_coll_portals4_ibarrier_intra(struct ompi_communicator_t *comm,
@@ -177,6 +222,20 @@ int ompi_coll_portals4_iallreduce_intra(const void* sendbuf, void* recvbuf, int
177222
int
178223
ompi_coll_portals4_iallreduce_intra_fini(struct ompi_coll_portals4_request_t *request);
179224

225+
/*
 * Gather implementation; portals4_comm_query() installs it as the module's
 * coll_gather. The component parameter use_binomial_gather_algorithm is
 * registered with the description "if 1 use a binomial tree algorithm for
 * gather, otherwise use linear".
 */
int ompi_coll_portals4_gather_intra(const void *sbuf, int scount, struct ompi_datatype_t *sdtype,
226+
void *rbuf, int rcount, struct ompi_datatype_t *rdtype,
227+
int root,
228+
struct ompi_communicator_t *comm,
229+
mca_coll_base_module_t *module);
230+
/*
 * Request-based variant of gather (takes an extra ompi_request_t **);
 * portals4_comm_query() installs it as the module's coll_igather.
 */
int ompi_coll_portals4_igather_intra(const void *sbuf, int scount, struct ompi_datatype_t *sdtype,
231+
void *rbuf, int rcount, struct ompi_datatype_t *rdtype,
232+
int root,
233+
struct ompi_communicator_t *comm,
234+
ompi_request_t **request,
235+
mca_coll_base_module_t *module);
236+
/*
 * Finalization step for an igather request; portals4_progress() calls it for
 * requests whose type is OMPI_COLL_PORTALS4_TYPE_GATHER.
 */
int ompi_coll_portals4_igather_intra_fini(struct ompi_coll_portals4_request_t *request);
237+
238+
180239
static inline ptl_process_t
181240
ompi_coll_portals4_get_peer(struct ompi_communicator_t *comm, int rank)
182241
{
@@ -357,6 +416,43 @@ void get_k_ary_tree(const unsigned int k_ary,
357416
return;
358417
}
359418

419+
420+
/*
 * Construct `converter` as an opal_convertor_t and prepare it for receiving
 * `count` elements of `datatype` into `target`, using
 * proc->super.proc_convertor as the source convertor.
 *
 * The return value of opal_convertor_copy_and_prepare_for_recv() is not
 * checked, and this function returns nothing.
 * NOTE(review): the caller presumably has to OBJ_DESTRUCT the converter when
 * finished with it -- confirm against the callers.
 */
static inline void
421+
ompi_coll_portals4_create_recv_converter (opal_convertor_t *converter,
422+
void *target,
423+
ompi_proc_t *proc,
424+
int count,
425+
ompi_datatype_t *datatype)
426+
{
427+
/* run the opal_convertor_t constructor on the caller-provided storage */
428+
OBJ_CONSTRUCT(converter, opal_convertor_t);
429+
430+
/* set the converter up for receiving into target; a literal 0 is passed as the fifth argument */
431+
opal_convertor_copy_and_prepare_for_recv(proc->super.proc_convertor,
432+
&datatype->super,
433+
count,
434+
target,
435+
0,
436+
converter);
437+
}
438+
439+
/*
 * Construct `converter` as an opal_convertor_t and prepare it for sending
 * `count` elements of `datatype` from `source`, using
 * proc->super.proc_convertor as the source convertor.
 *
 * The return value of opal_convertor_copy_and_prepare_for_send() is not
 * checked, and this function returns nothing.
 * NOTE(review): the caller presumably has to OBJ_DESTRUCT the converter when
 * finished with it -- confirm against the callers.
 */
static inline void
440+
ompi_coll_portals4_create_send_converter (opal_convertor_t *converter,
441+
const void *source,
442+
ompi_proc_t *proc,
443+
int count,
444+
ompi_datatype_t *datatype)
445+
{
446+
OBJ_CONSTRUCT(converter, opal_convertor_t); /* run the constructor on caller-provided storage */
447+
448+
opal_convertor_copy_and_prepare_for_send(proc->super.proc_convertor,
449+
&datatype->super,
450+
count,
451+
source,
452+
0, /* literal 0 passed as the fifth argument */
453+
converter);
454+
}
455+
360456
END_C_DECLS
361457

362458
#endif /* MCA_COLL_PORTALS4_EXPORT_H */

ompi/mca/coll/portals4/coll_portals4_component.c

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,14 @@ portals4_register(void)
203203
MCA_BASE_VAR_SCOPE_READONLY,
204204
&mca_coll_portals4_priority);
205205

206+
mca_coll_portals4_component.use_binomial_gather_algorithm = 0;
207+
(void) mca_base_component_var_register(&mca_coll_portals4_component.super.collm_version, "use_binomial_gather_algorithm",
208+
"if 1 use a binomial tree algorithm for gather, otherwise use linear",
209+
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
210+
OPAL_INFO_LVL_9,
211+
MCA_BASE_VAR_SCOPE_READONLY,
212+
&mca_coll_portals4_component.use_binomial_gather_algorithm);
213+
206214
return OMPI_SUCCESS;
207215
}
208216

@@ -463,7 +471,7 @@ portals4_init_query(bool enable_progress_threads,
463471
__FILE__, __LINE__, ret);
464472
return OMPI_ERROR;
465473
}
466-
OPAL_OUTPUT_VERBOSE((90, ompi_coll_base_framework.framework_output, "PtlMDBind start=%p length=%x\n", md.start, md.length));
474+
OPAL_OUTPUT_VERBOSE((90, ompi_coll_base_framework.framework_output, "PtlMDBind start=%p length=%lx\n", md.start, md.length));
467475

468476
/* setup finish ack ME */
469477
me.start = NULL;
@@ -472,7 +480,7 @@ portals4_init_query(bool enable_progress_threads,
472480
me.min_free = 0;
473481
me.uid = mca_coll_portals4_component.uid;
474482
me.options = PTL_ME_OP_PUT |
475-
PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE;
483+
PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE;
476484
me.match_id.phys.nid = PTL_NID_ANY;
477485
me.match_id.phys.pid = PTL_PID_ANY;
478486
me.match_bits = 0;
@@ -584,6 +592,12 @@ portals4_comm_query(struct ompi_communicator_t *comm,
584592
portals4_module->super.coll_barrier = ompi_coll_portals4_barrier_intra;
585593
portals4_module->super.coll_ibarrier = ompi_coll_portals4_ibarrier_intra;
586594

595+
portals4_module->super.coll_gather = ompi_coll_portals4_gather_intra;
596+
portals4_module->super.coll_igather = ompi_coll_portals4_igather_intra;
597+
598+
portals4_module->cached_in_order_bmtree=NULL;
599+
portals4_module->cached_in_order_bmtree_root=-1;
600+
587601
portals4_module->super.coll_bcast = ompi_coll_portals4_bcast_intra;
588602
portals4_module->super.coll_ibcast = ompi_coll_portals4_ibcast_intra;
589603

@@ -593,6 +607,9 @@ portals4_comm_query(struct ompi_communicator_t *comm,
593607
portals4_module->super.coll_reduce = ompi_coll_portals4_reduce_intra;
594608
portals4_module->super.coll_ireduce = ompi_coll_portals4_ireduce_intra;
595609

610+
portals4_module->barrier_count = 0;
611+
portals4_module->gather_count = 0;
612+
596613
return &(portals4_module->super);
597614
}
598615

@@ -689,9 +706,11 @@ portals4_progress(void)
689706
ompi_coll_portals4_iallreduce_intra_fini(ptl_request);
690707
break;
691708
case OMPI_COLL_PORTALS4_TYPE_SCATTER:
692-
case OMPI_COLL_PORTALS4_TYPE_GATHER:
693709
opal_output(ompi_coll_base_framework.framework_output,
694-
"allreduce is not supported yet\n");
710+
"scatter is not supported yet\n");
711+
break;
712+
case OMPI_COLL_PORTALS4_TYPE_GATHER:
713+
ompi_coll_portals4_igather_intra_fini(ptl_request);
695714
break;
696715
}
697716
}

0 commit comments

Comments
 (0)