2424#include "ompi/datatype/ompi_datatype_internal.h"
2525#include "ompi/op/op.h"
2626#include "ompi/mca/mca.h"
27+ #include "opal/datatype/opal_convertor.h"
2728#include "ompi/mca/coll/coll.h"
2829#include "ompi/request/request.h"
2930#include "ompi/communicator/communicator.h"
3031#include "ompi/mca/coll/base/base.h"
32+ #include "ompi/datatype/ompi_datatype.h"
33+ #include "ompi/mca/mtl/portals4/mtl_portals4_endpoint.h"
3134
3235#include "ompi/mca/mtl/portals4/mtl_portals4.h"
3336
/* Capacity of the tree_next[] array in ompi_coll_portals4_tree_t. */
#define MAXTREEFANOUT 32
38+
3439BEGIN_C_DECLS
3540
3641#define COLL_PORTALS4_NO_OP ((ptl_op_t)-1)
@@ -61,10 +66,27 @@ struct mca_coll_portals4_component_t {
6166
6267 ptl_ni_limits_t ni_limits ;
6368
69+ int use_binomial_gather_algorithm ;
70+
6471};
6572typedef struct mca_coll_portals4_component_t mca_coll_portals4_component_t ;
6673OMPI_MODULE_DECLSPEC extern mca_coll_portals4_component_t mca_coll_portals4_component ;
6774
75+
76+ /*
77+ * Borrowed with thanks from the coll-tuned component, then modified for Portals4.
78+ */
79+ typedef struct ompi_coll_portals4_tree_t {
80+ int32_t tree_root ;
81+ int32_t tree_fanout ;
82+ int32_t tree_bmtree ;
83+ int32_t tree_prev ;
84+ int32_t tree_next [MAXTREEFANOUT ];
85+ int32_t tree_nextsize ;
86+ int32_t tree_numdescendants ;
87+ } ompi_coll_portals4_tree_t ;
88+
89+
6890struct mca_coll_portals4_module_t {
6991 mca_coll_base_module_t super ;
7092 size_t coll_count ;
@@ -79,6 +101,13 @@ struct mca_coll_portals4_module_t {
79101 mca_coll_base_module_t * previous_allreduce_module ;
80102 mca_coll_base_module_iallreduce_fn_t previous_iallreduce ;
81103 mca_coll_base_module_t * previous_iallreduce_module ;
104+
105+ /* binomial tree */
106+ ompi_coll_portals4_tree_t * cached_in_order_bmtree ;
107+ int cached_in_order_bmtree_root ;
108+
109+ size_t barrier_count ;
110+ size_t gather_count ;
82111};
83112typedef struct mca_coll_portals4_module_t mca_coll_portals4_module_t ;
84113OBJ_CLASS_DECLARATION (mca_coll_portals4_module_t );
135164opal_stderr (const char * msg , const char * file ,
136165 const int line , const int ret );
137166
/*
 * Borrowed with thanks from the coll-tuned component.
 *
 * Make sure the in-order binomial tree cached on PORTALS4_MODULE was built
 * for ROOT.  When no tree is cached, or the cached tree belongs to another
 * root, any existing tree is released with ompi_coll_portals4_destroy_tree()
 * and a new one is obtained from ompi_coll_portals4_build_in_order_bmtree()
 * for (OMPI_COMM, ROOT); the new root is then recorded.  Otherwise nothing
 * is done.
 *
 * The value returned by the build call is stored without being checked here.
 * PORTALS4_MODULE and ROOT are evaluated several times, so pass expressions
 * without side effects.
 *
 * There must be no whitespace between the macro name and the opening
 * parenthesis: with a space the preprocessor defines an object-like macro
 * and every use site expands incorrectly.
 */
#define COLL_PORTALS4_UPDATE_IN_ORDER_BMTREE(OMPI_COMM, PORTALS4_MODULE, ROOT)                   \
    do {                                                                                         \
        if (!(((PORTALS4_MODULE)->cached_in_order_bmtree)                                        \
              && ((PORTALS4_MODULE)->cached_in_order_bmtree_root == (ROOT)))) {                  \
            if ((PORTALS4_MODULE)->cached_in_order_bmtree) { /* destroy previous binomial if defined */ \
                ompi_coll_portals4_destroy_tree(&((PORTALS4_MODULE)->cached_in_order_bmtree));   \
            }                                                                                    \
            (PORTALS4_MODULE)->cached_in_order_bmtree =                                          \
                ompi_coll_portals4_build_in_order_bmtree((OMPI_COMM), (ROOT));                   \
            (PORTALS4_MODULE)->cached_in_order_bmtree_root = (ROOT);                             \
        }                                                                                        \
    } while (0)
181+
182+
138183int ompi_coll_portals4_barrier_intra (struct ompi_communicator_t * comm ,
139184 mca_coll_base_module_t * module );
140185int ompi_coll_portals4_ibarrier_intra (struct ompi_communicator_t * comm ,
@@ -177,6 +222,20 @@ int ompi_coll_portals4_iallreduce_intra(const void* sendbuf, void* recvbuf, int
/*
 * NOTE(review): presumably the completion/cleanup step for a request started
 * by ompi_coll_portals4_iallreduce_intra(); confirm against the implementation.
 */
int
ompi_coll_portals4_iallreduce_intra_fini(struct ompi_coll_portals4_request_t *request);
179224
225+ int ompi_coll_portals4_gather_intra (const void * sbuf , int scount , struct ompi_datatype_t * sdtype ,
226+ void * rbuf , int rcount , struct ompi_datatype_t * rdtype ,
227+ int root ,
228+ struct ompi_communicator_t * comm ,
229+ mca_coll_base_module_t * module );
230+ int ompi_coll_portals4_igather_intra (const void * sbuf , int scount , struct ompi_datatype_t * sdtype ,
231+ void * rbuf , int rcount , struct ompi_datatype_t * rdtype ,
232+ int root ,
233+ struct ompi_communicator_t * comm ,
234+ ompi_request_t * * request ,
235+ mca_coll_base_module_t * module );
236+ int ompi_coll_portals4_igather_intra_fini (struct ompi_coll_portals4_request_t * request );
237+
238+
180239static inline ptl_process_t
181240ompi_coll_portals4_get_peer (struct ompi_communicator_t * comm , int rank )
182241{
@@ -357,6 +416,43 @@ void get_k_ary_tree(const unsigned int k_ary,
357416 return ;
358417}
359418
419+
420+ static inline void
421+ ompi_coll_portals4_create_recv_converter (opal_convertor_t * converter ,
422+ void * target ,
423+ ompi_proc_t * proc ,
424+ int count ,
425+ ompi_datatype_t * datatype )
426+ {
427+ /* create converter */
428+ OBJ_CONSTRUCT (converter , opal_convertor_t );
429+
430+ /* initialize converter */
431+ opal_convertor_copy_and_prepare_for_recv (proc -> super .proc_convertor ,
432+ & datatype -> super ,
433+ count ,
434+ target ,
435+ 0 ,
436+ converter );
437+ }
438+
439+ static inline void
440+ ompi_coll_portals4_create_send_converter (opal_convertor_t * converter ,
441+ const void * source ,
442+ ompi_proc_t * proc ,
443+ int count ,
444+ ompi_datatype_t * datatype )
445+ {
446+ OBJ_CONSTRUCT (converter , opal_convertor_t );
447+
448+ opal_convertor_copy_and_prepare_for_send (proc -> super .proc_convertor ,
449+ & datatype -> super ,
450+ count ,
451+ source ,
452+ 0 ,
453+ converter );
454+ }
455+
360456END_C_DECLS
361457
362458#endif /* MCA_COLL_PORTALS4_EXPORT_H */
0 commit comments