@@ -26,6 +26,8 @@ static inline int bcast_sched_linear(int rank, int p, int root, NBC_Schedule *sc
2626 MPI_Datatype datatype );
2727static inline int bcast_sched_chain (int rank , int p , int root , NBC_Schedule * schedule , void * buffer , int count ,
2828 MPI_Datatype datatype , int fragsize , size_t size );
29+ static inline int bcast_sched_knomial (int rank , int comm_size , int root , NBC_Schedule * schedule , void * buf ,
30+ int count , MPI_Datatype datatype , int knomial_radix );
2931
3032#ifdef NBC_CACHE_SCHEDULE
3133/* tree comparison function for schedule cache */
@@ -55,7 +57,7 @@ static int nbc_bcast_init(void *buffer, int count, MPI_Datatype datatype, int ro
5557#ifdef NBC_CACHE_SCHEDULE
5658 NBC_Bcast_args * args , * found , search ;
5759#endif
58- enum { NBC_BCAST_LINEAR , NBC_BCAST_BINOMIAL , NBC_BCAST_CHAIN } alg ;
60+ enum { NBC_BCAST_LINEAR , NBC_BCAST_BINOMIAL , NBC_BCAST_CHAIN , NBC_BCAST_KNOMIAL } alg ;
5961 ompi_coll_libnbc_module_t * libnbc_module = (ompi_coll_libnbc_module_t * ) module ;
6062
6163 rank = ompi_comm_rank (comm );
@@ -73,25 +75,40 @@ static int nbc_bcast_init(void *buffer, int count, MPI_Datatype datatype, int ro
7375
7476 segsize = 16384 ;
7577 /* algorithm selection */
76- if ( libnbc_ibcast_skip_dt_decision ) {
77- if (p <= 4 ) {
78- alg = NBC_BCAST_LINEAR ;
78+ if (libnbc_ibcast_algorithm == 0 ) {
79+ if ( libnbc_ibcast_skip_dt_decision ) {
80+ if (p <= 4 ) {
81+ alg = NBC_BCAST_LINEAR ;
82+ }
83+ else {
84+ alg = NBC_BCAST_BINOMIAL ;
85+ }
7986 }
8087 else {
81- alg = NBC_BCAST_BINOMIAL ;
88+ if (p <= 4 ) {
89+ alg = NBC_BCAST_LINEAR ;
90+ } else if (size * count < 65536 ) {
91+ alg = NBC_BCAST_BINOMIAL ;
92+ } else if (size * count < 524288 ) {
93+ alg = NBC_BCAST_CHAIN ;
94+ segsize = 8192 ;
95+ } else {
96+ alg = NBC_BCAST_CHAIN ;
97+ segsize = 32768 ;
98+ }
8299 }
83- }
84- else {
85- if (p <= 4 ) {
100+ } else {
101+ /* user forced dynamic decision */
102+ if (libnbc_ibcast_algorithm == 1 ) {
86103 alg = NBC_BCAST_LINEAR ;
87- } else if (size * count < 65536 ) {
104+ } else if (libnbc_ibcast_algorithm == 2 ) {
88105 alg = NBC_BCAST_BINOMIAL ;
89- } else if (size * count < 524288 ) {
106+ } else if (libnbc_ibcast_algorithm == 3 ) {
90107 alg = NBC_BCAST_CHAIN ;
91- segsize = 8192 ;
108+ } else if (libnbc_ibcast_algorithm == 4 && libnbc_ibcast_knomial_radix > 1 ) {
109+ alg = NBC_BCAST_KNOMIAL ;
92110 } else {
93- alg = NBC_BCAST_CHAIN ;
94- segsize = 32768 ;
111+ alg = NBC_BCAST_LINEAR ;
95112 }
96113 }
97114
@@ -119,6 +136,9 @@ static int nbc_bcast_init(void *buffer, int count, MPI_Datatype datatype, int ro
119136 case NBC_BCAST_CHAIN :
120137 res = bcast_sched_chain (rank , p , root , schedule , buffer , count , datatype , segsize , size );
121138 break ;
139+ case NBC_BCAST_KNOMIAL :
140+ res = bcast_sched_knomial (rank , p , root , schedule , buffer , count , datatype , libnbc_ibcast_knomial_radix );
141+ break ;
122142 }
123143
124144 if (OPAL_UNLIKELY (OMPI_SUCCESS != res )) {
@@ -342,6 +362,53 @@ static inline int bcast_sched_chain(int rank, int p, int root, NBC_Schedule *sch
342362 return OMPI_SUCCESS ;
343363}
344364
365+ /*
366+ * bcast_sched_knomial:
367+ *
368+ * Description: an implementation of Ibcast using k-nomial tree algorithm
369+ *
370+ * Time: (radix - 1)O(log_{radix}(comm_size))
371+ * Memory: O(reqs_max)
372+ * Schedule length (rounds): O(log(comm_size))
373+ */
374+ static inline int bcast_sched_knomial (
375+ int rank , int comm_size , int root , NBC_Schedule * schedule , void * buf ,
376+ int count , MPI_Datatype datatype , int knomial_radix )
377+ {
378+ int res = OMPI_SUCCESS ;
379+
380+ /* Receive from parent */
381+ int vrank = (rank - root + comm_size ) % comm_size ;
382+ int mask = 0x1 ;
383+ while (mask < comm_size ) {
384+ if (vrank % (knomial_radix * mask )) {
385+ int parent = vrank / (knomial_radix * mask ) * (knomial_radix * mask );
386+ parent = (parent + root ) % comm_size ;
387+ res = NBC_Sched_recv (buf , false, count , datatype , parent , schedule , true);
388+ if (OPAL_UNLIKELY (OMPI_SUCCESS != res )) { goto cleanup_and_return ; }
389+ break ;
390+ }
391+ mask *= knomial_radix ;
392+ }
393+ mask /= knomial_radix ;
394+
395+ /* Send data to all children */
396+ while (mask > 0 ) {
397+ for (int r = 1 ; r < knomial_radix ; r ++ ) {
398+ int child = vrank + mask * r ;
399+ if (child < comm_size ) {
400+ child = (child + root ) % comm_size ;
401+ res = NBC_Sched_send (buf , false, count , datatype , child , schedule , false);
402+ if (OPAL_UNLIKELY (OMPI_SUCCESS != res )) { goto cleanup_and_return ; }
403+ }
404+ }
405+ mask /= knomial_radix ;
406+ }
407+
408+ cleanup_and_return :
409+ return res ;
410+ }
411+
345412static int nbc_bcast_inter_init (void * buffer , int count , MPI_Datatype datatype , int root ,
346413 struct ompi_communicator_t * comm , ompi_request_t * * request ,
347414 struct mca_coll_base_module_2_3_0_t * module , bool persistent ) {
0 commit comments