@@ -357,4 +357,190 @@ Output variables:
357357 time [0 ] = t_pure ;
358358}
359359
360+ #elif defined MPI4 // NBC
361+
362+ /*************************************************************************/
363+
364+ void IMB_alltoallv_persist (struct comm_info * c_info ,
365+ int size ,
366+ struct iter_schedule * ITERATIONS ,
367+ MODES RUN_MODE ,
368+ double * time )
369+ /*
370+
371+ MPI4 benchmark kernel
372+ Benchmarks MPI_Alltoallv_init
373+
374+ Input variables:
375+
376+ -c_info (type struct comm_info*)
377+ Collection of all base data for MPI;
378+ see [1] for more information
379+
380+
381+ -size (type int)
382+ Basic message size in bytes
383+
384+ -ITERATIONS (type struct iter_schedule *)
385+ Repetition scheduling
386+
387+ -RUN_MODE (type MODES)
388+
389+ Output variables:
390+
391+ -time (type double*)
392+ Timing result per sample
393+
394+ */
395+ {
396+ int i = 0 ;
397+ MPI_Request request ;
398+ MPI_Status status ;
399+ double t_pure = 0. ,
400+ t_comp = 0. ,
401+ t_ovrlp = 0. ;
402+
403+ /* GET SIZE OF DATA TYPE */
404+
405+ if (c_info -> rank != -1 ) {
406+ /* GET PURE TIME. DISPLACEMENTS AND RECEIVE COUNTS WILL BE INITIALIZED HERE */
407+ IMB_alltoallv_pure_persist (c_info , size , ITERATIONS , RUN_MODE , & t_pure );
408+
409+ /* INITIALIZATION CALL */
410+ IMB_cpu_exploit (t_pure , 1 );
411+
412+ IMB_do_n_barriers (c_info -> communicator , N_BARR );
413+
414+ // Create a persistent collective operation
415+ MPI_ERRHAND (MPI_Alltoallv_init ((char * )c_info -> s_buffer + i % ITERATIONS -> s_cache_iter * ITERATIONS -> s_offs ,
416+ c_info -> sndcnt ,
417+ c_info -> sdispl ,
418+ c_info -> s_data_type ,
419+ (char * )c_info -> r_buffer + i % ITERATIONS -> r_cache_iter * ITERATIONS -> r_offs ,
420+ c_info -> reccnt ,
421+ c_info -> rdispl ,
422+ c_info -> r_data_type ,
423+ c_info -> communicator ,
424+ c_info -> info ,
425+ & request ));
426+ for (i = 0 ; i < ITERATIONS -> n_sample ; i ++ ) {
427+ t_ovrlp -= MPI_Wtime ();
428+ // Start the persistent request
429+ MPI_ERRHAND (MPI_Start (& request ));
430+
431+ t_comp -= MPI_Wtime ();
432+ IMB_cpu_exploit (t_pure , 0 );
433+ t_comp += MPI_Wtime ();
434+
435+ MPI_Wait (& request , & status );
436+ t_ovrlp += MPI_Wtime ();
437+
438+ IMB_do_n_barriers (c_info -> communicator , c_info -> sync );
439+ }
440+ // Clean up
441+ MPI_Request_free (& request );
442+
443+ t_ovrlp /= ITERATIONS -> n_sample ;
444+ t_comp /= ITERATIONS -> n_sample ;
445+ }
446+
447+ time [0 ] = t_pure ;
448+ time [1 ] = t_ovrlp ;
449+ time [2 ] = t_comp ;
450+ }
451+
452+ /*************************************************************************/
453+
454+ void IMB_alltoallv_pure_persist (struct comm_info * c_info ,
455+ int size ,
456+ struct iter_schedule * ITERATIONS ,
457+ MODES RUN_MODE ,
458+ double * time )
459+ /*
460+
461+ MPI4 benchmark kernel
462+ Benchmarks IMB_Alltoallv_init
463+
464+ Input variables:
465+
466+ -c_info (type struct comm_info*)
467+ Collection of all base data for MPI;
468+ see [1] for more information
469+
470+ -size (type int)
471+ Basic message size in bytes
472+
473+ -ITERATIONS (type struct iter_schedule *)
474+ Repetition scheduling
475+
476+ -RUN_MODE (type MODES)
477+ (only MPI-2 case: see [1])
478+
479+ Output variables:
480+
481+ -time (type double*)
482+ Timing result per sample
483+
484+ */
485+ {
486+ int i = 0 ;
487+ Type_Size s_size ,
488+ r_size ;
489+ int s_num = 0 ,
490+ r_num = 0 ;
491+ MPI_Request request ;
492+ MPI_Status status ;
493+ double t_pure = 0. ;
494+
495+ /* GET SIZE OF DATA TYPE */
496+ MPI_Type_size (c_info -> s_data_type , & s_size );
497+ MPI_Type_size (c_info -> s_data_type , & r_size );
498+ if ((s_size != 0 ) && (r_size != 0 )) {
499+ s_num = size / s_size ;
500+ r_num = size / r_size ;
501+ }
502+
503+ /* INITIALIZATION OF DISPLACEMENT and SEND/RECEIVE COUNTS */
504+ for (i = 0 ; i < c_info -> num_procs ; i ++ ) {
505+ c_info -> sdispl [i ] = s_num * i ;
506+ c_info -> sndcnt [i ] = s_num ;
507+ c_info -> rdispl [i ] = r_num * i ;
508+ c_info -> reccnt [i ] = r_num ;
509+ }
510+
511+ if (c_info -> rank != -1 ) {
512+ IMB_do_n_barriers (c_info -> communicator , N_BARR );
513+
514+ // Create a persistent collective operation
515+ MPI_ERRHAND (MPI_Alltoallv_init ((char * )c_info -> s_buffer + i % ITERATIONS -> s_cache_iter * ITERATIONS -> s_offs ,
516+ c_info -> sndcnt ,
517+ c_info -> sdispl ,
518+ c_info -> s_data_type ,
519+ (char * )c_info -> r_buffer + i % ITERATIONS -> r_cache_iter * ITERATIONS -> r_offs ,
520+ c_info -> reccnt ,
521+ c_info -> rdispl ,
522+ c_info -> r_data_type ,
523+ c_info -> communicator ,
524+ c_info -> info ,
525+ & request ));
526+ for (i = 0 ; i < ITERATIONS -> n_sample ; i ++ )
527+ {
528+ t_pure -= MPI_Wtime ();
529+ // Start the persistent request
530+ MPI_ERRHAND (MPI_Start (& request ));
531+
532+ MPI_Wait (& request , & status );
533+ t_pure += MPI_Wtime ();
534+
535+ IMB_do_n_barriers (c_info -> communicator , c_info -> sync );
536+ }
537+ // Clean up
538+ MPI_Request_free (& request );
539+
540+ t_pure /= ITERATIONS -> n_sample ;
541+ }
542+
543+ time [0 ] = t_pure ;
544+ }
545+
360546#endif // NBC // MPI1
0 commit comments