@@ -487,7 +487,7 @@ static int ompi_osc_ucx_shared_query_peer(ompi_osc_ucx_module_t *module, int pee
487
487
if (UCS_OK != ucp_rkey_ptr (rkey , module -> addrs [peer ], & addr_p )) {
488
488
return OMPI_ERR_NOT_AVAILABLE ;
489
489
}
490
- * size = module -> same_size ? module -> size : module -> sizes [ peer ] ;
490
+ * size = ompi_osc_ucx_get_size ( module , peer ) ;
491
491
* ((void * * ) baseptr ) = addr_p ;
492
492
* disp_unit = (module -> disp_unit < 0 ) ? module -> disp_units [peer ] : module -> disp_unit ;
493
493
@@ -508,39 +508,30 @@ int ompi_osc_ucx_shared_query(struct ompi_win_t *win, int rank, size_t *size,
508
508
509
509
if (MPI_PROC_NULL == rank ) {
510
510
for (int i = 0 ; i < ompi_comm_size (module -> comm ) ; ++ i ) {
511
- if (0 != module -> sizes [ i ] ) {
511
+ if (0 != ompi_osc_ucx_get_size ( module , i ) ) {
512
512
if (OMPI_SUCCESS == ompi_osc_ucx_shared_query_peer (module , i , size , disp_unit , baseptr )) {
513
513
return OMPI_SUCCESS ;
514
514
}
515
515
}
516
516
}
517
517
} else {
518
- if (0 != module -> sizes [ rank ] ) {
518
+ if (0 != ompi_osc_ucx_get_size ( module , rank ) ) {
519
519
return ompi_osc_ucx_shared_query_peer (module , rank , size , disp_unit , baseptr );
520
520
}
521
521
}
522
522
return OMPI_ERR_NOT_SUPPORTED ;
523
523
524
524
} else if (MPI_PROC_NULL != rank ) { // shared memory window with given rank
525
- * size = module -> sizes [ rank ] ;
525
+ * size = ompi_osc_ucx_get_size ( module , rank ) ;
526
526
* ((void * * ) baseptr ) = (void * )module -> shmem_addrs [rank ];
527
- if (module -> disp_unit == -1 ) {
528
- * disp_unit = module -> disp_units [rank ];
529
- } else {
530
- * disp_unit = module -> disp_unit ;
531
- }
527
+ * disp_unit = ompi_osc_ucx_get_disp_unit (module , rank );
532
528
} else { // shared memory window with MPI_PROC_NULL
533
- int i = 0 ;
534
-
535
- for (i = 0 ; i < ompi_comm_size (module -> comm ) ; ++ i ) {
536
- if (0 != module -> sizes [i ]) {
537
- * size = module -> sizes [i ];
529
+ for (int i = 0 ; i < ompi_comm_size (module -> comm ) ; ++ i ) {
530
+ size_t peer_size = ompi_osc_ucx_get_size (module , i );
531
+ if (0 != peer_size ) {
532
+ * size = peer_size ;
538
533
* ((void * * ) baseptr ) = (void * )module -> shmem_addrs [i ];
539
- if (module -> disp_unit == -1 ) {
540
- * disp_unit = module -> disp_units [rank ];
541
- } else {
542
- * disp_unit = module -> disp_unit ;
543
- }
534
+ * disp_unit = ompi_osc_ucx_get_disp_unit (module , i );
544
535
break ;
545
536
}
546
537
}
@@ -566,7 +557,7 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, pt
566
557
uint64_t my_info [3 ] = {0 };
567
558
char * recv_buf = NULL ;
568
559
void * dynamic_base = NULL ;
569
- unsigned long total , * rbuf ;
560
+ unsigned long adjusted_size = size ;
570
561
int flag ;
571
562
size_t pagesize ;
572
563
bool unlink_needed = false;
@@ -680,12 +671,42 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, pt
680
671
module -> acc_single_intrinsic = check_config_value_bool ("acc_single_intrinsic" , info );
681
672
module -> skip_sync_check = false;
682
673
674
+ if (flavor == MPI_WIN_FLAVOR_SHARED ) {
675
+ opal_output_verbose (MCA_BASE_VERBOSE_DEBUG , ompi_osc_base_framework .framework_output ,
676
+ "allocating shared memory region of size %ld\n" , (long ) size );
677
+ /* get the pagesize */
678
+ pagesize = opal_getpagesize ();
679
+
680
+ /* Note that the alloc_shared_noncontig info key only has
681
+ * meaning during window creation. Once the window is
682
+ * created, we can't move memory around without making
683
+ * everything miserable. So we intentionally do not subscribe
684
+ * to updates on the info key, because there's no useful
685
+ * update to occur. */
686
+ module -> noncontig_shared_win = false;
687
+ if (OMPI_SUCCESS != opal_info_get_bool (info , "alloc_shared_noncontig" ,
688
+ & module -> noncontig_shared_win , & flag )) {
689
+ err = OMPI_ERR_BAD_PARAM ;
690
+ goto error ;
691
+ }
692
+
693
+ if (module -> noncontig_shared_win ) {
694
+ opal_output_verbose (MCA_BASE_VERBOSE_DEBUG , ompi_osc_base_framework .framework_output ,
695
+ "allocating window using non-contiguous strategy" );
696
+ adjusted_size = ((size - 1 ) / pagesize + 1 ) * pagesize ;
697
+ } else {
698
+ opal_output_verbose (MCA_BASE_VERBOSE_DEBUG , ompi_osc_base_framework .framework_output ,
699
+ "allocating window using contiguous strategy" );
700
+ adjusted_size = size ;
701
+ }
702
+ }
703
+
683
704
/* share everyone's displacement units. Only do an allgather if
684
705
strictly necessary, since it requires O(p) state. */
685
706
values [0 ] = disp_unit ;
686
707
values [1 ] = - disp_unit ;
687
- values [2 ] = size ;
688
- values [3 ] = - (int64_t ) size ;
708
+ values [2 ] = adjusted_size ;
709
+ values [3 ] = - (long ) adjusted_size ;
689
710
690
711
ret = module -> comm -> c_coll -> coll_allreduce (MPI_IN_PLACE , values , 4 , MPI_LONG ,
691
712
MPI_MIN , module -> comm ,
@@ -710,7 +731,6 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, pt
710
731
}
711
732
712
733
if (!same_disp_unit || !same_size ) {
713
-
714
734
ret = module -> comm -> c_coll -> coll_allgather (values , val_count * sizeof (long ), MPI_BYTE ,
715
735
(void * )my_info , sizeof (long ) * val_count , MPI_BYTE ,
716
736
module -> comm ,
@@ -743,7 +763,6 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, pt
743
763
module -> sizes [i ] = (size_t )values [i * val_count + val_count - 1 ];
744
764
}
745
765
}
746
-
747
766
}
748
767
749
768
ret = opal_common_ucx_wpctx_create (mca_osc_ucx_component .wpool , comm_size ,
@@ -755,50 +774,14 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, pt
755
774
756
775
if (flavor == MPI_WIN_FLAVOR_SHARED ) {
757
776
/* create the segment */
758
- opal_output_verbose (MCA_BASE_VERBOSE_DEBUG , ompi_osc_base_framework .framework_output ,
759
- "allocating shared memory region of size %ld\n" , (long ) size );
760
- /* get the pagesize */
761
- pagesize = opal_getpagesize ();
762
-
763
- rbuf = malloc (sizeof (unsigned long ) * comm_size );
764
- if (NULL == rbuf ) return OMPI_ERR_TEMP_OUT_OF_RESOURCE ;
765
-
766
- /* Note that the alloc_shared_noncontig info key only has
767
- * meaning during window creation. Once the window is
768
- * created, we can't move memory around without making
769
- * everything miserable. So we intentionally do not subscribe
770
- * to updates on the info key, because there's no useful
771
- * update to occur. */
772
- module -> noncontig_shared_win = false;
773
- if (OMPI_SUCCESS != opal_info_get_bool (info , "alloc_shared_noncontig" ,
774
- & module -> noncontig_shared_win , & flag )) {
775
- free (rbuf );
776
- goto error ;
777
- }
778
-
779
- if (module -> noncontig_shared_win ) {
780
- opal_output_verbose (MCA_BASE_VERBOSE_DEBUG , ompi_osc_base_framework .framework_output ,
781
- "allocating window using non-contiguous strategy" );
782
- total = ((size - 1 ) / pagesize + 1 ) * pagesize ;
783
- } else {
784
- opal_output_verbose (MCA_BASE_VERBOSE_DEBUG , ompi_osc_base_framework .framework_output ,
785
- "allocating window using contiguous strategy" );
786
- total = size ;
787
- }
788
- ret = module -> comm -> c_coll -> coll_allgather (& total , 1 , MPI_UNSIGNED_LONG ,
789
- rbuf , 1 , MPI_UNSIGNED_LONG ,
790
- module -> comm ,
791
- module -> comm -> c_coll -> coll_allgather_module );
792
- if (OMPI_SUCCESS != ret ) return ret ;
793
777
794
- total = 0 ;
778
+ unsigned long total = 0 ;
795
779
for (i = 0 ; i < comm_size ; ++ i ) {
796
- total += rbuf [ i ] ;
780
+ total += ompi_osc_ucx_get_size ( module , i ) ;
797
781
}
798
782
799
783
module -> segment_base = NULL ;
800
784
module -> shmem_addrs = NULL ;
801
- module -> sizes = NULL ;
802
785
803
786
if (total != 0 ) {
804
787
/* user opal/shmem directly to create a shared memory segment */
@@ -809,14 +792,12 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, pt
809
792
OMPI_PROC_MY_NAME -> jobid , (int ) OMPI_PROC_MY_NAME -> vpid ,
810
793
ompi_comm_print_cid (module -> comm ));
811
794
if (ret < 0 ) {
812
- free (rbuf );
813
795
return OMPI_ERR_OUT_OF_RESOURCE ;
814
796
}
815
797
816
798
ret = opal_shmem_segment_create (& module -> seg_ds , data_file , total );
817
799
free (data_file );
818
800
if (OPAL_SUCCESS != ret ) {
819
- free (rbuf );
820
801
goto error ;
821
802
}
822
803
@@ -826,20 +807,18 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, pt
826
807
ret = module -> comm -> c_coll -> coll_bcast (& module -> seg_ds , sizeof (module -> seg_ds ), MPI_BYTE , 0 ,
827
808
module -> comm , module -> comm -> c_coll -> coll_bcast_module );
828
809
if (OMPI_SUCCESS != ret ) {
829
- free (rbuf );
830
810
goto error ;
831
811
}
832
812
833
813
module -> segment_base = opal_shmem_segment_attach (& module -> seg_ds );
834
814
if (NULL == module -> segment_base ) {
835
- free ( rbuf ) ;
815
+ ret = OMPI_ERR_OUT_OF_RESOURCE ;
836
816
goto error ;
837
817
}
838
818
839
819
/* wait for all processes to attach */
840
820
ret = module -> comm -> c_coll -> coll_barrier (module -> comm , module -> comm -> c_coll -> coll_barrier_module );
841
821
if (OMPI_SUCCESS != ret ) {
842
- free (rbuf );
843
822
goto error ;
844
823
}
845
824
@@ -854,47 +833,26 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, pt
854
833
* different between different processes. To use direct load/store,
855
834
* shmem_addrs can be used, however, for RDMA, virtual address of
856
835
* remote process that will be stored in module->addrs should be used */
857
- module -> sizes = malloc (sizeof (size_t ) * comm_size );
858
- if (NULL == module -> sizes ) {
859
- free (rbuf );
860
- ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE ;
861
- goto error ;
862
- }
863
836
module -> shmem_addrs = malloc (sizeof (uint64_t ) * comm_size );
864
837
if (NULL == module -> shmem_addrs ) {
865
838
free (module -> sizes );
866
- free (rbuf );
867
839
ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE ;
868
840
goto error ;
869
841
}
870
842
871
843
872
844
for (i = 0 , total = 0 ; i < comm_size ; ++ i ) {
873
- module -> sizes [ i ] = rbuf [ i ] ;
874
- if (module -> sizes [ i ] || !module -> noncontig_shared_win ) {
845
+ size_t size = ompi_osc_ucx_get_size ( module , i ) ;
846
+ if (size || !module -> noncontig_shared_win ) {
875
847
module -> shmem_addrs [i ] = ((uint64_t ) module -> segment_base ) + total ;
876
- total += rbuf [ i ] ;
848
+ total += size ;
877
849
} else {
878
850
module -> shmem_addrs [i ] = (uint64_t )NULL ;
879
851
}
880
852
}
881
853
882
- free (rbuf );
883
-
884
- module -> size = module -> sizes [ompi_comm_rank (module -> comm )];
854
+ module -> size = ompi_osc_ucx_get_size (module , ompi_comm_rank (module -> comm ));
885
855
* base = (void * )module -> shmem_addrs [ompi_comm_rank (module -> comm )];
886
- } else {
887
- /* non-shared memory: exchange sizes and addresses so they can be queried for shared memory */
888
- for (i = 0 ; i < comm_size ; i ++ ) {
889
- ompi_proc_t * peer = ompi_comm_peer_lookup (module -> comm , i );
890
- peer ->
891
- if (ompi_comm_peer_lookup (module -> comm , i ) == NULL ) {
892
- OSC_UCX_ERROR ("Failed to lookup peer %d in communicator %s" , i , ompi_comm_print_cid (module -> comm ));
893
- ret = OMPI_ERR_COMM_FAILURE ;
894
- goto error ;
895
- }
896
- }
897
-
898
856
}
899
857
900
858
void * * mem_base = base ;
@@ -1030,6 +988,7 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, pt
1030
988
error :
1031
989
if (module -> disp_units ) free (module -> disp_units );
1032
990
if (module -> comm ) ompi_comm_free (& module -> comm );
991
+ if (module -> sizes ) free (module -> sizes );
1033
992
free (module );
1034
993
module = NULL ;
1035
994
@@ -1256,8 +1215,6 @@ int ompi_osc_ucx_free(struct ompi_win_t *win) {
1256
1215
opal_shmem_segment_detach (& module -> seg_ds );
1257
1216
if (module -> shmem_addrs != NULL )
1258
1217
free (module -> shmem_addrs );
1259
- if (module -> sizes != NULL )
1260
- free (module -> sizes );
1261
1218
}
1262
1219
1263
1220
if (module -> flavor == MPI_WIN_FLAVOR_DYNAMIC ) {
0 commit comments