1010 * Copyright (c) 2019-2021 Google, LLC. All rights reserved.
1111 * Copyright (c) 2021 IBM Corporation. All rights reserved.
1212 * Copyright (c) 2022 Cisco Systems, Inc. All rights reserved
13+ * Copyright (c) 2022 Amazon.com, Inc. or its affiliates.
14+ * All Rights reserved.
1315 * $COPYRIGHT$
1416 *
1517 * Additional copyrights may follow
1618 *
1719 * $HEADER$
1820 */
1921
22+ #include "ompi_config.h"
23+
2024#include "osc_rdma_accumulate.h"
2125#include "osc_rdma_request.h"
2226#include "osc_rdma_comm.h"
27+ #include "osc_rdma_lock.h"
28+ #include "osc_rdma_btl_comm.h"
2329
30+ #include "opal/util/minmax.h"
2431#include "ompi/mca/osc/base/base.h"
2532#include "ompi/mca/osc/base/osc_base_obj_convert.h"
2633
@@ -157,13 +164,11 @@ static int ompi_osc_rdma_fetch_and_op_atomic (ompi_osc_rdma_sync_t *sync, const
157164 mca_btl_base_registration_handle_t * target_handle , ompi_op_t * op , ompi_osc_rdma_request_t * req )
158165{
159166 ompi_osc_rdma_module_t * module = sync -> module ;
160- mca_btl_base_module_t * selected_btl = ompi_osc_rdma_selected_btl (module , peer -> data_btl_index );
161- int32_t atomic_flags = selected_btl -> btl_atomic_flags ;
162167 int btl_op , flags ;
163168 int64_t origin ;
164169
165- if ((8 != extent && !((MCA_BTL_ATOMIC_SUPPORTS_32BIT & atomic_flags ) && 4 == extent )) ||
166- (!(OMPI_DATATYPE_FLAG_DATA_INT & dt -> super .flags ) && !(MCA_BTL_ATOMIC_SUPPORTS_FLOAT & atomic_flags )) ||
170+ if ((8 != extent && !((MCA_BTL_ATOMIC_SUPPORTS_32BIT & module -> atomic_flags ) && 4 == extent )) ||
171+ (!(OMPI_DATATYPE_FLAG_DATA_INT & dt -> super .flags ) && !(MCA_BTL_ATOMIC_SUPPORTS_FLOAT & module -> atomic_flags )) ||
167172 !ompi_op_is_intrinsic (op ) || (0 == ompi_osc_rdma_op_mapping [op -> op_type ])) {
168173 return OMPI_ERR_NOT_SUPPORTED ;
169174 }
@@ -235,19 +240,11 @@ static int ompi_osc_rdma_acc_single_atomic (ompi_osc_rdma_sync_t *sync, const vo
235240 ompi_op_t * op , ompi_osc_rdma_request_t * req )
236241{
237242 ompi_osc_rdma_module_t * module = sync -> module ;
238- mca_btl_base_module_t * selected_btl = ompi_osc_rdma_selected_btl (module , peer -> data_btl_index );
239- int32_t atomic_flags = selected_btl -> btl_atomic_flags ;
240243 int btl_op , flags ;
241244 int64_t origin ;
242245
243- if (!(selected_btl -> btl_flags & MCA_BTL_FLAGS_ATOMIC_OPS )) {
244- /* btl put atomics not supported or disabled. fall back on fetch-and-op */
245- return ompi_osc_rdma_fetch_and_op_atomic (sync , origin_addr , NULL , dt , extent , peer , target_address , target_handle ,
246- op , req );
247- }
248-
249- if ((8 != extent && !((MCA_BTL_ATOMIC_SUPPORTS_32BIT & atomic_flags ) && 4 == extent )) ||
250- (!(OMPI_DATATYPE_FLAG_DATA_INT & dt -> super .flags ) && !(MCA_BTL_ATOMIC_SUPPORTS_FLOAT & atomic_flags )) ||
246+ if ((8 != extent && !((MCA_BTL_ATOMIC_SUPPORTS_32BIT & module -> atomic_flags ) && 4 == extent )) ||
247+ (!(OMPI_DATATYPE_FLAG_DATA_INT & dt -> super .flags ) && !(MCA_BTL_ATOMIC_SUPPORTS_FLOAT & module -> atomic_flags )) ||
251248 !ompi_op_is_intrinsic (op ) || (0 == ompi_osc_rdma_op_mapping [op -> op_type ])) {
252249 return OMPI_ERR_NOT_SUPPORTED ;
253250 }
@@ -585,9 +582,9 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v
585582
586583 /* determine how much to put in this operation */
587584 if (source_count ) {
588- acc_len = min ( min (target_iovec [target_iov_index ].iov_len , source_iovec [source_iov_index ].iov_len ), acc_limit );
585+ acc_len = opal_min ( opal_min (target_iovec [target_iov_index ].iov_len , source_iovec [source_iov_index ].iov_len ), acc_limit );
589586 } else {
590- acc_len = min (target_iovec [target_iov_index ].iov_len , acc_limit );
587+ acc_len = opal_min (target_iovec [target_iov_index ].iov_len , acc_limit );
591588 }
592589
593590 if (0 != acc_len ) {
@@ -662,13 +659,11 @@ static inline int ompi_osc_rdma_cas_atomic (ompi_osc_rdma_sync_t *sync, const vo
662659 bool lock_acquired )
663660{
664661 ompi_osc_rdma_module_t * module = sync -> module ;
665- mca_btl_base_module_t * btl = ompi_osc_rdma_selected_btl (module , peer -> data_btl_index );
666- int32_t atomic_flags = btl -> btl_atomic_flags ;
667662 const size_t size = datatype -> super .size ;
668663 int64_t compare , source ;
669664 int flags , ret ;
670665
671- if (8 != size && !(4 == size && (MCA_BTL_ATOMIC_SUPPORTS_32BIT & atomic_flags ))) {
666+ if (8 != size && !(4 == size && (MCA_BTL_ATOMIC_SUPPORTS_32BIT & module -> atomic_flags ))) {
672667 return OMPI_ERR_NOT_SUPPORTED ;
673668 }
674669
@@ -716,7 +711,6 @@ static inline int cas_rdma (ompi_osc_rdma_sync_t *sync, const void *source_addr,
716711 mca_btl_base_registration_handle_t * target_handle , bool lock_acquired )
717712{
718713 ompi_osc_rdma_module_t * module = sync -> module ;
719- mca_btl_base_module_t * btl = ompi_osc_rdma_selected_btl (module , peer -> data_btl_index );
720714 unsigned long len = datatype -> super .size ;
721715 mca_btl_base_registration_handle_t * local_handle = NULL ;
722716 ompi_osc_rdma_frag_t * frag = NULL ;
@@ -741,26 +735,30 @@ static inline int cas_rdma (ompi_osc_rdma_sync_t *sync, const void *source_addr,
741735 return OMPI_SUCCESS ;
742736 }
743737
744- if (btl -> btl_register_mem && len > btl -> btl_put_local_registration_threshold ) {
745- do {
746- ret = ompi_osc_rdma_frag_alloc (module , len , & frag , & ptr );
747- if (OPAL_UNLIKELY (OMPI_SUCCESS == ret )) {
748- break ;
749- }
738+ if (module -> use_memory_registration ) {
739+ mca_btl_base_module_t * btl = ompi_osc_rdma_selected_btl (module , peer -> data_btl_index );
740+ if (len > btl -> btl_put_local_registration_threshold ) {
741+ do {
742+ ret = ompi_osc_rdma_frag_alloc (module , len , & frag , & ptr );
743+ if (OPAL_UNLIKELY (OMPI_SUCCESS == ret )) {
744+ break ;
745+ }
750746
751- ompi_osc_rdma_progress (module );
752- } while (1 );
747+ ompi_osc_rdma_progress (module );
748+ } while (1 );
753749
754- memcpy (ptr , source_addr , len );
755- local_handle = frag -> handle ;
750+ memcpy (ptr , source_addr , len );
751+ local_handle = frag -> handle ;
752+ }
756753 }
757754
758755 OSC_RDMA_VERBOSE (MCA_BASE_VERBOSE_TRACE , "RDMA compare-and-swap initiating blocking btl put..." );
759756
760757 do {
761- ret = btl -> btl_put (btl , peer -> data_endpoint , ptr , target_address ,
762- local_handle , target_handle , len , 0 , MCA_BTL_NO_ORDER ,
763- ompi_osc_rdma_cas_put_complete , (void * ) & complete , NULL );
758+ ret = ompi_osc_rdma_btl_put (module , peer -> data_btl_index , peer -> data_endpoint ,
759+ ptr , target_address , local_handle , target_handle ,
760+ len , 0 , MCA_BTL_NO_ORDER ,
761+ ompi_osc_rdma_cas_put_complete , (void * ) & complete , NULL );
764762 if (OPAL_SUCCESS == ret || (OPAL_ERR_OUT_OF_RESOURCE != ret && OPAL_ERR_TEMP_OUT_OF_RESOURCE != ret )) {
765763 break ;
766764 }
0 commit comments