@@ -1071,28 +1071,21 @@ int ompi_osc_ucx_rput(const void *origin_addr, int origin_count,
10711071 return ret ;
10721072 }
10731073
1074- mca_osc_ucx_component .num_incomplete_req_ops ++ ;
1075- ret = opal_common_ucx_wpmem_flush_ep_nb (mem , target , req_completion , ucx_req );
1074+ ret = opal_common_ucx_wpmem_fence (mem );
1075+ if (ret != OMPI_SUCCESS ) {
1076+ OSC_UCX_VERBOSE (1 , "opal_common_ucx_mem_fence failed: %d" , ret );
1077+ return OMPI_ERROR ;
1078+ }
10761079
1080+ mca_osc_ucx_component .num_incomplete_req_ops ++ ;
1081+ /* TODO: investigate whether ucp_worker_flush_nb is a better choice here */
1082+ ret = opal_common_ucx_wpmem_fetch_nb (module -> state_mem , UCP_ATOMIC_FETCH_OP_FADD ,
1083+ 0 , target , & (module -> req_result ),
1084+ sizeof (uint64_t ), remote_addr & (~0x7 ),
1085+ req_completion , ucx_req );
10771086 if (ret != OMPI_SUCCESS ) {
10781087 OMPI_OSC_UCX_REQUEST_RETURN (ucx_req );
10791088 return ret ;
1080-
1081- /* fallback to using an atomic op to acquire a request handle */
1082- ret = opal_common_ucx_wpmem_fence (mem );
1083- if (ret != OMPI_SUCCESS ) {
1084- OSC_UCX_VERBOSE (1 , "opal_common_ucx_mem_fence failed: %d" , ret );
1085- return OMPI_ERROR ;
1086- }
1087-
1088- ret = opal_common_ucx_wpmem_fetch_nb (mem , UCP_ATOMIC_FETCH_OP_FADD ,
1089- 0 , target , & (module -> req_result ),
1090- sizeof (uint64_t ), remote_addr & (~0x7 ),
1091- req_completion , ucx_req );
1092- if (ret != OMPI_SUCCESS ) {
1093- OMPI_OSC_UCX_REQUEST_RETURN (ucx_req );
1094- return ret ;
1095- }
10961089 }
10971090
10981091 * request = & ucx_req -> super ;
@@ -1127,28 +1120,21 @@ int ompi_osc_ucx_rget(void *origin_addr, int origin_count,
11271120 return ret ;
11281121 }
11291122
1130- mca_osc_ucx_component .num_incomplete_req_ops ++ ;
1131- ret = opal_common_ucx_wpmem_flush_ep_nb (mem , target , req_completion , ucx_req );
1123+ ret = opal_common_ucx_wpmem_fence (mem );
1124+ if (ret != OMPI_SUCCESS ) {
1125+ OSC_UCX_VERBOSE (1 , "opal_common_ucx_mem_fence failed: %d" , ret );
1126+ return OMPI_ERROR ;
1127+ }
11321128
1129+ mca_osc_ucx_component .num_incomplete_req_ops ++ ;
1130+ /* TODO: investigate whether ucp_worker_flush_nb is a better choice here */
1131+ ret = opal_common_ucx_wpmem_fetch_nb (module -> state_mem , UCP_ATOMIC_FETCH_OP_FADD ,
1132+ 0 , target , & (module -> req_result ),
1133+ sizeof (uint64_t ), remote_addr & (~0x7 ),
1134+ req_completion , ucx_req );
11331135 if (ret != OMPI_SUCCESS ) {
11341136 OMPI_OSC_UCX_REQUEST_RETURN (ucx_req );
11351137 return ret ;
1136-
1137- /* fallback to using an atomic op to acquire a request handle */
1138- ret = opal_common_ucx_wpmem_fence (mem );
1139- if (ret != OMPI_SUCCESS ) {
1140- OSC_UCX_VERBOSE (1 , "opal_common_ucx_mem_fence failed: %d" , ret );
1141- return OMPI_ERROR ;
1142- }
1143-
1144- ret = opal_common_ucx_wpmem_fetch_nb (mem , UCP_ATOMIC_FETCH_OP_FADD ,
1145- 0 , target , & (module -> req_result ),
1146- sizeof (uint64_t ), remote_addr & (~0x7 ),
1147- req_completion , ucx_req );
1148- if (ret != OMPI_SUCCESS ) {
1149- OMPI_OSC_UCX_REQUEST_RETURN (ucx_req );
1150- return ret ;
1151- }
11521138 }
11531139
11541140 * request = & ucx_req -> super ;
0 commit comments