@@ -48,18 +48,47 @@ typedef struct ompi_osc_rdma_pending_post_t ompi_osc_rdma_pending_post_t;
4848
/* Class instance for ompi_osc_rdma_pending_post_t with opal_list_item_t named as
 * its parent. The two NULL arguments occupy the positions where the
 * ompi_osc_rdma_pending_op_t instance further down passes its construct/destruct
 * functions, so presumably no custom hooks are registered for this class --
 * NOTE(review): confirm against the OBJ_CLASS_INSTANCE macro definition. */
4949static OBJ_CLASS_INSTANCE (ompi_osc_rdma_pending_post_t , opal_list_item_t , NULL, NULL) ;
5050
/**
 * Put a pending-operation object into its idle state.
 *
 * Clears the fragment, buffer and result pointers and marks the operation as
 * not complete. op_size is not touched here. This function is passed to
 * OBJ_CLASS_INSTANCE for ompi_osc_rdma_pending_op_t and is also called by
 * ompi_osc_rdma_pending_op_destruct to reset the fields.
 *
 * @param pending_op  object to initialize; dereferenced unconditionally
 */
51+ static void ompi_osc_rdma_pending_op_construct (ompi_osc_rdma_pending_op_t * pending_op )
52+ {
53+ pending_op -> op_frag = NULL ;
54+ pending_op -> op_buffer = NULL ;
55+ pending_op -> op_result = NULL ;
56+ pending_op -> op_complete = false;
57+ }
58+
/**
 * Tear down a pending-operation object.
 *
 * If the object still references a fragment, that fragment is handed to
 * ompi_osc_rdma_frag_complete. Afterwards all fields are reset by re-running
 * the constructor, which also clears op_frag.
 *
 * @param pending_op  object being destroyed; dereferenced unconditionally
 */
59+ static void ompi_osc_rdma_pending_op_destruct (ompi_osc_rdma_pending_op_t * pending_op )
60+ {
/* only complete the fragment if the completion callback has not already done so
 * (ompi_osc_rdma_atomic_complete sets op_frag to NULL after completing it) */
61+ if (NULL != pending_op -> op_frag ) {
62+ ompi_osc_rdma_frag_complete (pending_op -> op_frag );
63+ }
64+
/* reuse the constructor to return every field to its idle value */
65+ ompi_osc_rdma_pending_op_construct (pending_op );
66+ }
67+
/* Class instance for ompi_osc_rdma_pending_op_t with opal_list_item_t named as
 * its parent, wiring in the construct/destruct functions defined above.
 * Unlike the pending-post instance earlier in this file, this one is not
 * declared static -- NOTE(review): presumably because the class is referenced
 * from other translation units; confirm. */
68+ OBJ_CLASS_INSTANCE (ompi_osc_rdma_pending_op_t , opal_list_item_t ,
69+ ompi_osc_rdma_pending_op_construct ,
70+ ompi_osc_rdma_pending_op_destruct );
71+
5172/**
5273 * Dummy completion function for atomic operations
5374 */
/*
 * Review notes on the patched version (lines marked '+'):
 *
 * - context is cast to ompi_osc_rdma_pending_op_t * and dereferenced without a
 *   NULL check. The removed version (lines marked '-') treated context as a
 *   volatile bool * and tolerated NULL, so every caller must now pass a valid
 *   pending-op object.
 * - btl, endpoint, local_address, local_handle, data and status are not read by
 *   the body; in particular status is ignored.
 * - The function ends by calling OBJ_RELEASE on the pending op.
 *   NOTE(review): presumably the issuer holds its own reference when it needs
 *   to read op_complete afterwards -- confirm at the call sites.
 */
5475void ompi_osc_rdma_atomic_complete (mca_btl_base_module_t * btl , struct mca_btl_base_endpoint_t * endpoint ,
5576 void * local_address , mca_btl_base_registration_handle_t * local_handle ,
5677 void * context , void * data , int status )
5778{
58- volatile bool * atomic_complete = (volatile bool * ) context ;
79+ ompi_osc_rdma_pending_op_t * pending_op = (ompi_osc_rdma_pending_op_t * ) context ;
5980
60- if (atomic_complete ) {
61- * atomic_complete = true ;
/* when a result location was supplied, copy op_size bytes from op_buffer into it */
81+ if (pending_op -> op_result ) {
82+ memmove ( pending_op -> op_result , pending_op -> op_buffer , pending_op -> op_size ) ;
6283 }
84+
/* complete the fragment now and clear the pointer so that the destructor's
 * NULL != op_frag check does not complete the same fragment a second time */
85+ if (NULL != pending_op -> op_frag ) {
86+ ompi_osc_rdma_frag_complete (pending_op -> op_frag );
87+ pending_op -> op_frag = NULL ;
88+ }
89+
/* set the completion flag before releasing the object; pending_op must not be
 * touched after OBJ_RELEASE */
90+ pending_op -> op_complete = true;
91+ OBJ_RELEASE (pending_op );
6392}
6493
6594/**
@@ -182,9 +211,6 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win)
182211 ompi_osc_rdma_peer_t * * peers ;
183212 int my_rank = ompi_comm_rank (module -> comm );
184213 ompi_osc_rdma_state_t * state = module -> state ;
185- volatile bool atomic_complete ;
186- ompi_osc_rdma_frag_t * frag = NULL ;
187- osc_rdma_counter_t * temp = NULL ;
188214 int ret ;
189215
190216 OSC_RDMA_VERBOSE (MCA_BASE_VERBOSE_TRACE , "post: %p, %d, %s" , (void * ) group , assert , win -> w_name );
@@ -212,9 +238,6 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win)
212238 state -> num_complete_msgs = 0 ;
213239 OPAL_THREAD_UNLOCK (& module -> lock );
214240
215- /* allocate a temporary buffer for atomic response */
216- ret = ompi_osc_rdma_frag_alloc (module , 8 , & frag , (char * * ) & temp );
217-
218241 if ((assert & MPI_MODE_NOCHECK ) || 0 == ompi_group_size (group )) {
219242 return OMPI_SUCCESS ;
220243 }
@@ -226,7 +249,6 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win)
226249 /* translate group ranks into the communicator */
227250 peers = ompi_osc_rdma_get_peers (module , module -> pw_group );
228251 if (OPAL_UNLIKELY (NULL == peers )) {
229- ompi_osc_rdma_frag_complete (frag );
230252 return OMPI_ERR_OUT_OF_RESOURCE ;
231253 }
232254
@@ -236,65 +258,40 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win)
236258 for (int i = 0 ; i < ompi_group_size (module -> pw_group ) ; ++ i ) {
237259 ompi_osc_rdma_peer_t * peer = peers [i ];
238260 uint64_t target = (uint64_t ) (intptr_t ) peer -> state + offsetof (ompi_osc_rdma_state_t , post_index );
239- int post_index ;
261+ ompi_osc_rdma_lock_t post_index ;
240262
241263 if (peer -> rank == my_rank ) {
242264 ompi_osc_rdma_handle_post (module , my_rank , NULL , 0 );
243265 continue ;
244266 }
245267
246268 /* get a post index */
247- atomic_complete = false;
248269 if (!ompi_osc_rdma_peer_local_state (peer )) {
249- do {
250- ret = module -> selected_btl -> btl_atomic_fop (module -> selected_btl , peer -> state_endpoint , temp , target , frag -> handle ,
251- peer -> state_handle , MCA_BTL_ATOMIC_ADD , 1 , 0 , MCA_BTL_NO_ORDER ,
252- ompi_osc_rdma_atomic_complete , (void * ) & atomic_complete , NULL );
253- assert (OPAL_SUCCESS >= ret );
254-
255- if (OMPI_SUCCESS == ret ) {
256- while (!atomic_complete ) {
257- ompi_osc_rdma_progress (module );
258- }
259-
260- break ;
261- }
262-
263- ompi_osc_rdma_progress (module );
264- } while (1 );
270+ ret = ompi_osc_rdma_lock_btl_fop (module , peer , target , MCA_BTL_ATOMIC_ADD , 1 , & post_index , true);
271+ assert (OMPI_SUCCESS == ret );
265272 } else {
266- * temp = ompi_osc_rdma_counter_add ((osc_rdma_counter_t * ) (intptr_t ) target , 1 ) - 1 ;
273+ post_index = ompi_osc_rdma_counter_add ((osc_rdma_counter_t * ) (intptr_t ) target , 1 ) - 1 ;
267274 }
268- post_index = (* temp ) & (OMPI_OSC_RDMA_POST_PEER_MAX - 1 );
275+
276+ post_index &= OMPI_OSC_RDMA_POST_PEER_MAX - 1 ;
269277
270278 target = (uint64_t ) (intptr_t ) peer -> state + offsetof (ompi_osc_rdma_state_t , post_peers ) +
271279 sizeof (osc_rdma_counter_t ) * post_index ;
272280
273281 do {
282+ ompi_osc_rdma_lock_t result ;
283+
274284 OSC_RDMA_VERBOSE (MCA_BASE_VERBOSE_TRACE , "attempting to post to index %d @ rank %d" , post_index , peer -> rank );
275285
276286 /* try to post. if the value isn't 0 then another rank is occupying this index */
277287 if (!ompi_osc_rdma_peer_local_state (peer )) {
278- atomic_complete = false;
279- ret = module -> selected_btl -> btl_atomic_cswap (module -> selected_btl , peer -> state_endpoint , temp , target , frag -> handle , peer -> state_handle ,
280- 0 , 1 + (int64_t ) my_rank , 0 , MCA_BTL_NO_ORDER , ompi_osc_rdma_atomic_complete ,
281- (void * ) & atomic_complete , NULL );
282- assert (OPAL_SUCCESS >= ret );
283-
284- if (OMPI_SUCCESS == ret ) {
285- while (!atomic_complete ) {
286- ompi_osc_rdma_progress (module );
287- }
288- } else {
289- ompi_osc_rdma_progress (module );
290- continue ;
291- }
292-
288+ ret = ompi_osc_rdma_lock_btl_cswap (module , peer , target , 0 , 1 + (int64_t ) my_rank , & result );
289+ assert (OMPI_SUCCESS == ret );
293290 } else {
294- * temp = !ompi_osc_rdma_lock_cmpset ((osc_rdma_counter_t * ) target , 0 , 1 + (osc_rdma_counter_t ) my_rank );
291+ result = !ompi_osc_rdma_lock_cmpset ((osc_rdma_counter_t * ) target , 0 , 1 + (osc_rdma_counter_t ) my_rank );
295292 }
296293
297- if (OPAL_LIKELY (0 == * temp )) {
294+ if (OPAL_LIKELY (0 == result )) {
298295 break ;
299296 }
300297
@@ -313,8 +310,6 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win)
313310 } while (1 );
314311 }
315312
316- ompi_osc_rdma_frag_complete (frag );
317-
318313 ompi_osc_rdma_release_peers (peers , ompi_group_size (module -> pw_group ));
319314
320315 OSC_RDMA_VERBOSE (MCA_BASE_VERBOSE_TRACE , "post complete" );
@@ -422,9 +417,7 @@ int ompi_osc_rdma_complete_atomic (ompi_win_t *win)
422417{
423418 ompi_osc_rdma_module_t * module = GET_MODULE (win );
424419 ompi_osc_rdma_sync_t * sync = & module -> all_sync ;
425- ompi_osc_rdma_frag_t * frag = NULL ;
426420 ompi_osc_rdma_peer_t * * peers ;
427- void * scratch_lock = NULL ;
428421 ompi_group_t * group ;
429422 int group_size , ret ;
430423
@@ -459,45 +452,19 @@ int ompi_osc_rdma_complete_atomic (ompi_win_t *win)
459452
460453 ompi_osc_rdma_sync_rdma_complete (sync );
461454
462- if (!(MCA_BTL_FLAGS_ATOMIC_OPS & module -> selected_btl -> btl_flags )) {
463- /* need a temporary buffer for performing fetching atomics */
464- ret = ompi_osc_rdma_frag_alloc (module , 8 , & frag , (char * * ) & scratch_lock );
465- if (OPAL_UNLIKELY (OPAL_SUCCESS != ret )) {
466- return ret ;
467- }
468- }
469-
470455 /* for each process in the group increment their number of complete messages */
471456 for (int i = 0 ; i < group_size ; ++ i ) {
472457 ompi_osc_rdma_peer_t * peer = peers [i ];
473458 intptr_t target = (intptr_t ) peer -> state + offsetof (ompi_osc_rdma_state_t , num_complete_msgs );
474459
475460 if (!ompi_osc_rdma_peer_local_state (peer )) {
476- do {
477- if (MCA_BTL_FLAGS_ATOMIC_OPS & module -> selected_btl -> btl_flags ) {
478- ret = module -> selected_btl -> btl_atomic_op (module -> selected_btl , peer -> state_endpoint , target , peer -> state_handle ,
479- MCA_BTL_ATOMIC_ADD , 1 , 0 , MCA_BTL_NO_ORDER ,
480- ompi_osc_rdma_atomic_complete , NULL , NULL );
481- } else {
482- /* don't care about the read value so use the scratch lock */
483- ret = module -> selected_btl -> btl_atomic_fop (module -> selected_btl , peer -> state_endpoint , scratch_lock ,
484- target , frag -> handle , peer -> state_handle , MCA_BTL_ATOMIC_ADD , 1 ,
485- 0 , MCA_BTL_NO_ORDER , ompi_osc_rdma_atomic_complete , NULL , NULL );
486- }
487-
488- if (OPAL_LIKELY (OMPI_SUCCESS == ret )) {
489- break ;
490- }
491- } while (1 );
461+ ret = ompi_osc_rdma_lock_btl_op (module , peer , target , MCA_BTL_ATOMIC_ADD , 1 , true);
462+ assert (OMPI_SUCCESS == ret );
492463 } else {
493464 (void ) ompi_osc_rdma_counter_add ((osc_rdma_counter_t * ) target , 1 );
494465 }
495466 }
496467
497- if (frag ) {
498- ompi_osc_rdma_frag_complete (frag );
499- }
500-
501468 /* release our reference to peers in this group */
502469 ompi_osc_rdma_release_peers (peers , group_size );
503470
0 commit comments