@@ -48,18 +48,47 @@ typedef struct ompi_osc_rdma_pending_post_t ompi_osc_rdma_pending_post_t;
48
48
49
49
static OBJ_CLASS_INSTANCE (ompi_osc_rdma_pending_post_t , opal_list_item_t , NULL, NULL) ;
50
50
51
/* Constructor for ompi_osc_rdma_pending_op_t: clears the fragment, buffer and
 * result pointers and marks the operation as not complete. */
+ static void ompi_osc_rdma_pending_op_construct (ompi_osc_rdma_pending_op_t * pending_op )
52
+ {
53
+ pending_op -> op_frag = NULL ;
54
+ pending_op -> op_buffer = NULL ;
55
+ pending_op -> op_result = NULL ;
56
+ pending_op -> op_complete = false;
57
+ }
58
+
59
/* Destructor for ompi_osc_rdma_pending_op_t: passes a still-attached fragment
 * to ompi_osc_rdma_frag_complete, then resets every field. */
+ static void ompi_osc_rdma_pending_op_destruct (ompi_osc_rdma_pending_op_t * pending_op )
60
+ {
61
+ if (NULL != pending_op -> op_frag ) {
62
+ ompi_osc_rdma_frag_complete (pending_op -> op_frag );
63
+ }
64
+
65
/* reuse the constructor to put the fields back to their initial values
 * (this also clears the op_frag pointer that was just completed) */
+ ompi_osc_rdma_pending_op_construct (pending_op );
66
+ }
67
+
68
/* Class instance for ompi_osc_rdma_pending_op_t: parent class is
 * opal_list_item_t, with ompi_osc_rdma_pending_op_construct and
 * ompi_osc_rdma_pending_op_destruct as constructor and destructor. */
+ OBJ_CLASS_INSTANCE (ompi_osc_rdma_pending_op_t , opal_list_item_t ,
69
+ ompi_osc_rdma_pending_op_construct ,
70
+ ompi_osc_rdma_pending_op_destruct );
71
+
51
72
/**
52
73
* Dummy completion function for atomic operations
53
74
*/
54
75
void ompi_osc_rdma_atomic_complete (mca_btl_base_module_t * btl , struct mca_btl_base_endpoint_t * endpoint ,
55
76
void * local_address , mca_btl_base_registration_handle_t * local_handle ,
56
77
void * context , void * data , int status )
57
78
{
58
/* Diff view: '-' lines are the previous version, which treated context as an
 * optional volatile bool flag; '+' lines are the replacement, which treats
 * context as an ompi_osc_rdma_pending_op_t. Only context is used here; btl,
 * endpoint, local_address, local_handle, data and status are not read. */
- volatile bool * atomic_complete = (volatile bool * ) context ;
79
+ ompi_osc_rdma_pending_op_t * pending_op = (ompi_osc_rdma_pending_op_t * ) context ;
59
80
60
/* NOTE(review): the removed version skipped the store when context was NULL;
 * the new version dereferences pending_op unconditionally, so every caller
 * must now pass a valid pending op -- confirm against the call sites. */
- if (atomic_complete ) {
61
- * atomic_complete = true ;
81
+ if (pending_op -> op_result ) {
82
/* a result pointer was supplied: copy op_size bytes from op_buffer into it */
+ memmove ( pending_op -> op_result , pending_op -> op_buffer , pending_op -> op_size ) ;
62
83
}
84
+
85
/* complete the attached fragment now and clear the pointer; the destructor
 * also completes a non-NULL op_frag, so clearing it presumably prevents a
 * second completion -- TODO confirm */
+ if (NULL != pending_op -> op_frag ) {
86
+ ompi_osc_rdma_frag_complete (pending_op -> op_frag );
87
+ pending_op -> op_frag = NULL ;
88
+ }
89
+
90
/* flag completion, then drop one reference to the pending op.
 * NOTE(review): presumably a caller polling op_complete holds its own
 * reference so the object outlives this release -- confirm. */
+ pending_op -> op_complete = true;
91
+ OBJ_RELEASE (pending_op );
63
92
}
64
93
65
94
/**
@@ -182,9 +211,6 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win)
182
211
ompi_osc_rdma_peer_t * * peers ;
183
212
int my_rank = ompi_comm_rank (module -> comm );
184
213
ompi_osc_rdma_state_t * state = module -> state ;
185
- volatile bool atomic_complete ;
186
- ompi_osc_rdma_frag_t * frag = NULL ;
187
- osc_rdma_counter_t * temp = NULL ;
188
214
int ret ;
189
215
190
216
OSC_RDMA_VERBOSE (MCA_BASE_VERBOSE_TRACE , "post: %p, %d, %s" , (void * ) group , assert , win -> w_name );
@@ -212,9 +238,6 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win)
212
238
state -> num_complete_msgs = 0 ;
213
239
OPAL_THREAD_UNLOCK (& module -> lock );
214
240
215
- /* allocate a temporary buffer for atomic response */
216
- ret = ompi_osc_rdma_frag_alloc (module , 8 , & frag , (char * * ) & temp );
217
-
218
241
if ((assert & MPI_MODE_NOCHECK ) || 0 == ompi_group_size (group )) {
219
242
return OMPI_SUCCESS ;
220
243
}
@@ -226,7 +249,6 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win)
226
249
/* translate group ranks into the communicator */
227
250
peers = ompi_osc_rdma_get_peers (module , module -> pw_group );
228
251
if (OPAL_UNLIKELY (NULL == peers )) {
229
- ompi_osc_rdma_frag_complete (frag );
230
252
return OMPI_ERR_OUT_OF_RESOURCE ;
231
253
}
232
254
@@ -236,65 +258,40 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win)
236
258
for (int i = 0 ; i < ompi_group_size (module -> pw_group ) ; ++ i ) {
237
259
ompi_osc_rdma_peer_t * peer = peers [i ];
238
260
uint64_t target = (uint64_t ) (intptr_t ) peer -> state + offsetof (ompi_osc_rdma_state_t , post_index );
239
- int post_index ;
261
+ ompi_osc_rdma_lock_t post_index ;
240
262
241
263
if (peer -> rank == my_rank ) {
242
264
ompi_osc_rdma_handle_post (module , my_rank , NULL , 0 );
243
265
continue ;
244
266
}
245
267
246
268
/* get a post index */
247
- atomic_complete = false;
248
269
if (!ompi_osc_rdma_peer_local_state (peer )) {
249
- do {
250
- ret = module -> selected_btl -> btl_atomic_fop (module -> selected_btl , peer -> state_endpoint , temp , target , frag -> handle ,
251
- peer -> state_handle , MCA_BTL_ATOMIC_ADD , 1 , 0 , MCA_BTL_NO_ORDER ,
252
- ompi_osc_rdma_atomic_complete , (void * ) & atomic_complete , NULL );
253
- assert (OPAL_SUCCESS >= ret );
254
-
255
- if (OMPI_SUCCESS == ret ) {
256
- while (!atomic_complete ) {
257
- ompi_osc_rdma_progress (module );
258
- }
259
-
260
- break ;
261
- }
262
-
263
- ompi_osc_rdma_progress (module );
264
- } while (1 );
270
+ ret = ompi_osc_rdma_lock_btl_fop (module , peer , target , MCA_BTL_ATOMIC_ADD , 1 , & post_index , true);
271
+ assert (OMPI_SUCCESS == ret );
265
272
} else {
266
- * temp = ompi_osc_rdma_counter_add ((osc_rdma_counter_t * ) (intptr_t ) target , 1 ) - 1 ;
273
+ post_index = ompi_osc_rdma_counter_add ((osc_rdma_counter_t * ) (intptr_t ) target , 1 ) - 1 ;
267
274
}
268
- post_index = (* temp ) & (OMPI_OSC_RDMA_POST_PEER_MAX - 1 );
275
+
276
+ post_index &= OMPI_OSC_RDMA_POST_PEER_MAX - 1 ;
269
277
270
278
target = (uint64_t ) (intptr_t ) peer -> state + offsetof (ompi_osc_rdma_state_t , post_peers ) +
271
279
sizeof (osc_rdma_counter_t ) * post_index ;
272
280
273
281
do {
282
+ ompi_osc_rdma_lock_t result ;
283
+
274
284
OSC_RDMA_VERBOSE (MCA_BASE_VERBOSE_TRACE , "attempting to post to index %d @ rank %d" , post_index , peer -> rank );
275
285
276
286
/* try to post. if the value isn't 0 then another rank is occupying this index */
277
287
if (!ompi_osc_rdma_peer_local_state (peer )) {
278
- atomic_complete = false;
279
- ret = module -> selected_btl -> btl_atomic_cswap (module -> selected_btl , peer -> state_endpoint , temp , target , frag -> handle , peer -> state_handle ,
280
- 0 , 1 + (int64_t ) my_rank , 0 , MCA_BTL_NO_ORDER , ompi_osc_rdma_atomic_complete ,
281
- (void * ) & atomic_complete , NULL );
282
- assert (OPAL_SUCCESS >= ret );
283
-
284
- if (OMPI_SUCCESS == ret ) {
285
- while (!atomic_complete ) {
286
- ompi_osc_rdma_progress (module );
287
- }
288
- } else {
289
- ompi_osc_rdma_progress (module );
290
- continue ;
291
- }
292
-
288
+ ret = ompi_osc_rdma_lock_btl_cswap (module , peer , target , 0 , 1 + (int64_t ) my_rank , & result );
289
+ assert (OMPI_SUCCESS == ret );
293
290
} else {
294
- * temp = !ompi_osc_rdma_lock_cmpset ((osc_rdma_counter_t * ) target , 0 , 1 + (osc_rdma_counter_t ) my_rank );
291
+ result = !ompi_osc_rdma_lock_cmpset ((osc_rdma_counter_t * ) target , 0 , 1 + (osc_rdma_counter_t ) my_rank );
295
292
}
296
293
297
- if (OPAL_LIKELY (0 == * temp )) {
294
+ if (OPAL_LIKELY (0 == result )) {
298
295
break ;
299
296
}
300
297
@@ -313,8 +310,6 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win)
313
310
} while (1 );
314
311
}
315
312
316
- ompi_osc_rdma_frag_complete (frag );
317
-
318
313
ompi_osc_rdma_release_peers (peers , ompi_group_size (module -> pw_group ));
319
314
320
315
OSC_RDMA_VERBOSE (MCA_BASE_VERBOSE_TRACE , "post complete" );
@@ -422,9 +417,7 @@ int ompi_osc_rdma_complete_atomic (ompi_win_t *win)
422
417
{
423
418
ompi_osc_rdma_module_t * module = GET_MODULE (win );
424
419
ompi_osc_rdma_sync_t * sync = & module -> all_sync ;
425
- ompi_osc_rdma_frag_t * frag = NULL ;
426
420
ompi_osc_rdma_peer_t * * peers ;
427
- void * scratch_lock = NULL ;
428
421
ompi_group_t * group ;
429
422
int group_size , ret ;
430
423
@@ -459,45 +452,19 @@ int ompi_osc_rdma_complete_atomic (ompi_win_t *win)
459
452
460
453
ompi_osc_rdma_sync_rdma_complete (sync );
461
454
462
- if (!(MCA_BTL_FLAGS_ATOMIC_OPS & module -> selected_btl -> btl_flags )) {
463
- /* need a temporary buffer for performing fetching atomics */
464
- ret = ompi_osc_rdma_frag_alloc (module , 8 , & frag , (char * * ) & scratch_lock );
465
- if (OPAL_UNLIKELY (OPAL_SUCCESS != ret )) {
466
- return ret ;
467
- }
468
- }
469
-
470
455
/* for each process in the group increment their number of complete messages */
471
456
for (int i = 0 ; i < group_size ; ++ i ) {
472
457
ompi_osc_rdma_peer_t * peer = peers [i ];
473
458
intptr_t target = (intptr_t ) peer -> state + offsetof (ompi_osc_rdma_state_t , num_complete_msgs );
474
459
475
460
if (!ompi_osc_rdma_peer_local_state (peer )) {
476
- do {
477
- if (MCA_BTL_FLAGS_ATOMIC_OPS & module -> selected_btl -> btl_flags ) {
478
- ret = module -> selected_btl -> btl_atomic_op (module -> selected_btl , peer -> state_endpoint , target , peer -> state_handle ,
479
- MCA_BTL_ATOMIC_ADD , 1 , 0 , MCA_BTL_NO_ORDER ,
480
- ompi_osc_rdma_atomic_complete , NULL , NULL );
481
- } else {
482
- /* don't care about the read value so use the scratch lock */
483
- ret = module -> selected_btl -> btl_atomic_fop (module -> selected_btl , peer -> state_endpoint , scratch_lock ,
484
- target , frag -> handle , peer -> state_handle , MCA_BTL_ATOMIC_ADD , 1 ,
485
- 0 , MCA_BTL_NO_ORDER , ompi_osc_rdma_atomic_complete , NULL , NULL );
486
- }
487
-
488
- if (OPAL_LIKELY (OMPI_SUCCESS == ret )) {
489
- break ;
490
- }
491
- } while (1 );
461
+ ret = ompi_osc_rdma_lock_btl_op (module , peer , target , MCA_BTL_ATOMIC_ADD , 1 , true);
462
+ assert (OMPI_SUCCESS == ret );
492
463
} else {
493
464
(void ) ompi_osc_rdma_counter_add ((osc_rdma_counter_t * ) target , 1 );
494
465
}
495
466
}
496
467
497
- if (frag ) {
498
- ompi_osc_rdma_frag_complete (frag );
499
- }
500
-
501
468
/* release our reference to peers in this group */
502
469
ompi_osc_rdma_release_peers (peers , group_size );
503
470
0 commit comments