@@ -331,6 +331,13 @@ static inline int ompi_osc_rdma_gacc_amo (ompi_osc_rdma_module_t *module, ompi_o
331
331
return OMPI_SUCCESS ;
332
332
}
333
333
334
/**
 * Decide whether an operand of the given size at the given remote address passes the
 * size and alignment test for an atomic operation.
 *
 * Only two operand widths are accepted: sizeof (uint32_t) and sizeof (uint64_t). For
 * either width the remote address must be naturally aligned, i.e. its low two bits
 * (32-bit operand) or low three bits (64-bit operand) must all be zero.
 *
 * @param[in] remote_addr  remote address the operation would target
 * @param[in] size         operand size in bytes
 *
 * @returns true if size is 4 or 8 bytes and remote_addr is aligned to that size,
 *          false for any other size or for a misaligned address
 */
static inline __opal_attribute_always_inline__ bool ompi_osc_rdma_is_atomic_size_supported (uint64_t remote_addr,
                                                                                            size_t size)
{
    switch (size) {
    case sizeof (uint32_t):
        /* 32-bit operand: address must sit on a 4-byte boundary */
        return 0 == (remote_addr & 0x3);
    case sizeof (uint64_t):
        /* 64-bit operand: address must sit on an 8-byte boundary */
        return 0 == (remote_addr & 0x7);
    default:
        /* every other operand width is rejected regardless of alignment */
        return false;
    }
}
340
+
334
341
static inline int ompi_osc_rdma_gacc_contig (ompi_osc_rdma_sync_t * sync , const void * source , int source_count ,
335
342
ompi_datatype_t * source_datatype , void * result , int result_count ,
336
343
ompi_datatype_t * result_datatype , opal_convertor_t * result_convertor ,
@@ -339,19 +346,21 @@ static inline int ompi_osc_rdma_gacc_contig (ompi_osc_rdma_sync_t *sync, const v
339
346
ompi_datatype_t * target_datatype , ompi_op_t * op , ompi_osc_rdma_request_t * request )
340
347
{
341
348
ompi_osc_rdma_module_t * module = sync -> module ;
342
- unsigned long len = target_count * target_datatype -> super .size ;
349
+ size_t target_dtype_size = target_datatype -> super .size ;
350
+ unsigned long len = target_count * target_dtype_size ;
343
351
char * ptr = NULL ;
344
352
int ret ;
345
353
346
- request -> len = target_datatype -> super . size * module -> network_amo_max_count ;
354
+ request -> len = target_dtype_size * module -> network_amo_max_count ;
347
355
348
356
OSC_RDMA_VERBOSE (MCA_BASE_VERBOSE_TRACE , "initiating accumulate on contiguous region of %lu bytes to remote address %" PRIx64
349
357
", sync %p" , len , target_address , (void * ) sync );
350
358
351
359
/* if the datatype is small enough (and the count is 1) then try to directly use the hardware to execute
352
360
* the atomic operation. this should be safe in all cases as either 1) the user has assured us they will
353
361
* never use atomics with count > 1, 2) we have the accumulate lock, or 3) we have an exclusive lock */
354
- if ((target_datatype -> super .size <= 8 ) && (((unsigned long ) target_count ) <= module -> network_amo_max_count )) {
362
+ if ((target_dtype_size <= 8 ) && (((unsigned long ) target_count ) <= module -> network_amo_max_count ) &&
363
+ ompi_osc_rdma_is_atomic_size_supported (target_address , target_dtype_size )) {
355
364
ret = ompi_osc_rdma_gacc_amo (module , sync , source , result , result_count , result_datatype , result_convertor ,
356
365
peer , target_address , target_handle , target_count , target_datatype , op , request );
357
366
if (OPAL_LIKELY (OMPI_SUCCESS == ret )) {
0 commit comments