Skip to content

Commit f0ab7e1

Browse files
author
Damian Rouson
committed
Updated cuda_mpi library to support writing a Fortran accelerated_allocate wrapper for register_nc.
Signed-off-by: Damian Rouson <[email protected]>
1 parent c9f1a3c commit f0ab7e1

File tree

2 files changed

+6
-5
lines changed

2 files changed

+6
-5
lines changed

src/cuda_mpi/.mpi_caf.c.swp

-16 KB
Binary file not shown.

src/cuda_mpi/mpi_caf.c

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -364,16 +364,17 @@ PREFIX (num_images)(int distance __attribute__ ((unused)),
364364
return caf_num_images;
365365
}
366366

367-
void *
368-
PREFIX(registernc) (size_t size)
367+
void
368+
PREFIX(registernc) (size_t size,void* mem)
369369
{
370-
void *mem;
371370
int cuda_ierr = 0;
372371

373-
cuda_ierr = cudaMallocManaged(&mem,size,cudaMemAttachGlobal);
372+
cuda_ierr = cudaHostRegister(mem,size,cudaHostRegisterMapped);
374373
cudaDeviceSynchronize();
375374

376-
return mem;
375+
if (ierr != 0) call caf_runtime_error ("CUDA allocation failed with code %d", ierr);
376+
377+
return;
377378

378379
}
379380

0 commit comments

Comments
 (0)