Skip to content

Commit 64c0421

Browse files
author
Damian Rouson
committed
Adding cuada_mpi and co_dot integration test.
1 parent f0ab7e1 commit 64c0421

File tree

3 files changed

+41
-8
lines changed

3 files changed

+41
-8
lines changed

src/cuda_mpi/mpi_caf.c

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -364,20 +364,33 @@ PREFIX (num_images)(int distance __attribute__ ((unused)),
364364
return caf_num_images;
365365
}
366366

367+
/* Register user-allocated memory for management by CUDA (works for coarrays and non-coarrays):
368+
This procedure is exposed in the OpenCoarrays Fortran application programming interface (API)
369+
contained in ../extensions/opencoarrays.f90 to expose it to Fortran programmers who want
370+
to explicitly allocate memory related to manycore devices: currently via CUDA for NVIDIA
371+
GPUs and possibly in the future via another mechanism for Intel MIC architecture processors.
372+
*/
373+
367374
void
368-
PREFIX(registernc) (size_t size,void* mem)
375+
PREFIX(registernc) (void* mem,size_t mem_size)
369376
{
370377
int cuda_ierr = 0;
371378

372-
cuda_ierr = cudaHostRegister(mem,size,cudaHostRegisterMapped);
379+
cuda_ierr = cudaHostRegister(mem,mem_size,cudaHostRegisterMapped);
373380
cudaDeviceSynchronize();
374381

375-
if (ierr != 0) call caf_runtime_error ("CUDA allocation failed with code %d", ierr);
382+
if (ierr != 0)
383+
call caf_runtime_error ("CUDA allocation failed with code %d", ierr);
376384

377385
return;
378386

379387
}
380388

389+
/* Allocate coarray variables:
390+
This procedure is part of the OpenCoarrays Fortran application binary interface (ABI)
391+
that is used by the compiler only and is therefore not exposed to Fortran programs directly.
392+
*/
393+
381394
void *
382395
PREFIX (register) (size_t size, caf_register_t type, caf_token_t *token,
383396
int *stat, char *errmsg, int errmsg_len, int cuda)

src/extensions/opencoarrays.F90

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ module opencoarrays
5252
public :: sync_all
5353
public :: caf_init
5454
public :: caf_finalize
55+
public :: accelerate
5556
#ifdef COMPILER_SUPPORTS_ATOMICS
5657
public :: event_type
5758
public :: event_post
@@ -216,6 +217,7 @@ pure function logical_operator(lhs,rhs) result(lhs_op_rhs)
216217

217218
! Bindings for OpenCoarrays C procedures
218219
interface
220+
219221
! C function signature from ../mpi/mpi_caf.c
220222
! void _gfortran_caf_init (int *argc, char ***argv);
221223

@@ -233,6 +235,15 @@ subroutine caf_finalize(argc, argv) bind(C,name="_gfortran_caf_finalize")
233235
type(c_ptr), value :: argv
234236
end subroutine
235237

238+
! C function signature from ../cuda_mpi/mpi_caf.c:
239+
! void
240+
! PREFIX(registernc) (void* mem,size_t mem_size)
241+
242+
subroutine opencoarrays_registernc(mem,mem_size) bind(C,name="_gfortran_caf_registernc")
243+
type(c_ptr), intent(in), value :: mem
244+
integer(c_ptrdiff_t), intent(in) :: mem_size
245+
end subroutine
246+
236247
! C function signature from ../mpi/mpi_caf.c:
237248
! void
238249
! PREFIX (co_min) (gfc_descriptor_t *a, int result_image, int *stat, char *errmsg,
@@ -558,6 +569,11 @@ function c_sizeof(mold) result(c_size_of_mold)
558569
! ______ Assumed-rank co_reduce wrappers for each supported type and kind _________
559570
! _________________________________________________________________________________
560571

572+
subroutine accelerate(a)
573+
real(c_double), intent(in), contiguous :: a(..)
574+
call opencoarrays_registernc(c_loc(a),size(a)*c_sizeof(a))
575+
end subroutine
576+
561577
subroutine co_reduce_c_int(a, opr, result_image, stat, errmsg)
562578
! Dummy variables
563579
#ifdef COMPILER_LACKS_ASSUMED_RANK

src/tests/integration/gpu/co_dot.f90

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ module accelerated_module
1111
! Explicit interfaces for procedures that wrap accelerated kernels
1212
interface
1313

14-
! This wrapper exploits the OpenCoarrays acceleration support and is therefore simpler
14+
! This wrapper exploits the OpenCoarrays acceleration support
1515
subroutine cudaDot(a,b,partial_dot,n) bind(C, name="cudaDot")
1616
use iso_c_binding, only : c_float,c_int
1717
real(c_float) :: a(*),b(*)
@@ -80,7 +80,7 @@ program cu_dot_test
8080
real(c_float) :: dot
8181
real(c_double) :: t_start, t_end
8282

83-
! Library-accelerated variables
83+
! Library-accelerated variables (these are corarrays to facilitate a scatter operation)
8484
real(c_float), allocatable :: a_acc(:)[:], b_acc(:)[:]
8585
real(c_float) :: dot_acc[*]
8686

@@ -122,10 +122,14 @@ program cu_dot_test
122122

123123
subroutine initialize_all_variables()
124124
integer(c_int) :: i
125-
! The allocation arguments must be coarrays to support the scatter operation below
126-
call accelerated_allocate(a_acc,n_local)
127-
call accelerated_allocate(b_acc,n_local)
125+
! These allocation arguments must be coarrays to support the scatter operation below
128126
allocate(a_unacc(n_local)[*],b_unacc(n_local)[*])
127+
! These allocation arguments will be defined locally and therefore need not be coarrays
128+
allocate(a_acc(n_local),b_acc(n_local))
129+
130+
! Register the desired variables for acceleration
131+
call accelerate(a_acc)
132+
call accelerate(b_acc)
129133

130134
if(me == 1) then
131135
! Initialize the local unaccelerated data on every image

0 commit comments

Comments
 (0)