Skip to content

Commit c9f1a3c

Browse files
author
Damian Rouson
committed
First draft of refactored co_dot test.
Signed-off-by: Damian Rouson <[email protected]>
1 parent c66da7c commit c9f1a3c

File tree

1 file changed

+21
-14
lines changed

1 file changed

+21
-14
lines changed

src/tests/integration/gpu/co_dot.f90

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,16 @@ subroutine co_dot_unaccelerated(x,y,x_dot_y)
4747
subroutine co_dot_accelerated(x,y,x_dot_y,API)
4848
real, intent(in) :: x(:),y(:)
4949
real, intent(out) :: x_dot_y
50-
integer(c_int), intent(in) :: API
51-
select case(API)
50+
integer(c_int), intent(in), optional :: API
51+
integer(c_int) :: chosen_API
52+
53+
if (present(API))
54+
chosen_API = API
55+
else
56+
chosen_API = CUDA
57+
end if
58+
59+
select case(chosen_API)
5260
case(CUDA)
5361
call cudaDot(x,y,x_dot_y,size(x)) ! Accelerated reduction on local data
5462
case(OpenMP)
@@ -68,18 +76,14 @@ program cu_dot_test
6876
implicit none
6977

7078
! Unaccelerated variables
71-
real(c_float), allocatable :: a(:),b(:)
79+
real(c_float), allocatable :: a_unacc(:),b_unacc(:)
7280
real(c_float) :: dot
7381
real(c_double) :: t_start, t_end
7482

7583
! Library-accelerated variables
7684
real(c_float), allocatable :: a_acc(:)[:], b_acc(:)[:]
7785
real(c_float) :: dot_acc[*]
7886

79-
! Manually accelerated variables
80-
real(c_float), allocatable :: a_man(:)[:], b_man(:)[:]
81-
real(c_float) :: dot_man[*]
82-
8387
integer(c_int),parameter :: n = 99900000
8488
integer(c_int) :: n_local,np,me
8589

@@ -98,15 +102,15 @@ program cu_dot_test
98102

99103
!Parallel execution
100104
t_start = walltime()
101-
call co_dot_accelerated(a_acc,b_acc,dot_acc,CUDA)
105+
call co_dot_accelerated(a_acc(1:n_local),b_acc(1:n_local),dot_acc,CUDA)
102106
t_end = walltime()
103107
if(me==1) print *, 'Accelerated dot_prod',dot_acc,'time:',t_end-t_start
104108

105109
sync all
106110

107111
!Serial execution
108112
t_start = walltime()
109-
call co_dot_unaccelerated(a_man,b_man,dot)
113+
call co_dot_unaccelerated(a_unacc(1:n_local),b_unacc(1:n_local),dot)
110114
t_end = walltime()
111115
if(me==1) print *, 'Serial result',dot,'time:',t_end-t_start
112116

@@ -118,8 +122,10 @@ program cu_dot_test
118122

119123
subroutine initialize_all_variables()
120124
integer(c_int) :: i
121-
call accelerated_allocate(a_acc(n_local)[*],b_acc(n_local)[*])
122-
call accelerated_allocate(a_man(n_local)[*],b_man(n_local)[*])
125+
! The allocation arguments must be coarrays to support the scatter operation below
126+
call accelerated_allocate(a_acc,n_local)
127+
call accelerated_allocate(b_acc,n_local)
128+
allocate(a_unacc(n_local)[*],b_unacc(n_local)[*])
123129

124130
if(me == 1) then
125131
! Initialize the local unaccelerated data on every image
@@ -129,10 +135,11 @@ subroutine initialize_all_variables()
129135
! Scatter a and b to a_acc and b_acc
130136
do i=1,np
131137
a_acc(1:n_local)[i] = a(n_local*(i-1)+1:n_local*i)
132-
a_man(1:n_local)[i] = a(n_local*(i-1)+1:n_local*i)
133138
b_acc(1:n_local)[i] = b(n_local*(i-1)+1:n_local*i)
134-
b_man(1:n_local)[i] = b(n_local*(i-1)+1:n_local*i)
135-
enddo
139+
end do
140+
sync all
141+
a_unacc=a_acc
142+
b_unacc=b_acc
136143
endif
137144
end subroutine
138145

0 commit comments

Comments
 (0)