@@ -5,23 +5,13 @@ module accelerated_module
5
5
private
6
6
public :: co_dot_accelerated
7
7
public :: co_dot_unaccelerated
8
- public :: co_dot_manually_accelerated
9
8
public :: co_dot_mapped_manually_accelerated
10
9
public :: CUDA,OpenACC,OpenMP
11
10
public :: walltime
12
11
13
12
! Explicit interfaces for procedures that wrap accelerated kernels
14
13
interface
15
14
16
- ! This is the wrapper a programmer would have to write today to manually accelerate calculations
17
- subroutine manual_cudaDot (a ,b ,partial_dot ,n ,img ) bind(C, name= " manual_cudaDot" )
18
- use iso_c_binding, only : c_float,c_int
19
- real (c_float) :: a(* ),b(* )
20
- real (c_float) :: partial_dot
21
- integer (c_int),value :: n
22
- integer (c_int),value :: img
23
- end subroutine
24
-
25
15
subroutine manual_mapped_cudaDot (a ,b ,partial_dot ,n ,img ) bind(C, name= " manual_mapped_cudaDot" )
26
16
use iso_c_binding, only : c_float,c_int
27
17
real (c_float) :: a(* ),b(* )
@@ -59,14 +49,6 @@ subroutine co_dot_unaccelerated(x,y,x_dot_y)
59
49
call co_sum(x_dot_y) ! Call Fortran 2015 collective sum
60
50
end subroutine
61
51
62
- ! This parallel collective dot product uses manual acceleration
63
- subroutine co_dot_manually_accelerated (x ,y ,x_dot_y )
64
- real (c_float), intent (in ) :: x(:),y(:)
65
- real (c_float), intent (out ) :: x_dot_y
66
- call manual_cudaDot(x,y,x_dot_y,size (x),this_image()- 1 )
67
- call co_sum(x_dot_y) ! Call Fortran 2015 collective sum
68
- end subroutine
69
-
70
52
subroutine co_dot_mapped_manually_accelerated (x ,y ,x_dot_y )
71
53
real (c_float), intent (in ) :: x(:),y(:)
72
54
real (c_float), intent (out ) :: x_dot_y
@@ -106,7 +88,7 @@ program cu_dot_test
106
88
real (c_float) :: dot
107
89
real (c_double) :: t_start, t_end
108
90
109
- ! Compiler/library -accelerated variables
91
+ ! Library -accelerated variables
110
92
real (c_float), allocatable :: a_acc(:)[:], b_acc(:)[:]
111
93
real (c_float) :: dot_acc[* ]
112
94
@@ -127,7 +109,7 @@ program cu_dot_test
127
109
sync all
128
110
129
111
block
130
- ! use accelerated_module, only : co_dot_accelerated,co_dot_unaccelerated,co_dot_manually_accelerated, CUDA,walltime,co_dot_mapped_manually_accelerated
112
+ ! use accelerated_module, only : co_dot_accelerated,co_dot_unaccelerated,CUDA,walltime,co_dot_mapped_manually_accelerated
131
113
use accelerated_module
132
114
133
115
! Parallel execution
@@ -138,13 +120,6 @@ program cu_dot_test
138
120
139
121
sync all
140
122
141
- t_start = walltime()
142
- call co_dot_manually_accelerated(a_man,b_man,dot_man)
143
- t_end = walltime()
144
- if (me== 1 ) print * , ' Manually accelerated dot_prod' ,dot_man,' time:' ,t_end- t_start
145
-
146
- sync all
147
-
148
123
! Serial execution
149
124
t_start = walltime()
150
125
call co_dot_unaccelerated(a_man,b_man,dot)
0 commit comments