109
109
110
110
111
111
"""
112
- gpu_call(f , A::GPUArray, args::Tuple, configuration = length(A))
112
+ gpu_call(kernel::Function , A::GPUArray, args::Tuple, configuration = length(A))
113
113
114
- Calls function `f ` on the GPU.
114
+ Calls function `kernel ` on the GPU.
115
115
`A` must be a GPUArray and will help to dispatch to the correct GPU backend
116
116
and supplies queues and contexts.
117
117
Calls the kernel function with `kernel(state, args...)`, where `state` is dependent on the backend
@@ -123,7 +123,7 @@ Optionally, a launch configuration can be supplied in the following way:
123
123
2) Pass a tuple of integer tuples to define blocks and threads per block.
124
124
125
125
"""
126
- function gpu_call (f , A:: GPUArray , args:: Tuple , configuration = length (A))
126
+ function gpu_call (kernel , A:: GPUArray , args:: Tuple , configuration = length (A))
127
127
ITuple = NTuple{N, Integer} where N
128
128
# If it is a single integer, we assume it to be the global size / total number of threads one wants to launch
129
129
thread_blocks = if isa (configuration, Integer)
@@ -147,7 +147,7 @@ function gpu_call(f, A::GPUArray, args::Tuple, configuration = length(A))
147
147
`linear_index` will be inbetween 1:prod((blocks..., threads...))
148
148
""" )
149
149
end
150
- _gpu_call (f , A, args, thread_blocks)
150
+ _gpu_call (kernel , A, args, thread_blocks)
151
151
end
152
152
153
153
# Internal GPU call function that needs to be overloaded by the backends.
0 commit comments