1
1
# Directional finite difference on the GPU.
#
# `res` is viewed as an array of size `shape .- Tuple(di)` and `img` as an array of
# size `shape`; every element of the output receives `img[idx] - img[idx + di]`.
# No kernel is launched when the output is empty. Returns the reshaped `res`.
function LinearOperatorCollection.grad!(
    res::vecT, img::vecT, shape::NTuple{N,Int64}, di::CartesianIndex{N}
) where {vecT<:AbstractGPUVector,N}
    res = reshape(res, shape .- Tuple(di))
    backend = get_backend(res)

    if length(res) > 0
        kernel = grad_kernel!(backend)
        # One work-item per output element: the launch range is the output size.
        kernel(res, reshape(img, shape), di; ndrange=size(res))
    end

    return res
end

# Kernel behind `grad!`. Defined at top level, like the `grad_t_kernel_*!` kernels,
# instead of being re-declared inside the calling function.
# The launch range must be `size(res)` so that `idx + di` stays inside `img`
# (bounds checks are switched off via `inbounds = true`).
@kernel cpu = false inbounds = true function grad_kernel!(res, img, di)
    idx = @index(Global, Cartesian)
    res[idx] = img[idx] - img[idx + di]
end
16
18
17
19
# adjoint of directional gradients
18
20
# Single-direction variant.
#
# `res` is zero-filled, then viewed as an array of size `shape`; `g` is viewed as an
# array of size `shape .- Tuple(di)`. Each element `g[idx]` is added to `res[idx]`
# and subtracted from `res[idx + di]`. Nothing is launched when `g` is empty.
# Returns `res` (the vector that was passed in, not the reshaped view).
function LinearOperatorCollection.grad_t!(
    res::vecT, g::vecT, shape::NTuple{N,Int64}, di::CartesianIndex{N}
) where {T,vecT<:AbstractGPUVector{T},N}
    res_ = reshape(res, shape)
    g_ = reshape(g, shape .- Tuple(di))
    backend = get_backend(res)

    fill!(res, zero(T))
    if length(g_) > 0
        kernel1 = grad_t_kernel_1!(backend)
        kernel2 = grad_t_kernel_2!(backend)
        # Two separate launches: the second writes at indices shifted by `di`, which
        # can coincide with indices the first one writes.
        kernel1(res_, g_, di; ndrange=size(g_))
        kernel2(res_, g_, di; ndrange=size(g_))
    end

    return res
end
28
35
29
# First half of the adjoint update: accumulate `g[idx]` into `res[idx]`.
# Launch with `ndrange = size(g)`. `di` is not used here; it is accepted so that both
# adjoint kernels can be launched with the same argument list.
# Bounds checks are switched off via `inbounds = true`.
@kernel cpu = false inbounds = true function grad_t_kernel_1!(res, g, di)
    idx = @index(Global, Cartesian)
    res[idx] += g[idx]
end
34
40
35
# Second half of the adjoint update: subtract `g[idx]` from `res[idx + di]`.
# Launch with `ndrange = size(g)`; `res` must be large enough that `idx + di` is a
# valid index, since bounds checks are switched off via `inbounds = true`.
@kernel cpu = false inbounds = true function grad_t_kernel_2!(res, g, di)
    idx = @index(Global, Cartesian)
    res[idx + di] -= g[idx]
end
40
45
46
+
41
47
# Multi-direction variant.
#
# `g` holds one contiguous segment per entry of `dirs`; segment `i` ends at
# `dim_ends[i]` and the next one starts right after it. `res` is zero-filled once and
# every segment is then accumulated into it: for direction `di`, the segment is viewed
# as an array of size `shape .- Tuple(di)`, added at `idx` and subtracted at
# `idx + di`. Empty segments are skipped.
# `dims` and `tmp` are not used by this method.
# Returns `res` reshaped to `shape`.
function LinearOperatorCollection.grad_t!(
    res::vecT, g::vecT, shape::NTuple{N,Int64}, dirs, dims, dim_ends, tmp
) where {T,vecT<:AbstractGPUVector{T},N}
    res_ = reshape(res, shape)
    backend = get_backend(res_)

    fill!(res_, zero(T))

    # Instantiate the kernels once; they are reused for every direction.
    kernel1 = grad_t_kernel_1!(backend)
    kernel2 = grad_t_kernel_2!(backend)

    dim_start = 1
    for (i, di) in enumerate(dirs)
        g_ = reshape(view(g, dim_start:dim_ends[i]), shape .- Tuple(di))
        if length(g_) > 0
            kernel1(res_, g_, di; ndrange=size(g_))
            kernel2(res_, g_, di; ndrange=size(g_))
        end
        dim_start = dim_ends[i] + 1
    end

    return res_
end
0 commit comments