Merge pull request #14 from JuliaImageRecon/nh/gpuFix

nHackel · web-flow · commit e54bba27d7d7 · 2025-01-16T15:17:30.000+01:00
Move GPU extension kernels to KernelAbstractions.jl
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "LinearOperatorCollection"
 uuid = "a4a2c56f-fead-462a-a3ab-85921a5f2575"
 authors = ["Tobias Knopp <tobias.knopp@tuhh.de> and contributors"]
-version = "2.0.7"
+version = "2.0.8"
 
 [deps]
 InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
@@ -21,17 +21,19 @@ RadonKA = "86de8297-835b-47df-b249-c04e8db91db5"
 
 [compat]
 julia = "1.9"
-GPUArrays = "8, 9, 10"
-JLArrays = "0.1"
+GPUArrays = "11"
+KernelAbstractions = "0.9"
+JLArrays = "0.2"
 NFFT = "0.13"
-LinearOperators = "2.3.3"
+LinearOperators = "2"
 RadonKA = "0.6"
 Wavelets = "0.9, 0.10"
 Reexport = "1.0"
 FFTW = "1.0"
 
 [weakdeps]
 GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
+KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
 NFFT = "efe261a4-0d2b-5849-be55-fc731d526b0d"
 Wavelets = "29a6e085-ba6d-5f35-a997-948ac2efa89a"
 FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
diff --git a/ext/LinearOperatorGPUArraysExt/GradientOp.jl b/ext/LinearOperatorGPUArraysExt/GradientOp.jl
@@ -1,54 +1,64 @@
 function LinearOperatorCollection.grad!(res::vecT, img::vecT, shape::NTuple{N,Int64}, di::CartesianIndex{N}) where {vecT <: AbstractGPUVector, N}
   res = reshape(res, shape .- Tuple(di))
+  backend = get_backend(res)
+
+  @kernel cpu = false inbounds = true function grad_kernel!(res, img, di)
+    idx = @index(Global, Cartesian)
+    res[idx] = img[idx] - img[idx + di]
+  end
 
   if length(res) > 0
-    gpu_call(grad_kernel!, res, reshape(img,shape), di)
+    kernel = grad_kernel!(backend)
+    kernel(res, reshape(img, shape), di, ndrange = size(res))
   end
 
   return res
 end
 
-function grad_kernel!(ctx, res, img, di)
-  idx = @cartesianidx(res)
-  @inbounds res[idx] = img[idx] - img[idx + di]
-  return nothing  
-end
 
 # adjoint of directional gradients
 function LinearOperatorCollection.grad_t!(res::vecT, g::vecT, shape::NTuple{N,Int64}, di::CartesianIndex{N}) where {T, vecT <: AbstractGPUVector{T}, N}
   res_ = reshape(res,shape)
   g_ = reshape(g, shape .- Tuple(di))
+  backend = get_backend(res)
 
   fill!(res, zero(T))
   if length(g_) > 0
-    gpu_call(grad_t_kernel_1!, res_, g_, di, elements = length(g))
-    gpu_call(grad_t_kernel_2!, res_, g_, di, elements = length(g))
+    kernel1 = grad_t_kernel_1!(backend)
+    kernel2 = grad_t_kernel_2!(backend)
+    kernel1(res_, g_, di, ndrange = size(g_))
+    kernel2(res_, g_, di, ndrange = size(g_))
   end
+  
+  return res
 end
 
-function grad_t_kernel_1!(ctx, res, g, di)
-  idx = @cartesianidx(g)
-  @inbounds res[idx] += g[idx]
-  return nothing  
+@kernel cpu = false inbounds = true function grad_t_kernel_1!(res, g, di)
+  idx = @index(Global, Cartesian)
+  res[idx] += g[idx]  
 end
 
-function grad_t_kernel_2!(ctx, res, g, di)
-  idx = @cartesianidx(g)
-  @inbounds res[idx + di] -= g[idx]
-  return nothing  
+@kernel cpu = false inbounds = true function grad_t_kernel_2!(res, g, di)
+  idx = @index(Global, Cartesian)
+  res[idx + di] -= g[idx]  
 end
 
+
 function LinearOperatorCollection.grad_t!(res::vecT, g::vecT, shape::NTuple{N,Int64}, dirs, dims, dim_ends, tmp) where {T, vecT <: AbstractGPUVector{T}, N}
   dim_start = 1
   res = reshape(res, shape)
+  backend = get_backend(res)
 
   fill!(res, zero(eltype(res)))
+  kernel1 = grad_t_kernel_1!(backend)
+  kernel2 = grad_t_kernel_2!(backend)
   for (i, di) in enumerate(dirs)
     g_ = reshape(view(g, dim_start:dim_ends[i]), shape .- Tuple(di))
     if length(g_) > 0
-      gpu_call(grad_t_kernel_1!, res, g_, di, elements = length(g))
-      gpu_call(grad_t_kernel_2!, res, g_, di, elements = length(g))
+      kernel1(res, g_, di, ndrange = size(g_))
+      kernel2(res, g_, di, ndrange = size(g_))
     end  
     dim_start = dim_ends[i] + 1
   end
+  return res
 end
diff --git a/ext/LinearOperatorGPUArraysExt/LinearOperatorGPUArraysExt.jl b/ext/LinearOperatorGPUArraysExt/LinearOperatorGPUArraysExt.jl
@@ -1,6 +1,6 @@
 module LinearOperatorGPUArraysExt
 
-using LinearOperatorCollection, GPUArrays
+using LinearOperatorCollection, GPUArrays, GPUArrays.KernelAbstractions # Hacky, but with [KernelAbstractions, GPUArrays] the extension didn't trigger
 
 include("GradientOp.jl")