enable gpu

foldfelis · foldfelis · commit e56f5d63071d · 2021-08-12T01:32:26.000+08:00
diff --git a/Project.toml b/Project.toml
@@ -4,10 +4,13 @@ authors = ["JingYu Ning <foldfelis@gmail.com> and contributors"]
 version = "0.1.0"
 
 [deps]
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+CUDAKernels = "72cfdca4-0801-4ab0-bf6a-d52aa10adc57"
 DataDeps = "124859b0-ceae-595e-8997-d05f6a7a8dfe"
 FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
 Fetch = "bb354801-46f6-40b6-9c3d-d42d7a74c775"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
+KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
 MAT = "23992714-dd62-5051-b70f-ba57cb901cac"
 Tullio = "bc48ee85-29a4-5162-ae0b-a64e1601d4bc"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
diff --git a/example/a.jl b/example/a.jl
@@ -0,0 +1,38 @@
+using Zygote
+using Flux
+using CUDA
+using FFTW
+using Tullio
+
+if has_cuda()  # choose where the model and data live: `gpu` when CUDA is available, else `cpu`
+    @info "CUDA is on"
+    device = gpu
+    CUDA.allowscalar(true)  # NOTE(review): scalar indexing of GPU arrays is allowed here; usually slow — confirm intended
+else
+    device = cpu
+end
+
+function t(𝐱)  # return a copy of the 3-index array 𝐱 with its first two dimensions swapped
+    @tullio 𝐱ᵀ[a, b, c] := 𝐱[b, a, c]  # `:=` makes Tullio allocate the output 𝐱ᵀ
+
+    return 𝐱ᵀ
+end
+
+m = Chain(  # small test model chaining Dense layers, Tullio permutes and FFTs; moved to `device` below
+    Dense(2, 5),
+    t,  # swap dims 1 and 2
+    x->Zygote.hook(real, x),  # forward pass returns x; `real` is applied to the gradient on the way back
+    x->real(fft(x, 1)),  # FFT along dim 1, keeping only the real part
+    t,
+    Dense(5, 5),
+    t,
+    x->Zygote.hook(real, x),  # same gradient hook as above
+    x->real(ifft(x, 1)),  # inverse FFT along dim 1, keeping only the real part
+    t,
+    x->sum(x)  # reduce the whole array to one scalar
+) |> device
+
+loss(x, y) = Flux.mse(m(x), y)  # NOTE(review): m ends in sum(x) (a scalar) but y has 10 entries — confirm intended
+
+data = [(rand(Float32, 2, 100, 10), rand(Float32, 10))] |> device  # a single random (input, target) pair
+Flux.train!(loss, params(m), data, Flux.ADAM())  # train over `data` with the ADAM optimiser
diff --git a/example/burgers.jl b/example/burgers.jl
@@ -1,14 +1,14 @@
 using NeuralOperators
 using Flux
-# using CUDA
+using CUDA
 
-# if has_cuda()
-#     @info "CUDA is on"
-#     device = gpu
-#     CUDA.allowscalar(false)
-# else
+if has_cuda()  # choose where the model and data live: `gpu` when CUDA is available, else `cpu`
+    @info "CUDA is on"
+    device = gpu
+    CUDA.allowscalar(false)  # disallow scalar indexing of GPU arrays
+else
     device = cpu
-# end
+end
 
 m = FourierNeuralOperator() |> device
 loss(𝐱, 𝐲) = sum(abs2, 𝐲 .- m(𝐱)) / size(𝐱)[end]
@@ -33,4 +33,4 @@ end
 
 data = [(𝐱, 𝐲) for (𝐱, 𝐲) in loader_train] |> device
 opt = Flux.Optimiser(WeightDecay(1f-4), Flux.ADAM(1f-3))
-Flux.@epochs 500 @time(Flux.train!(loss, params(m), data, opt, cb=Flux.throttle(loss_test, 10)))
+Flux.@epochs 500 @time(Flux.train!(loss, params(m), data, opt))
diff --git a/src/fourier.jl b/src/fourier.jl
@@ -1,3 +1,5 @@
+using CUDA, CUDAKernels, KernelAbstractions
+
 export
     SpectralConv1d,
     FourierOperator,
@@ -15,6 +17,9 @@ function c_glorot_uniform(dims...)
     return Flux.glorot_uniform(dims...) + Flux.glorot_uniform(dims...) * im
 end
 
+t(𝐱) = @tullio 𝐱ᵀ[a, b, c] := 𝐱[b, a, c]  # new 3-index array with the first two dimensions of 𝐱 swapped
+ein_mul(𝐱₁, 𝐱₂) = @tullio 𝐲[m, o, b] := 𝐱₁[m, i, b] * 𝐱₂[o, i, m]  # 𝐲[m, o, b] = Σᵢ 𝐱₁[m, i, b] * 𝐱₂[o, i, m]; `i` appears only on the right, so it is summed
+
 function SpectralConv1d(
     ch::Pair{<:Integer, <:Integer},
     modes::Integer,
@@ -27,25 +32,27 @@ function SpectralConv1d(
     weights = scale * init(out_chs, in_chs, modes)
 
     return Chain(
+        t,
         x -> Zygote.hook(real, x),
-        SpectralConv1d(weights, in_chs, out_chs, modes, σ)
+        SpectralConv1d(weights, in_chs, out_chs, modes, σ),
+        t
     )
 end
 
 Flux.@functor SpectralConv1d
 
 function (m::SpectralConv1d)(𝐱::AbstractArray)
-    𝐱_fft = fft(𝐱, 2) # [in_chs, x, batch]
-    𝐱_selected = 𝐱_fft[:, 1:m.modes, :] # [in_chs, modes, batch]
+    𝐱_fft = fft(𝐱, 1) # [x, in_chs, batch]; FFT along the first dimension
+    𝐱_selected = 𝐱_fft[1:m.modes, :, :] # [modes, in_chs, batch]; keep the first m.modes entries of dim 1
 
-    # [out_chs, modes, batch] <- [in_chs, modes, batch] [out_chs, in_chs, modes]
-    @tullio 𝐱_weighted[o, m, b] := 𝐱_selected[i, m, b] * m.weight[o, i, m]
+    # [modes, out_chs, batch] <- [modes, in_chs, batch] [out_chs, in_chs, modes]
+    𝐱_weighted = ein_mul(𝐱_selected, m.weight) # sums over in_chs (index i in ein_mul)
 
-    s = size(𝐱_weighted)
-    d = size(𝐱, 2) - m.modes
-    𝐱_padded = cat(𝐱_weighted, zeros(ComplexF32, s[1], d, s[3:end]...), dims=2)
+    s = size(𝐱_weighted)[2:end] # sizes of every dimension after the first
+    d = size(𝐱, 1) - m.modes # number of dim-1 entries dropped by the mode selection
+    𝐱_padded = cat(𝐱_weighted, zeros(ComplexF32, d, s...), dims=1) # zero-pad dim 1 back to size(𝐱, 1); NOTE(review): zeros(...) allocates a CPU Array — confirm it concatenates with GPU inputs
 
-    𝐱_out = ifft(𝐱_padded, 2)
+    𝐱_out = ifft(𝐱_padded, 1) # [x, out_chs, batch]; inverse FFT along the first dimension
 
     return m.σ.(real(𝐱_out))
 end