|
| 1 | +using CLArrays |
| 2 | +TY = Float32 |
| 3 | +N = 2^9 |
| 4 | +const h = TY(2*π/N) |
| 5 | +const epsn = TY(h * .5) |
| 6 | +const C = TY(2/epsn) |
| 7 | +const tau = TY(epsn * h) |
| 8 | +Tfinal = 50. |
| 9 | + |
| 10 | +S(x,y) = exp(-x^2/0.1f0)*exp(-y^2/0.1f0) |
| 11 | + |
| 12 | +ArrayType = CLArray |
| 13 | +# real-space and reciprocal-space grids |
| 14 | +# the real-space grid is just used for plotting! |
| 15 | +X_cpu = convert.(TY, collect(linspace(-pi+h, pi, N)) .* ones(1,N)) |
| 16 | +X = ArrayType(X_cpu); |
| 17 | +k = collect([0:N/2; -N/2+1:-1]); |
| 18 | +Â = ArrayType(convert.(TY,kron(k.^2, ones(1,N)) + kron(ones(N), k'.^2))); |
| 19 | + |
| 20 | +# initial condition |
| 21 | +uc = ArrayType(TY(2.0)*(rand(TY, N, N)-TY(0.5))) |
| 22 | + |
| 23 | +################################################################# |
| 24 | +################################################################# |
| 25 | + |
| 26 | +pow3(u) = complex((u * u * u) - u) |
| 27 | +function take_step!(u, Â, t_plot, fftplan!, ifftplan!, u3fft, uc, tmp) |
| 28 | + u3fft .= pow3.(u) |
| 29 | + fftplan! * u3fft |
| 30 | + uc .= complex.(u) |
| 31 | + fftplan! * uc |
| 32 | + @. tmp .= ((1f0+C*tau*Â) .* uc .- tau/epsn * (Â .* u3fft)) ./ (1f0+(epsn*tau)*Â.^2f0+C*tau*Â) |
| 33 | + ifftplan! * tmp |
| 34 | + u .= real.(tmp) |
| 35 | + nothing |
| 36 | +end |
| 37 | +function normalise_af!(u, out) |
| 38 | + out .= u .- minimum(u) |
| 39 | + out .= out ./ maximum(out) |
| 40 | + nothing |
| 41 | +end |
| 42 | +################################################################# |
| 43 | +################################################################# |
| 44 | + |
| 45 | +n = 1 |
| 46 | +T_plot = 0.01; t_plot = 0.0 |
| 47 | +ceil(Tfinal / tau) |
| 48 | +println(typeof(uc)) |
| 49 | +up = copy(uc) |
| 50 | +ucc = complex.(uc) |
| 51 | +fftplan! = plan_fft!(ucc) |
| 52 | +ifftplan! = plan_ifft!(ucc) |
| 53 | +u3fft = similar(ucc) |
| 54 | +tmp = similar(ucc) |
| 55 | + |
| 56 | +using GLVisualize |
| 57 | +w = glscreen(); @async renderloop(w) |
| 58 | +normalise_af!(uc,up) |
| 59 | +up .= up .* 0.1f0 |
| 60 | +robj = visualize(Array(up), :surface).children[] |
| 61 | +_view(robj) |
| 62 | +GLAbstraction.update!(robj[:position_z], Array(up)) |
| 63 | + |
| 64 | +@time for n = 1:10000 |
| 65 | + isopen(w) || break |
| 66 | + # @show n |
| 67 | + take_step!(uc, Â, t_plot, fftplan!, ifftplan!, u3fft, ucc, tmp) |
| 68 | + t_plot += tau |
| 69 | + if t_plot >= T_plot |
| 70 | + normalise_af!(uc, up) |
| 71 | + up .= up .* 0.1f0 |
| 72 | + GLAbstraction.update!(robj[:position_z], Array(up)) |
| 73 | + end |
| 74 | + yield() |
| 75 | +end |
| 76 | + |
| 77 | + |
| 78 | + |
| 79 | + |
| 80 | +using CLArrays, GLVisualize, GPUArrays, GLAbstraction, GeometryTypes |
| 81 | + |
| 82 | +# source: https://github.com/johnfgibson/julia-pde-benchmark/blob/master/1-Kuramoto-Sivashinksy-benchmark.ipynb |
| 83 | +function inner_ks(IFFT!, FFT!, Nt, Nn, Nn1, u, G, A_inv, B, dt2, dt32, uslice, U) |
| 84 | + for n = 1:Nt |
| 85 | + Nn1 .= Nn # shift nonlinear term in time |
| 86 | + Nn .= u # put u into Nn in prep for comp of nonlinear term |
| 87 | + |
| 88 | + IFFT! * Nn |
| 89 | + |
| 90 | + # plotting |
| 91 | + uslice .= real.(Nn) ./ 10f0 |
| 92 | + U[1:Nt, n] = reshape(Array(uslice), (Nt, 1)) # copy from gpu to opengl gpu not implemented for now |
| 93 | + |
| 94 | + # transform Nn to gridpt values, in place |
| 95 | + Nn .= Nn .* Nn # collocation calculation of u^2 |
| 96 | + FFT!*Nn # transform Nn back to spectral coeffs, in place |
| 97 | + |
| 98 | + Nn .= G .* Nn # compute Nn == -1/2 d/dx (u^2) = -u u_x |
| 99 | + |
| 100 | + # loop fusion! Julia translates the folling line of code to a single for loop. |
| 101 | + u .= A_inv .* (B .* u .+ dt32 .* Nn .- dt2 .* Nn1) |
| 102 | + yield() |
| 103 | + end |
| 104 | + GPUArrays.synchronize(u) |
| 105 | +end |
| 106 | + |
| 107 | +function execute(window) |
| 108 | + |
| 109 | + T = Float32; AT = CLArray |
| 110 | + N = 1512 |
| 111 | + Lx = T(64*pi) |
| 112 | + Nx = T(N) |
| 113 | + dt = T(1/16) |
| 114 | + |
| 115 | + x = Lx*(0:Nx-1)/Nx |
| 116 | + u = T.(cos.(x) + 0.1*sin.(x/8) + 0.01*cos.((2*pi/Lx)*x)) |
| 117 | + |
| 118 | + u = AT((T(1)+T(0)im)*u) # force u to be complex |
| 119 | + Nx = length(u) # number of gridpoints |
| 120 | + kx = T.(vcat(0:Nx/2-1, 0:0, -Nx/2+1:-1))# integer wavenumbers: exp(2*pi*kx*x/L) |
| 121 | + alpha = T(2)*pi*kx/Lx # real wavenumbers: exp(alpha*x) |
| 122 | + |
| 123 | + D = T(1)im*alpha # spectral D = d/dx operator |
| 124 | + |
| 125 | + L = alpha.^2 .- alpha.^4 # spectral L = -D^2 - D^4 operator |
| 126 | + |
| 127 | + G = AT(T(-0.5) .* D) # spectral -1/2 D operator, to eval -u u_x = 1/2 d/dx u^2 |
| 128 | + |
| 129 | + # convenience variables |
| 130 | + dt2 = T(dt/2) |
| 131 | + dt32 = T(3*dt/2) |
| 132 | + A_inv = AT((ones(T, Nx) - dt2*L).^(-1)) |
| 133 | + B = AT(ones(T, Nx) + dt2*L) |
| 134 | + |
| 135 | + # compute in-place FFTW plans |
| 136 | + FFT! = plan_fft!(u) |
| 137 | + IFFT! = plan_ifft!(u) |
| 138 | + |
| 139 | + # compute nonlinear term Nn == -u u_x |
| 140 | + powed = u .* u |
| 141 | + Nn = G .* fft(powed); # Nn == -1/2 d/dx (u^2) = -u u_x |
| 142 | + Nn1 = copy(Nn); # Nn1 = Nn at first time step |
| 143 | + FFT! * u; |
| 144 | + |
| 145 | + uslice = real(u) |
| 146 | + U = zeros(Float32, N, N) |
| 147 | + robj = visualize( |
| 148 | + U, :surface, color_norm = Vec2f0(-0.5, 0.5), |
| 149 | + ranges = ((-3f0, 3f0), (-3f0, 3f0)) |
| 150 | + ).children[] |
| 151 | + Ugpu = robj[:position_z] |
| 152 | + _view(robj) |
| 153 | + |
| 154 | + bench = inner_ks((IFFT!), (FFT!), N, Nn, Nn1, u, G, A_inv, B, dt2, dt32, uslice, Ugpu) |
| 155 | +end |
| 156 | + |
| 157 | +w = glscreen(); @async renderloop(w) |
| 158 | + |
| 159 | +execute(w) |
0 commit comments