diff --git a/profiling/reflect.jl b/profiling/reflect.jl index f06c08f..9258dce 100644 --- a/profiling/reflect.jl +++ b/profiling/reflect.jl @@ -1,7 +1,9 @@ using CUDA using Flux using BenchmarkTools +using Random +using HighDimPDE if CUDA.functional() CUDA.allowscalar(false) _device = Flux.gpu @@ -10,6 +12,21 @@ end # testing reflection on batchsize d = 100 batch_size = 10000 +# +for d in [2^i for i in 1:17] + Random.seed!(1) + X0 = fill(0.0f0,d) + X1 = X0 + randn(d) + println("\n\nd = $d\n") + if d <= 16384 + args = copy.((X0, X1, -1, 1)) + @time HighDimPDE._reflect(args...) + args = copy.((X0, X1, -1, 1)) + @time HighDimPDE._reflect_GPU(args...) + end + args = copy.((X0, X1, -1, 1)) + @time HighDimPDE._reflect_outs(args...) +end y0 = CUDA.zeros(d,batch_size) y1 = CUDA.randn(size(y0)...) @btime _reflect_GPU2($y0,$y1,-1f0,1f0,_device) diff --git a/src/reflect.jl b/src/reflect.jl index 6e11261..c6ce4a0 100644 --- a/src/reflect.jl +++ b/src/reflect.jl @@ -88,3 +88,97 @@ function _reflect_GPU(a, b, s::Real, e::Real) end return b end + +function _out_indices(b, s::R, e::R) where {R<:Real} + out1 = findall(b .< s) + out2 = findall(b .> e) + out = vcat(out1, out2) + return out1, out2, out +end + + +""" + _rtemp_lower!(rtemp, a, b, s, out_lower) +Update slice of `rtemp` corresponding to the lower boundary `s`. +""" +function _rtemp_lower!(rtemp, a, b, s, out_lower) + for i in 1:length(out_lower) + idx = out_lower[i] + rtemp[i] = @. (s - a[idx]) / (b[idx] - a[idx]) + end +end + +""" + _rtemp_lower!(rtemp, a, b, e, out_upper, offset) +Update slice of `rtemp` corresponding to the upper boundary `e`. +NOTE: An offset needs to be provided, corresponding to the number of dimensions where `b` is below the lower boundary. +(SEE `rtemp_lower!`). +""" +function _rtemp_upper!(rtemp, a, b, e, out_upper, offset) + for i in 1:length(out_upper) + idx = out_upper[i] + rtemp[i + offset] = @. (e - a[idx]) / (b[idx] - a[idx]) + end +end + + +""" + _discard_reflected_out_index!(out, out1, out2, rmin_idx) +Checks if `b[n_idx]` (which corresponds to `out[rmin_idx]`) is within the specified boundary `[s, e]` after reflection along dimension `n_idx`. +The corresponding `out`-indices are removed before iterating the reflection. +""" +function _discard_reflected_out_index!(out, out1, out2, rmin_idx) + offset = length(out1) + if rmin_idx <= offset + deleteat!(out1, rmin_idx) + deleteat!(out, rmin_idx) + else + deleteat!(out2, rmin_idx - offset) + deleteat!(out, rmin_idx) + end +end + + +function _swap_boundary_outs!(out1, out2, n_idx, rmin_idx) + offset = length(out1) + # Should occur after reflecting OUT-OF-BOUNDS, where + # dimension `n_idx` is assigned to other `out`-vector + # for next reflection. + if rmin_idx <= offset + deleteat!(out1, rmin_idx) + push!(out2, n_idx) + else + deleteat!(out2, rmin_idx - offset) + push!(out1, n_idx) + end +end + +function _reflect_outs(a, b, s::Real, e::Real) + all((a .>= s) .& (a .<= e)) ? nothing : error("a = $a not in hypercube") + size(a) == size(b) ? nothing : error("a not same dim as b") + + # Initialize vectors of indices corresponding to dim of trajectories out of bounds + out1, out2, out = _out_indices(b, s, e) + rtemp = Vector{Float32}(undef, length(out)) + while length(out) > 0 + # TODO: Preallocate rtemp once, then remove elements with deleteat!(rtemp, rmin_idx) + _rtemp_lower!(rtemp, a, b, s, out1) + _rtemp_upper!(rtemp, a, b, e, out2, length(out1)) + + rmin, rmin_idx = findmin(rtemp) + n_idx = out[rmin_idx] + @. a += (b - a) * rmin + b[n_idx] = -b[n_idx] + 2 * a[n_idx] + if s < b[n_idx] && b[n_idx] < e + # Within boundary after reflection, can reduce number of reflecting dimensions. + _discard_reflected_out_index!(out, out1, out2, rmin_idx) + deleteat!(rtemp, rmin_idx) + else + # Reflected outside boundary, need to move element between out1 and out2 + # TODO: Reassign out-indices manually via _swap_boundary_outs!, for now, just recompute out-indices. + # _swap_boundary_outs!(out1, out2, n_idx, rmin_idx) + out1, out2, out = _out_indices(b, s, e) + end + end + return b +end \ No newline at end of file diff --git a/test/reflect.jl b/test/reflect.jl index f2c8dc2..8f50249 100644 --- a/test/reflect.jl +++ b/test/reflect.jl @@ -82,3 +82,15 @@ if CUDA.functional() end end end + +@testset "CPU index reflect methods" begin + d = 1000 + X0 = fill(0.0f0,d) + X1 = X0 + randn(d) + @testset "test equivalence of index with cpu/gpu" begin + args = (X0, X1, -1, 1) + @test HighDimPDE._reflect(copy.(args)...) ≈ HighDimPDE._reflect_outs(copy.(args)...) + @test HighDimPDE._reflect_GPU(copy.(args)...) ≈ HighDimPDE._reflect_outs(copy.(args)...) + end + +end \ No newline at end of file