
Commit 409216b

Merge branch 'ultradeep_Alens'

2 parents f4c5b4f + 2690a4a

File tree

6 files changed: +105 −65 lines

src/CMBLensing.jl
src/chains.jl
src/flat_batch.jl
src/gpu.jl
src/numerical_algorithms.jl
src/sampling.jl

src/CMBLensing.jl

Lines changed: 1 addition & 1 deletion

```diff
@@ -45,7 +45,7 @@ using Zygote: unbroadcast, Numeric, @adjoint, @nograd


 import Adapt: adapt_structure
-import Base: +, -, *, \, /, ^, ~, , <, <=, |, &, ==,
+import Base: +, -, *, \, /, ^, ~, , <, <=, |, &, ==, !,
     abs, adjoint, all, any, axes, broadcast, broadcastable, BroadcastStyle, conj, copy, convert,
     copy, copyto!, eltype, eps, fill!, getindex, getproperty, hash, hcat, hvcat, inv, isfinite,
     iterate, keys, lastindex, length, literal_pow, mapreduce, materialize!,
```

src/chains.jl

Lines changed: 35 additions & 30 deletions

```diff
@@ -2,47 +2,52 @@ import Base: getindex, lastindex


 @doc doc"""
-    load_chains(filename; burnin=0, thin=1, join=false)
-
-Load a single chain or multiple parallel chains which were written to a file by
-[`sample_joint`](@ref).
+    load_chains(filename; burnin=0, burnin_chunks=0, thin=1, join=false, unbatch=true)
+
+Load a single chain or multiple parallel chains which were written to
+a file by [`sample_joint`](@ref).

 Keyword arguments:

-* `burnin` — Remove this many samples from the start of each chain.
+* `burnin` — Remove this many samples from the start of each chain, or
+  if negative, keep only this many samples at the end of each chain.
+* `burnin_chunks` — Same as `burnin`, but in terms of chain "chunks"
+  stored in the chain file, rather than in terms of samples.
 * `thin` — If `thin` is an integer, thin the chain by this factor. If
-  `thin == :hasmaps`, return only samples which have maps saved. If thin is a
-  `Function`, filter the chain by this function (e.g. `thin=haskey(:g)` on Julia 1.5+)
-* `unbatch` — If true, [unbatch](@ref) the chains if they are batched.
+  `thin == :hasmaps`, return only samples which have maps saved. If
+  `thin` is a `Function`, filter the chain by this function (e.g.
+  `thin=haskey(:g)` on Julia 1.5+)
+* `unbatch` — If true, [unbatch](@ref) the chains if they are batched.
 * `join` — If true, concatenate all the chains together.
 * `skip_missing_chunks` — Skip missing chunks in the chain instead of
   terminating the chain there.

-The object returned by this function is a `Chain` or `Chains` object, which
-simply wraps an `Array` of `Dicts` or an `Array` of `Array` of `Dicts`,
-respectively (each sample is a `Dict`). The wrapper object has some extra
-indexing properties for convenience:
+The object returned by this function is a `Chain` or `Chains` object,
+which simply wraps an `Array` of `Dicts` or an `Array` of `Array` of
+`Dicts`, respectively (each sample is a `Dict`). The wrapper object
+has some extra indexing properties for convenience:

-* It can be indexed as if it were a single multidimensional object, e.g.
-  `chains[1,:,:accept]` would return the `:accept` key of all samples in the
-  first chain.
-* Leading colons can be dropped, i.e. `chains[:,:,:accept]` is the same as
-  `chains[:accept]`.
-* If some samples are missing a particular key, `missing` is returned for those
-  samples instead of an error.
-* The recursion goes arbitrarily deep into the objects it finds. E.g., since
-  sampled parameters are stored in a `NamedTuple` like `(Aϕ=1.3,)` in the `θ`
-  key of each sample `Dict`, you can do `chain[:θ,:Aϕ]` to get all `Aϕ` samples
-  as a vector.
+* It can be indexed as if it were a single multidimensional object,
+  e.g. `chains[1,:,:accept]` would return the `:accept` key of all
+  samples in the first chain.
+* Leading colons can be dropped, i.e. `chains[:,:,:accept]` is the
+  same as `chains[:accept]`.
+* If some samples are missing a particular key, `missing` is returned
+  for those samples instead of an error.
+* The recursion goes arbitrarily deep into the objects it finds. E.g.,
+  since sampled parameters are stored in a `NamedTuple` like
+  `(Aϕ=1.3,)` in the `θ` key of each sample `Dict`, you can do
+  `chain[:θ,:Aϕ]` to get all `Aϕ` samples as a vector.


 """
-function load_chains(filename; burnin=0, thin=1, join=false, unbatch=true, dropmaps=false)
+function load_chains(filename; burnin=0, thin=1, join=false, unbatch=true, dropmaps=false, burnin_chunks=0)
     chains = jldopen(filename) do io
         ks = keys(io)
-        chunk_ks = [k for k in ks if startswith(k,"chunks_")]
-        for (isfirst,k) in flagfirst(sort(chunk_ks, by=k->parse(Int,k[8:end])))
+        chunk_ks = sort([k for k in ks if startswith(k,"chunks_")], by=k->parse(Int,k[8:end]))
+        chunk_ks = chunk_ks[burnin_chunks>=0 ? (burnin_chunks+1:end) : (end+burnin_chunks+1:end)]
+        for (isfirst,k) in flagfirst(chunk_ks)
             if isfirst
                 chains = read(io,k)
             else
@@ -55,13 +60,13 @@ function load_chains(filename; burnin=0, thin=1, join=false, unbatch=true, dropm
         chains
     end
     if thin isa Int
-        chains = [chain[(1+burnin):thin:end] for chain in chains]
+        chains = [chain[burnin>=0 ? ((1+burnin):thin:end) : (end+(1+burnin):thin:end)] for chain in chains]
     elseif thin == :hasmaps
         chains = [[samp for samp in chain[(1+burnin):end] if :ϕ in keys(samp)] for chain in chains]
     elseif thin isa Function
        chains = [filter(thin,chain) for chain in chains]
    else
-        error("`thin` should be an Int or :hasmaps")
+        error("`thin` should be an Int, :hasmaps, or a filter function")
    end
    chains = wrap_chains(chains)
    if unbatch
@@ -121,8 +126,8 @@ _getindex(x::Union{Dict,NamedTuple}, k::Symbol) = haskey(x,k) ? getindex(x, k) :
 _getindex(x, k) = getindex(x, k)


-wrap_chains(chains::Vector{<:Vector{<:Dict}}) = Chains(Chain.(chains))
-wrap_chains(chain::Vector{<:Dict}) = Chain(chain)
+wrap_chains(chains::Vector{<:Vector}) = Chains(Chain.(chains))
+wrap_chains(chain::Vector) = Chain(chain)


 # batching
```
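The new keywords compose with the indexing conventions described in the docstring. A minimal usage sketch (the filename and sampled-parameter names here are hypothetical):

```julia
using CMBLensing

# drop the first 100 samples of each chain, then keep every 5th sample
chains = load_chains("chains.jld2", burnin=100, thin=5)

# new: negative burnin keeps only the last 500 samples of each chain
chains = load_chains("chains.jld2", burnin=-500)

# new: skip the first 2 saved chunks entirely, before any per-sample thinning
chains = load_chains("chains.jld2", burnin_chunks=2)

# recursive indexing: all Aϕ samples in the first chain, as a vector
Aϕs = chains[1, :, :θ, :Aϕ]
```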

src/flat_batch.jl

Lines changed: 1 addition & 1 deletion

```diff
@@ -102,7 +102,7 @@ for op in [:+, :-, :*, :/, :<, :<=, :&, :|, :(==)]
         ($op)(a::Real, b::BatchedReal) = batch(broadcast(($op), a, b.vals))
     end
 end
-for op in [:-, :sqrt, :one, :zero, :isfinite, :eps]
+for op in [:-, :!, :sqrt, :one, :zero, :isfinite, :eps]
     @eval ($op)(br::BatchedReal) = batch(broadcast(($op),br.vals))
 end
 for op in [:any, :all]
```
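This is also why `!` was added to the `import Base` list in src/CMBLensing.jl above: a method can only be added to `Base.!` after it is imported. A self-contained sketch of the metaprogramming pattern, using a stand-in `MyBatched` type rather than the package's actual `BatchedReal`:

```julia
# stand-in for the package's BatchedReal: one value per batch index
struct MyBatched{T}
    vals::Vector{T}
end
batch(vals::Vector) = MyBatched(vals)

import Base: !, -, sqrt, isfinite

# same pattern as the diff: each unary op broadcasts over the batch
# and re-wraps the result
for op in [:-, :!, :sqrt, :isfinite]
    @eval ($op)(br::MyBatched) = batch(broadcast(($op), br.vals))
end

(!batch([true, false])).vals   # [false, true]
(-batch([1.0, 2.0])).vals      # [-1.0, -2.0]
```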

src/gpu.jl

Lines changed: 10 additions & 25 deletions

```diff
@@ -128,34 +128,19 @@ gc = () -> (GC.gc(true); CUDA.reclaim())

 Assuming you submitted a SLURM job and got several GPUs, possibly across several
 nodes, this assigns each Julia worker process a unique GPU using `CUDA.device!`.
-Assumes the SLURM variables `SLURM_STEP_GPUS` and `GPU_DEVICE_ORDINAL` are
-defined on the workers.
 """
 function assign_GPU_workers()
-    @everywhere @eval using CUDA, Distributed
-    topo = @eval Main pmap(workers()) do _
-        hostname = gethostname()
-        virtgpus = parse.(Int,split(ENV["GPU_DEVICE_ORDINAL"],","))
-        if "SLURM_STEP_GPUS" in keys(ENV)
-            physgpus = parse.(Int,split(ENV["SLURM_STEP_GPUS"],","))
-        else
-            @warn "SLURM_STEP_GPUS not defined, assign_GPU_workers may fail."
-            # SLURM_STEP_GPUS seems not correctly set on all systems. this
-            # will work if you requested a full node's worth of GPUs at least
-            physgpus = virtgpus
-        end
-        if Set(virtgpus)!=Set(deviceid.(devices()))
-            @warn "Virtual GPUs not same as CUDA.devices(), using latter"
-            virtgpus = deviceid.(devices())
-        end
-        (i=myid(), hostname=hostname, virtgpus=virtgpus, physgpus=physgpus)
-    end
+    @everywhere @eval Main using CUDA, Distributed
+    accessible_gpus = @eval Main Dict(pmap(workers()) do _
+        ds = CUDA.devices()
+        myid() => Dict(CUDA.deviceid.(ds) .=> CUDA.uuid.(ds))
+    end)
     claimed = Set()
-    assignments = Dict(map(topo) do (i,hostname,virtgpus,physgpus)
-        for (virtgpu,physgpu) in zip(virtgpus,physgpus)
-            if !((hostname,physgpu) in claimed)
-                push!(claimed,(hostname,physgpu))
-                return i => virtgpu
+    assignments = Dict(map(workers()) do myid
+        for (gpu_id, gpu_uuid) in accessible_gpus[myid]
+            if !(gpu_uuid in claimed)
+                push!(claimed, gpu_uuid)
+                return myid => gpu_id
             end
         end
     end)
```
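The rewrite drops the SLURM environment variables entirely: each worker reports the devices it can see as `deviceid => uuid` pairs, and devices are then claimed greedily by UUID, which identifies a physical GPU uniquely even when two workers on the same node see it under the same local device id. A self-contained sketch of just that claiming logic, with mocked-up data in place of real CUDA queries:

```julia
# two workers on one node, both seeing the same two physical GPUs
# (worker id => Dict of local device id => device UUID)
accessible_gpus = Dict(
    2 => Dict(0 => "uuid-a", 1 => "uuid-b"),
    3 => Dict(0 => "uuid-a", 1 => "uuid-b"),
)

claimed = Set{String}()
assignments = Dict(map(sort(collect(keys(accessible_gpus)))) do id
    # give this worker the first device whose UUID is still unclaimed
    for (gpu_id, gpu_uuid) in accessible_gpus[id]
        if !(gpu_uuid in claimed)
            push!(claimed, gpu_uuid)
            return id => gpu_id
        end
    end
end)
# each worker ends up on a distinct physical GPU, despite both seeing
# the same local device ids
```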

src/numerical_algorithms.jl

Lines changed: 29 additions & 0 deletions

```diff
@@ -205,3 +205,32 @@ function gmres(A, b; Pl=I, maxiter)
     view(K, :, 1:n) * α

 end
+
+"""
+    finite_second_derivative(x)
+
+Second derivative of a vector `x` via finite differences, including at end points.
+"""
+function finite_second_derivative(x)
+    map(eachindex(x)) do i
+        if i==1
+            x[3]-2x[2]+x[1]
+        elseif i==length(x)
+            x[end]-2x[end-1]+x[end-2]
+        else
+            x[i+1]-2x[i]+x[i-1]
+        end
+    end
+end
+
+"""
+    longest_run_of_trues(x)
+
+The slice corresponding to the longest run of `true`s in the vector `x`.
+"""
+function longest_run_of_trues(x)
+    next_true = findnext.(Ref(.!x), eachindex(x))
+    next_true[isnothing.(next_true)] .= 0
+    (len,start) = findmax(next_true .- eachindex(x))
+    start:start+len
+end
```
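These two helpers feed the new `require_convex` option of `grid_and_sample` below: `longest_run_of_trues(finite_second_derivative(lnPs) .< 0)` picks out the longest stretch where the gridded log-PDF is concave. A quick worked check of `finite_second_derivative` on a discrete parabola, where the second derivative should be constant (assuming the definition above is loaded):

```julia
x = [0.0, 1.0, 4.0, 9.0, 16.0]   # x[i] = (i-1)^2
finite_second_derivative(x)
# -> [2.0, 2.0, 2.0, 2.0, 2.0]
# interior points use the centered stencil x[i+1] - 2x[i] + x[i-1];
# the end points reuse the one-sided stencils, so the output has the
# same length as the input
```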

src/sampling.jl

Lines changed: 29 additions & 8 deletions

```diff
@@ -85,23 +85,38 @@ function grid_and_sample(lnPs::Vector{<:BatchedReal}, xs::AbstractVector; kwargs
     ((batch(getindex.(batches,i)) for i=1:3)...,)
 end

-function grid_and_sample(lnPs::Vector, xs::AbstractVector; progress=false, nsamples=1, span=0.25, rtol=1e-5)
+function grid_and_sample(lnPs::Vector, xs::AbstractVector; progress=false, nsamples=1, span=0.25, require_convex=false)

+    # trim leading/trailing zero-probability regions
+    support = findnext(isfinite,lnPs,1):findprev(isfinite,lnPs,length(lnPs))
+    xs = xs[support]
+    lnPs = lnPs[support]
+
+    if require_convex
+        support = longest_run_of_trues(finite_second_derivative(lnPs) .< 0)
+        xs = xs[support]
+        lnPs = lnPs[support]
+    end
+
+    # interpolate PDF
     xmin, xmax = first(xs), last(xs)
     lnPs = lnPs .- maximum(lnPs)
     ilnP = loess(xs, lnPs, span=span)

     # normalize the PDF. note the smoothing is done of the log PDF.
-    A = @ondemand(QuadGK.quadgk)(exp∘ilnP, xmin, xmax)[1]
-    lnPs .-= log(A)
-    ilnP = loess(xs, lnPs, span=span)
+    cdf(x) = @ondemand(QuadGK.quadgk)(nan2zero∘exp∘ilnP,xmin,x,rtol=1e-5)[1]
+    logA = nan2zero(log(cdf(xmax)))
+    lnPs = (ilnP.ys .-= logA)
+    ilnP.bs[:,1] .-= logA

     # draw samples via inverse transform sampling
-    # (the `+ eps()` is a workaround since Loess.predict seems to NaN sometimes when
-    # evaluated right at the lower bound)
     θsamples = @showprogress (progress ? 1 : Inf) map(1:nsamples) do i
         r = rand()
-        fzero((x->@ondemand(QuadGK.quadgk)(exp∘ilnP,xmin+sqrt(eps()),x,rtol=rtol)[1]-r),xmin+sqrt(eps()),xmax,rtol=rtol)
+        if (cdf(xmin)-r)*(cdf(xmax)-r) >= 0
+            first(lnPs) > last(lnPs) ? xmin : xmax
+        else
+            fzero(x->cdf(x)-r, xmin, xmax, xatol=(xmax-xmin)*1e-3)
+        end
     end

     (nsamples==1 ? θsamples[1] : θsamples), ilnP, lnPs
```
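The new branch is ordinary inverse transform sampling, now with a guard for when the uniform draw `r` falls outside the CDF's range on the trimmed support, in which case the nearer endpoint is returned instead of erroring inside the root finder. A self-contained sketch of the idea on a standard normal, using `QuadGK` and `Roots` directly rather than the package's `@ondemand`/`fzero` wrappers:

```julia
using QuadGK: quadgk
using Roots: find_zero

# inverse transform sampling on a truncated support [xmin, xmax]:
# draw r ~ U(0,1) and solve cdf(x) == r for x
lnP(x) = -x^2/2                      # unnormalized log-PDF
xmin, xmax = -4.0, 4.0
A = quadgk(x -> exp(lnP(x)), xmin, xmax)[1]          # normalization
cdf(x) = quadgk(t -> exp(lnP(t))/A, xmin, x)[1]

r = rand()
sample = if (cdf(xmin) - r) * (cdf(xmax) - r) >= 0
    # r not bracketed by [cdf(xmin), cdf(xmax)] (possible when the
    # support was trimmed); clamp to the nearer endpoint
    r < cdf(xmin) ? xmin : xmax
else
    find_zero(x -> cdf(x) - r, (xmin, xmax))   # bracketed root find
end
```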
```diff
@@ -213,8 +228,10 @@ function sample_joint(

     θstarts = if θstart == :prior
         [map(range->batch((first(range) .+ rand(D) .* (last(range) - first(range)))...), θrange) for i=1:nchains]
-    elseif (θstart isa NamedTuple)
+    elseif θstart isa NamedTuple
         fill(θstart, nchains)
+    elseif θstart isa Vector{<:NamedTuple}
+        θstart
     else
         error("`θstart` should be either `nothing` to randomly sample the starting value or a NamedTuple giving the starting point.")
     end
@@ -227,6 +244,8 @@ function sample_joint(
         fill(batch(zero(diag(ds().Cϕ)), D), nchains)
     elseif ϕstart isa Field
         fill(ϕstart, nchains)
+    elseif ϕstart isa Vector{<:Field}
+        ϕstart
     elseif ϕstart in [:quasi_sample, :best_fit]
         pmap(θstarts) do θstart
             MAP_joint(adapt(storage,ds(;θstart...)), progress=(progress==:verbose ? :summary : false), Nϕ=adapt(storage,Nϕ), quasi_sample=(ϕstart==:quasi_sample); MAP_kwargs...).ϕ
```
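These branches let each chain be initialized (or resumed) individually. A hypothetical call, sketched under the assumption that `ds`, `ϕ₁`, and `ϕ₂` are already defined and that the vector lengths match `nchains`:

```julia
sample_joint(ds;
    nchains = 2,
    θstart  = [(Aϕ=0.9,), (Aϕ=1.1,)],  # Vector{<:NamedTuple}: one θ per chain
    ϕstart  = [ϕ₁, ϕ₂],                # Vector{<:Field}: one ϕ per chain
)
```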
```diff
@@ -268,9 +287,11 @@ function sample_joint(

     for chunks_index = (chunks_index+1):(nsamps_per_chain÷nchunk+1)

+        println("starting")
         last_chunks = pmap(last.(last_chunks)) do state

             @unpack i,ϕ°,f,θ = state
+            @show i
             f,ϕ°,ds,Nϕ = (adapt(storage, x) for x in (f,ϕ°,dsₐ,Nϕₐ))
             dsθ = ds(θ)
             ϕ = dsθ.G\ϕ°
```
