Merge pull request #500 from SciML/ensemble

ChrisRackauckas · web-flow · commit eb655ed0b068 · 2020-05-02T07:28:35.000-04:00
Fix splitthreads trajectory choices and test it better
diff --git a/src/ensemble/basic_ensemble_solve.jl b/src/ensemble/basic_ensemble_solve.jl
@@ -55,6 +55,10 @@ function __solve(prob::AbstractEnsembleProblem,
       else
         @error "parallel_type value not recognized"
       end
+    elseif alg isa EnsembleAlgorithm
+      # Assume DifferentialEquations.jl is being used, so default alg
+      ensemblealg = alg
+      alg = nothing
     else
       ensemblealg = EnsembleThreads()
     end
@@ -102,7 +106,8 @@ end
 
 function batch_func(i,prob,alg,I,kwargs...)
   iter = 1
-  new_prob = prob.prob_func(deepcopy(prob.prob),i,iter)
+  _prob = prob.safetycopy ? deepcopy(prob.prob) : prob.prob
+  new_prob = prob.prob_func(_prob,i,iter)
   rerun = true
   x = prob.output_func(solve(new_prob,alg;kwargs...),i)
   if !(typeof(x) <: Tuple)
@@ -114,7 +119,8 @@ function batch_func(i,prob,alg,I,kwargs...)
   rerun = _x[2]
   while rerun
       iter += 1
-      new_prob = prob.prob_func(deepcopy(prob.prob),i,iter)
+      _prob = prob.safetycopy ? deepcopy(prob.prob) : prob.prob
+      new_prob = prob.prob_func(_prob,i,iter)
       x = prob.output_func(solve(new_prob,alg;kwargs...),i)
       if !(typeof(x) <: Tuple)
           @warn("output_func should return (out,rerun). See docs for updated details")
@@ -127,7 +133,7 @@ function batch_func(i,prob,alg,I,kwargs...)
   _x[1]
 end
 
-function solve_batch(prob,alg,::EnsembleDistributed,I,pmap_batch_size,kwargs...)
+function solve_batch(prob,alg,ensemblealg::EnsembleDistributed,I,pmap_batch_size,kwargs...)
   wp=CachingPool(workers())
   batch_data = let
     pmap(wp,I,batch_size=pmap_batch_size) do i
@@ -146,13 +152,14 @@ function solve_batch(prob,alg,::EnsembleSerial,I,pmap_batch_size,kwargs...)
   map(i->batch_data[i],1:length(batch_data))
 end
 
-function solve_batch(prob,alg,::EnsembleThreads,I,pmap_batch_size,kwargs...)
+function solve_batch(prob,alg,ensemblealg::EnsembleThreads,I,pmap_batch_size,kwargs...)
   batch_data = Vector{Any}(undef,length(I))
   let
     Threads.@threads for batch_idx in axes(batch_data, 1)
         i = I[batch_idx]
         iter = 1
-        new_prob = prob.prob_func(deepcopy(prob.prob),i,iter)
+        _prob = prob.safetycopy ? deepcopy(prob.prob) : prob.prob
+        new_prob = prob.prob_func(_prob,i,iter)
         x = prob.output_func(solve(new_prob,alg;kwargs...),i)
         if !(typeof(x) <: Tuple)
             @warn("output_func should return (out,rerun). See docs for updated details")
@@ -164,7 +171,8 @@ function solve_batch(prob,alg,::EnsembleThreads,I,pmap_batch_size,kwargs...)
 
         while rerun
             iter += 1
-            new_prob = prob.prob_func(deepcopy(prob.prob),i,iter)
+            _prob = prob.safetycopy ? deepcopy(prob.prob) : prob.prob
+            new_prob = prob.prob_func(_prob,i,iter)
             x = prob.output_func(solve(new_prob,alg;kwargs...),i)
             if !(typeof(x) <: Tuple)
                 @warn("output_func should return (out,rerun). See docs for updated details")
@@ -182,23 +190,30 @@ end
 
 function solve_batch(prob,alg,::EnsembleSplitThreads,I,pmap_batch_size,kwargs...)
   wp=CachingPool(workers())
+  N = nworkers()  # number of worker processes the trajectories are split across
+  batch_size = length(I)÷N  # trajectories per worker (integer division; remainder handled below)
   batch_data = let
-    pmap(wp,1:nprocs(),batch_size=pmap_batch_size) do i
-      thread_monte(prob,I,alg,i,kwargs...)
+    pmap(wp,1:N,batch_size=pmap_batch_size) do i
+      if i == N
+        I_local = I[(batch_size*(i-1)+1):end]  # last chunk runs to the end of I, absorbing the remainder
+      else
+        I_local = I[(batch_size*(i-1)+1):(batch_size*i)]  # i-th contiguous chunk of batch_size indices
+      end
+      thread_monte(prob,I_local,alg,i,kwargs...)  # solve this chunk with a threaded loop
     end
   end
   _batch_data = vector_batch_data_to_arr(batch_data)
 end
 
 function thread_monte(prob,I,alg,procid,kwargs...)
-  start = I[1]+(procid-1)*length(I)
-  stop = I[1]+procid*length(I)-1
-  portion = start:stop
-  batch_data = Vector{Any}(undef,length(portion))
+  batch_data = Vector{Any}(undef,length(I))  # one result slot per trajectory index in I
   let
-    Threads.@threads for i in portion
+    # Loop over slot positions: a shared counter bumped inside @threads races and scrambles slots.
+    Threads.@threads for j in axes(batch_data, 1)
+      i = I[j]
       iter = 1
-      new_prob = prob.prob_func(deepcopy(prob.prob),i,iter)
+      _prob = prob.safetycopy ? deepcopy(prob.prob) : prob.prob
+      new_prob = prob.prob_func(_prob,i,iter)
       rerun = true
       x = prob.output_func(solve(new_prob,alg;kwargs...),i)
       if !(typeof(x) <: Tuple)
@@ -210,7 +225,8 @@ function thread_monte(prob,I,alg,procid,kwargs...)
       rerun = _x[2]
       while rerun
           iter += 1
-          new_prob = prob.prob_func(deepcopy(prob.prob),i,iter)
+          _prob = prob.safetycopy ? deepcopy(prob.prob) : prob.prob
+          new_prob = prob.prob_func(_prob,i,iter)
           x = prob.output_func(solve(new_prob,alg;kwargs...),i)
           if !(typeof(x) <: Tuple)
               @warn("output_func should return (out,rerun). See docs for updated details")
@@ -220,7 +236,7 @@ function thread_monte(prob,I,alg,procid,kwargs...)
           end
           rerun = _x[2]
       end
-      batch_data[i - start + 1] = _x[1]
+      batch_data[j] = _x[1]
     end
   end
   batch_data
diff --git a/src/ensemble/ensemble_problems.jl b/src/ensemble/ensemble_problems.jl
@@ -7,18 +7,23 @@ struct EnsembleProblem{T,T2,T3,T4,T5} <: AbstractEnsembleProblem
   output_func::T3
   reduction::T4
   u_init::T5
+  safetycopy::Bool
 end
 
+DEFAULT_PROB_FUNC(prob,i,repeat) = prob  # identity: every trajectory reuses the base problem as-is
+DEFAULT_OUTPUT_FUNC(sol,i) = (sol,false)  # keep the solution; second element is the rerun flag (false)
+DEFAULT_REDUCTION(u,data,I) = (append!(u,data),false)  # append the batch to u; second element is false
 EnsembleProblem(prob;
-    output_func = (sol,i)-> (sol,false),
-    prob_func= (prob,i,repeat)->prob,
-    reduction = (u,data,I)->(append!(u,data),false),
-    u_init = []) =
-    EnsembleProblem(prob,prob_func,output_func,reduction,u_init)
+    output_func = DEFAULT_OUTPUT_FUNC,
+    prob_func= DEFAULT_PROB_FUNC,
+    reduction = DEFAULT_REDUCTION,
+    u_init = [],
+    safetycopy = prob_func !== DEFAULT_PROB_FUNC) =
+    EnsembleProblem(prob,prob_func,output_func,reduction,u_init,safetycopy)  # safetycopy defaults to true only when a custom prob_func is supplied
 
 EnsembleProblem(;prob,
-    output_func = (sol,i)-> (sol,false),
-    prob_func= (prob,i,repeat)->prob,
-    reduction = (u,data,I)->(append!(u,data),false),
-    u_init = [], p = nothing) =
-    EnsembleProblem(prob,prob_func,output_func,reduction,u_init)
+    output_func = DEFAULT_OUTPUT_FUNC,
+    prob_func= DEFAULT_PROB_FUNC,
+    reduction = DEFAULT_REDUCTION,
+    u_init = [], p = nothing, safetycopy = prob_func !== DEFAULT_PROB_FUNC) =
+    EnsembleProblem(prob,prob_func,output_func,reduction,u_init,safetycopy)  # keyword-only form; `p` is accepted but not forwarded
diff --git a/test/downstream/distributed_ensemble.jl b/test/downstream/distributed_ensemble.jl
@@ -0,0 +1,17 @@
+using Distributed, Test
+addprocs(2)
+println("There are $(nprocs()) processes")
+@everywhere using OrdinaryDiffEq
+
+@everywhere prob = ODEProblem((u,p,t)->1.01u,0.5,(0.0,1.0))
+@everywhere u0s = [rand()*prob.u0 for i in 1:2]
+@everywhere function prob_func(prob,i,repeat)
+    println("Running trajectory $i")
+    ODEProblem(prob.f,u0s[i],prob.tspan)
+end
+
+ensemble_prob = EnsembleProblem(prob, prob_func=prob_func)
+sim = solve(ensemble_prob,Tsit5(),EnsembleSplitThreads(),trajectories=2)
+# Each requested trajectory must be solved exactly once across the workers;
+# u0s[i] would also go out of bounds if a worker were handed an index outside 1:2.
+@test length(sim.u) == 2
diff --git a/test/downstream/ensemble.jl b/test/downstream/ensemble.jl
@@ -73,7 +73,9 @@ reduction = function (u,batch,I)
   u,((var(u)/sqrt(last(I)))/mean(u)<0.5) ? true : false
 end
 
-prob2 = EnsembleProblem(prob,prob_func=prob_func,output_func=output_func,reduction=reduction,u_init=Vector{Float64}())
+prob2 = EnsembleProblem(prob,prob_func=prob_func,output_func=output_func,
+                        reduction=reduction,u_init=Vector{Float64}(),
+                        safetycopy=false)
 sim = solve(prob2,Tsit5(),trajectories=10000,batch_size=20)
 @test sim.converged == true
 
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -1,4 +1,4 @@
-using SafeTestsets
+using SafeTestsets, Test
 
 const GROUP = get(ENV, "GROUP", "All")
 const is_APPVEYOR = ( Sys.iswindows() && haskey(ENV,"APPVEYOR") )
@@ -48,6 +48,7 @@ if !is_APPVEYOR && GROUP == "Downstream"
     @time @safetestset "DEDataArray" begin include("downstream/data_array_regression_tests.jl") end
     @time @safetestset "Concrete_solve Tests" begin include("downstream/concrete_solve_tests.jl") end
     @time @safetestset "AD Tests" begin include("downstream/ad_tests.jl") end
+    @time @testset "Distributed Ensemble Tests" begin include("downstream/distributed_ensemble.jl") end
 end
 
 if !is_APPVEYOR && GROUP == "GPU"