
Commit c54afa3

ucabc46 authored and Angeladadd committed
replace unoptimized copy_states, put path to vars, set optimize option as default
1 parent 9e19062 commit c54afa3

10 files changed: 31 additions, 91 deletions

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -5,3 +5,4 @@ Manifest-v*.toml
 /docs/build/
 /.benchmarkci
 *.h5
+slurm_log/

extra/weak_scaling/Project.toml

Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
+[deps]
+ParticleDA = "61cd1fb4-f4c4-4bc8-80c6-ea5639a6ca2e"

extra/weak_scaling/kathleen_slurm_copy_states.sh

Lines changed: 8 additions & 5 deletions
@@ -5,15 +5,18 @@
 #SBATCH --cpus-per-task=40
 #SBATCH --nodes=16
 #SBATCH --ntasks-per-node=1
-#SBATCH --chdir=/home/ucabc46/exp/ParticleDA.jl
-#SBATCH --output=test/slurm_log/%x-%j.out
-#SBATCH --error=test/slurm_log/%x-%j.err
+#SBATCH --chdir=/home/ucabc46/ParticleDA.jl
+#SBATCH --output=slurm_log/%x-%j.out
+#SBATCH --error=slurm_log/%x-%j.err
 
 export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK
 export JULIA_NUM_THREADS=$OMP_NUM_THREADS
 
 julia --project=. -e 'using Pkg; Pkg.instantiate(); Pkg.precompile()'
 
-/home/ucabc46/.julia/bin/mpiexecjl -n $SLURM_NNODES\
+PARTICLEDA_TEST_DIR=$HOME/ParticleDA.jl/test
+JULIA_DIR=$HOME/.julia
+
+$JULIA_DIR/bin/mpiexecjl -n $SLURM_NNODES\
     julia --project=. \
-    /home/ucabc46/exp/ParticleDA.jl/test/mpi_optimized_copy_states.jl -t /home/ucabc46/exp/ParticleDA.jl/test/output/dedup_threading_optimize_resampling/all_timers_$SLURM_NNODES.h5 -o
+    $PARTICLEDA_TEST_DIR/mpi_optimized_copy_states.jl -t $PARTICLEDA_TEST_DIR/output/all_timers_$SLURM_NNODES.h5 -o

extra/weak_scaling/kathleen_slurm_weak_scaling.sh

Lines changed: 6 additions & 3 deletions
@@ -5,13 +5,16 @@
 #SBATCH --nodes=4
 #SBATCH --ntasks-per-node=1
 #SBATCH --cpus-per-task=40
-#SBATCH --chdir=/home/ucabc46/exp/ParticleDA.jl
+#SBATCH --chdir=/home/ucabc46/ParticleDA.jl/extra/weak_scaling
 #SBATCH --output=slurm_log/%x-%j.out
 #SBATCH --error=slurm_log/%x-%j.err
 
 export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK
 export JULIA_NUM_THREADS=$OMP_NUM_THREADS
 
-/home/ucabc46/.julia/bin/mpiexecjl -n $SLURM_NNODES\
+PARTICLEDA_WEAKSCALING_DIR=$HOME/ParticleDA.jl/extra/weak_scaling
+JULIA_DIR=$HOME/.julia
+
+$JULIA_DIR/bin/mpiexecjl -n $SLURM_NNODES\
     julia --project=. \
-    /home/ucabc46/exp/ParticleDA.jl/extra/weak_scaling/run_particleda.jl
+    $PARTICLEDA_WEAKSCALING_DIR/run_particleda.jl

extra/weak_scaling/parametersW1.yaml

Lines changed: 3 additions & 3 deletions
@@ -3,7 +3,7 @@ simulate_observations:
   seed: 123
 model:
   llw2d:
-    station_filename: "/home/ucabc46/exp/ParticleDA.jl/extra/weak_scaling/stationsW1.txt"
+    station_filename: "stationsW1.txt"
     nu: 2.5
     nu_initial_state: 2.5
     peak_height: 30.0
@@ -27,7 +27,7 @@ model:
       - 1
     time_step: 0.5
 filter:
-  optimize_copy_states: true
+  optimize_resampling: true
   output_filename: "llw2d_filtering.h5"
-  nprt: 1000
+  nprt: 2000
   enable_timers: true

extra/weak_scaling/run_particleda.jl

Lines changed: 4 additions & 4 deletions
@@ -18,9 +18,9 @@ mpi_size = MPI.Comm_size(MPI.COMM_WORLD)
 llw2d_src = joinpath(dirname(pathof(ParticleDA)), "..", "test", "models", "llw2d.jl")
 include(llw2d_src)
 using .LLW2d
-observation_file = joinpath(dirname(pathof(ParticleDA)), "..", "extra", "weak_scaling", "test_observations.h5")
-parameters_file = joinpath(dirname(pathof(ParticleDA)), "..", "extra", "weak_scaling", "parametersW1.yaml")
-output_file = joinpath(dirname(pathof(ParticleDA)), "..", "extra", "weak_scaling", "llw2d_filtering.h5")
+observation_file = "test_observations.h5"
+parameters_file = "parametersW1.yaml"
+output_file = "llw2d_filtering.h5"
 #filter_type = OptimalFilter
 filter_type = BootstrapFilter
 summary_stat_type = NaiveMeanSummaryStat
@@ -48,7 +48,7 @@ open(parameters_file, "w") do io
     YAML.write(io, parameters)
 end
 
-println("Optimized copy states enabled: ", parameters["filter"]["optimize_copy_states"])
+println("Optimized resampling enabled: ", parameters["filter"]["optimize_resampling"])
 
 final_states, final_statistics = run_particle_filter(
     LLW2d.init, parameters_file, observation_file, filter_type, summary_stat_type

src/ParticleDA.jl

Lines changed: 2 additions & 3 deletions
@@ -271,7 +271,7 @@ function run_particle_filter(
         @timeit_debug timer "Resample" resample!(
            filter_data.resampling_indices, filter_data.weights, rng
         )
-        if filter_params.optimize_copy_states
+        if filter_params.optimize_resampling
            # Optimize resampling indices to minimize data movement when copying states
            @timeit_debug timer "Optimize Resample" filter_data.resampling_indices .= optimized_resample!(
                filter_data.resampling_indices, my_size
@@ -293,8 +293,7 @@ function run_particle_filter(
            filter_data.resampling_indices,
            my_rank,
            nprt_per_rank,
-           timer,
-           filter_params.optimize_copy_states
+           timer
        )
 
        if filter_params.verbose
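
The optimized_resample! helper called above is not shown in this commit. As a rough, hypothetical sketch of the idea named in the comment (reorder the resampled ancestor indices so each rank keeps as many of its own particles as possible, leaving copy_states! less data to move between ranks), a greedy reassignment might look like the following; the name greedy_reassign and its details are illustrative only, not ParticleDA API.

# Hypothetical sketch: permute the resampled ancestor indices across destination
# slots so that each rank's block of slots reuses, where possible, ancestors the
# rank already stores. The multiset of ancestor indices is preserved.
function greedy_reassign(indices::Vector{Int}, n_ranks::Int)
    nprt_per_rank = div(length(indices), n_ranks)
    owner(i) = div(i - 1, nprt_per_rank)      # rank that stores global particle index i
    remaining = copy(indices)                 # ancestors still waiting for a slot
    out = zeros(Int, length(indices))
    for r in 0:(n_ranks - 1)
        slots = (r * nprt_per_rank + 1):((r + 1) * nprt_per_rank)
        local_pool = [i for i in remaining if owner(i) == r]
        for (s, i) in zip(slots, local_pool)  # keep locally stored ancestors local
            out[s] = i
            deleteat!(remaining, findfirst(==(i), remaining))
        end
    end
    for s in eachindex(out)                   # fill leftover slots from leftover ancestors
        if out[s] == 0
            out[s] = pop!(remaining)
        end
    end
    return out
end

For example, with 2 ranks and 3 particles per rank, the indices [1, 1, 5, 5, 6, 2] would be reassigned to [1, 1, 2, 5, 5, 6], which requires no inter-rank copies at all.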

src/params.jl

Lines changed: 3 additions & 3 deletions
@@ -17,8 +17,8 @@ Parameters for ParticleDA run. Keyword arguments:
   the scheduler to balance load across threads but potentially increase overheads.
   If simulation of the model being filtered use multiple threads then it may be
   beneficial to set the `n_tasks = 1` to avoid too much contention between threads.
-* `optimize_copy_states::Bool`: Flag to control whether to use optimized copy_states
-  function that reduces the number of messages sent during resampling.
+* `optimize_resampling::Bool`: Flag to control whether to optimize resampling indices
+  to minimize data movement when copying states between MPI ranks.
 """
 Base.@kwdef struct FilterParameters{V<:Union{AbstractSet, AbstractVector}}
     master_rank::Int = 0
@@ -29,7 +29,7 @@ Base.@kwdef struct FilterParameters{V<:Union{AbstractSet, AbstractVector}}
     particle_save_time_indices::V = []
     seed::Union{Nothing, Int} = nothing
     n_tasks::Int = -1
-    optimize_copy_states::Bool = false
+    optimize_resampling::Bool = true
 end
 
 
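
With the default flipped to true, the optimization is active unless explicitly disabled. In the weak-scaling setup it is driven by the optimize_resampling key under filter: in parametersW1.yaml; as a minimal sketch of direct construction (assuming FilterParameters is accessed from the ParticleDA module as declared above):

using ParticleDA

params_default  = ParticleDA.FilterParameters()                              # optimize_resampling = true by default
params_disabled = ParticleDA.FilterParameters(optimize_resampling = false)   # fall back to the unpermuted indices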

src/utils.jl

Lines changed: 1 addition & 68 deletions
@@ -83,73 +83,6 @@ function init_states(model, nprt_per_rank::Int, n_tasks::Int, rng::AbstractRNG)
 end
 
 function copy_states!(
-    particles::AbstractMatrix{T},
-    buffer::AbstractMatrix{T},
-    resampling_indices::Vector{Int},
-    my_rank::Int,
-    nprt_per_rank::Int,
-    to::TimerOutputs.TimerOutput = TimerOutputs.TimerOutput(),
-    dedup::Bool = false
-) where T
-
-    if dedup
-        return copy_states_dedup!(particles, buffer, resampling_indices, my_rank, nprt_per_rank, to)
-    end
-
-    # These are the particle indices stored on this rank
-    particles_have = my_rank * nprt_per_rank + 1:(my_rank + 1) * nprt_per_rank
-
-    # These are the particle indices this rank should have after resampling
-    particles_want = resampling_indices[particles_have]
-
-    # These are the ranks that have the particles this rank should have
-    rank_has = floor.(Int, (particles_want .- 1) / nprt_per_rank)
-
-    # We could work out how many sends and receives we have to do and allocate
-    # this appropriately but, lazy
-    reqs = Vector{MPI.Request}(undef, 0)
-
-    # Send particles to processes that want them
-    @timeit_debug to "send loop" begin
-        for (k,id) in enumerate(resampling_indices)
-            rank_wants = floor(Int, (k - 1) / nprt_per_rank)
-            if id in particles_have && rank_wants != my_rank
-                local_id = id - my_rank * nprt_per_rank
-                req = MPI.Isend(view(particles, :, local_id), rank_wants, id, MPI.COMM_WORLD)
-                push!(reqs, req)
-            end
-        end
-    end
-
-    # Receive particles this rank wants from ranks that have them
-    # If I already have them, just do a local copy
-    # Receive into a buffer so we dont accidentally overwrite stuff
-    @timeit_debug to "receive loop" begin
-        for (k,proc,id) in zip(1:nprt_per_rank, rank_has, particles_want)
-            if proc == my_rank
-                @timeit_debug to "local copy" begin
-                    local_id = id - my_rank * nprt_per_rank
-                    buffer[:, k] .= view(particles, :, local_id)
-                end
-            else
-                @timeit_debug to "remote receive" begin
-                    req = MPI.Irecv!(view(buffer, :, k), proc, id, MPI.COMM_WORLD)
-                    push!(reqs,req)
-                end
-            end
-        end
-    end
-
-    # Wait for all comms to complete
-    @timeit_debug to "waitall phase" MPI.Waitall(reqs)
-
-    @timeit_debug to "buffer write-back" particles .= buffer
-
-end
-
-# An optimized version of copy_states that minimizes the number of messages sent
-# by deduplicating particles that need to be sent between ranks.
-function copy_states_dedup!(
     particles::AbstractMatrix{T},
     buffer::AbstractMatrix{T},
     resampling_indices::Vector{Int},
@@ -243,7 +176,7 @@ function _determine_sends(resampling_indices::Vector{Int}, my_rank::Int, nprt_pe
     return sends_to
 end
 
-function _categorize_wants(particles_want, my_rank::Int, nprt_per_rank::Int)
+function _categorize_wants(particles_want::Vector{Int}, my_rank::Int, nprt_per_rank::Int)
     local_copies = Dict{Int, Vector{Int}}()
     remote_copies = Dict{Int, Vector{Int}}()
 

test/mpi_optimized_copy_states.jl

Lines changed: 1 addition & 2 deletions
@@ -124,8 +124,7 @@ for (trial_name, indices_func) in trial_sets
            indices,
            my_rank,
            n_particle_per_rank,
-           timer,
-           !no_dedup
+           timer
        )
    end
end
