From 47efaf34930da7a61fca5464bcc22f246ec13b0f Mon Sep 17 00:00:00 2001 From: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> Date: Mon, 27 Oct 2025 11:37:36 +0100 Subject: [PATCH 1/9] Use contiguous memory layout with PrecomputedNeighborhoodSearch --- benchmarks/n_body.jl | 4 +- benchmarks/smoothed_particle_hydrodynamics.jl | 6 +- src/cell_lists/full_grid.jl | 3 +- src/gpu.jl | 15 +- src/neighborhood_search.jl | 8 + src/nhs_precomputed.jl | 138 +++++++++++++----- test/neighborhood_search.jl | 10 ++ 7 files changed, 141 insertions(+), 43 deletions(-) diff --git a/benchmarks/n_body.jl b/benchmarks/n_body.jl index 3c14531a..420b5f9e 100644 --- a/benchmarks/n_body.jl +++ b/benchmarks/n_body.jl @@ -18,7 +18,9 @@ function benchmark_n_body(neighborhood_search, coordinates_; # Passing a different backend like `CUDA.CUDABackend` # allows us to change the type of the array to run the benchmark on the GPU. coordinates = PointNeighbors.Adapt.adapt(parallelization_backend, coordinates_) - nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search) + # Remove unnecessary data structures that are only used for initialization + neighborhood_search_ = PointNeighbors.freeze_neighborhood_search(neighborhood_search) + nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search_) # This preserves the data type of `coordinates`, which makes it work for GPU types mass = 1e10 * (rand!(similar(coordinates, size(coordinates, 2))) .+ 1) diff --git a/benchmarks/smoothed_particle_hydrodynamics.jl b/benchmarks/smoothed_particle_hydrodynamics.jl index 1c3fd4ce..2afe34a5 100644 --- a/benchmarks/smoothed_particle_hydrodynamics.jl +++ b/benchmarks/smoothed_particle_hydrodynamics.jl @@ -86,7 +86,11 @@ function __benchmark_wcsph_inner(neighborhood_search, initial_condition, state_e density_diffusion = density_diffusion) system = PointNeighbors.Adapt.adapt(parallelization_backend, fluid_system) - nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search) + + # Remove unnecessary data structures that are only used for initialization + neighborhood_search_ = PointNeighbors.freeze_neighborhood_search(neighborhood_search) + + nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search_) semi = DummySemidiscretization(nhs, parallelization_backend, true) v = PointNeighbors.Adapt.adapt(parallelization_backend, diff --git a/src/cell_lists/full_grid.jl b/src/cell_lists/full_grid.jl index 11aaf5e3..5e0d6c1b 100644 --- a/src/cell_lists/full_grid.jl +++ b/src/cell_lists/full_grid.jl @@ -21,7 +21,8 @@ See [`copy_neighborhood_search`](@ref) for more details. - `backend = DynamicVectorOfVectors{Int32}`: Type of the data structure to store the actual cell lists. Can be - `Vector{Vector{Int32}}`: Scattered memory, but very memory-efficient. - - `DynamicVectorOfVectors{Int32}`: Contiguous memory, optimizing cache-hits. + - `DynamicVectorOfVectors{Int32}`: Contiguous memory, optimizing cache-hits + and GPU compatible. - `max_points_per_cell = 100`: Maximum number of points per cell. This will be used to allocate the `DynamicVectorOfVectors`. It is not used with the `Vector{Vector{Int32}}` backend. diff --git a/src/gpu.jl b/src/gpu.jl index a6bb29b9..99b321cc 100644 --- a/src/gpu.jl +++ b/src/gpu.jl @@ -9,6 +9,7 @@ # `Adapt.@adapt_structure` automatically generates the `adapt` function for our custom types. Adapt.@adapt_structure FullGridCellList Adapt.@adapt_structure DynamicVectorOfVectors +Adapt.@adapt_structure GridNeighborhoodSearch # `adapt(CuArray, ::SVector)::SVector`, but `adapt(Array, ::SVector)::Vector`. # We don't want to change the type of the `SVector` here. @@ -22,14 +23,14 @@ function Adapt.adapt_structure(to::typeof(Array), range::UnitRange) return range end -function Adapt.adapt_structure(to, nhs::GridNeighborhoodSearch) - (; search_radius, periodic_box, n_cells, cell_size) = nhs +function Adapt.adapt_structure(to, nhs::PrecomputedNeighborhoodSearch) + neighbor_lists = Adapt.adapt_structure(to, nhs.neighbor_lists) + search_radius = Adapt.adapt_structure(to, nhs.search_radius) + periodic_box = Adapt.adapt_structure(to, nhs.periodic_box) + neighborhood_search = Adapt.adapt_structure(to, nhs.neighborhood_search) - cell_list = Adapt.adapt_structure(to, nhs.cell_list) - update_buffer = Adapt.adapt_structure(to, nhs.update_buffer) - - return GridNeighborhoodSearch(cell_list, search_radius, periodic_box, n_cells, - cell_size, update_buffer, nhs.update_strategy) + return PrecomputedNeighborhoodSearch{ndims(nhs)}(neighbor_lists, search_radius, + periodic_box, neighborhood_search) end function Adapt.adapt_structure(to, cell_list::SpatialHashingCellList{NDIMS}) where {NDIMS} diff --git a/src/neighborhood_search.jl b/src/neighborhood_search.jl index b3ba12ca..26411509 100644 --- a/src/neighborhood_search.jl +++ b/src/neighborhood_search.jl @@ -108,6 +108,14 @@ GridNeighborhoodSearch{2, Float64, ...}(...) return nothing end +@inline function freeze_neighborhood_search(search::AbstractNeighborhoodSearch) + # Indicate that the neighborhood search is static and will not be updated anymore. + # Some implementations might use this to strip unnecessary data structures for updating. + # Notably, this is used for the `PrecomputedNeighborhoodSearch` to strip a potentially + # not GPU-compatible inner neighborhood search that is used only for initialization. + return search +end + """ PeriodicBox(; min_corner, max_corner) diff --git a/src/nhs_precomputed.jl b/src/nhs_precomputed.jl index 69749f1c..f470c8cb 100644 --- a/src/nhs_precomputed.jl +++ b/src/nhs_precomputed.jl @@ -22,71 +22,102 @@ initialization and update. [`PeriodicBox`](@ref). - `update_strategy`: Strategy to parallelize `update!` of the internally used `GridNeighborhoodSearch`. See [`GridNeighborhoodSearch`](@ref) - for available options. + for available options. This is only used for the default value + of `update_neighborhood_search` below. +- `update_neighborhood_search = GridNeighborhoodSearch{NDIMS}(; periodic_box, update_strategy)`: + The neighborhood search used to compute the neighbor lists. + By default, a [`GridNeighborhoodSearch`](@ref) is used. +- `backend = DynamicVectorOfVectors{Int32}`: Type of the data structure to store + the neighbor lists. Can be + - `Vector{Vector{Int32}}`: Scattered memory, but very memory-efficient. + - `DynamicVectorOfVectors{Int32}`: Contiguous memory, optimizing cache-hits + and GPU-compatible. +- `max_neighbors`: Maximum number of neighbors per particle. This will be used to + allocate the `DynamicVectorOfVectors`. It is not used with + other backends. The default is 64 in 2D and 324 in 3D. """ -struct PrecomputedNeighborhoodSearch{NDIMS, NHS, NL, PB} <: AbstractNeighborhoodSearch - neighborhood_search :: NHS +struct PrecomputedNeighborhoodSearch{NDIMS, NL, ELTYPE, PB, NHS} <: + AbstractNeighborhoodSearch neighbor_lists :: NL + search_radius :: ELTYPE periodic_box :: PB + neighborhood_search :: NHS - function PrecomputedNeighborhoodSearch{NDIMS}(; search_radius = 0.0, n_points = 0, - periodic_box = nothing, - update_strategy = nothing) where {NDIMS} - nhs = GridNeighborhoodSearch{NDIMS}(; search_radius, n_points, - periodic_box, update_strategy) - - neighbor_lists = Vector{Vector{Int}}() - - new{NDIMS, typeof(nhs), - typeof(neighbor_lists), - typeof(periodic_box)}(nhs, neighbor_lists, periodic_box) + function PrecomputedNeighborhoodSearch{NDIMS}(neighbor_lists, search_radius, + periodic_box, + update_neighborhood_search) where {NDIMS} + return new{NDIMS, typeof(neighbor_lists), + typeof(search_radius), + typeof(periodic_box), + typeof(update_neighborhood_search)}(neighbor_lists, search_radius, + periodic_box, + update_neighborhood_search) end end +function PrecomputedNeighborhoodSearch{NDIMS}(; search_radius = 0.0, n_points = 0, + periodic_box = nothing, + update_strategy = nothing, + update_neighborhood_search = GridNeighborhoodSearch{NDIMS}(; + search_radius, + n_points, + periodic_box, + update_strategy), + backend = DynamicVectorOfVectors{Int32}, + max_neighbors = 4 * NDIMS^4) where {NDIMS} + neighbor_lists = construct_backend(nothing, backend, n_points, max_neighbors) + + PrecomputedNeighborhoodSearch{NDIMS}(neighbor_lists, search_radius, + periodic_box, update_neighborhood_search) +end + @inline Base.ndims(::PrecomputedNeighborhoodSearch{NDIMS}) where {NDIMS} = NDIMS @inline requires_update(::PrecomputedNeighborhoodSearch) = (true, true) -@inline function search_radius(search::PrecomputedNeighborhoodSearch) - return search_radius(search.neighborhood_search) -end - function initialize!(search::PrecomputedNeighborhoodSearch, x::AbstractMatrix, y::AbstractMatrix; parallelization_backend = default_backend(x), eachindex_y = axes(y, 2)) (; neighborhood_search, neighbor_lists) = search + if eachindex_y != axes(y, 2) + error("this neighborhood search does not support inactive points") + end + # Initialize grid NHS - initialize!(neighborhood_search, x, y; eachindex_y, parallelization_backend) + initialize!(neighborhood_search, x, y; parallelization_backend) initialize_neighbor_lists!(neighbor_lists, neighborhood_search, x, y, - parallelization_backend, eachindex_y) + parallelization_backend) + + return search end -# WARNING! Experimental feature: -# By default, determine the parallelization backend from the type of `x`. -# Optionally, pass a `KernelAbstractions.Backend` to run the KernelAbstractions.jl code -# on this backend. This can be useful to run GPU kernels on the CPU by passing -# `parallelization_backend = KernelAbstractions.CPU()`, even though `x isa Array`. function update!(search::PrecomputedNeighborhoodSearch, x::AbstractMatrix, y::AbstractMatrix; points_moving = (true, true), parallelization_backend = default_backend(x), eachindex_y = axes(y, 2)) (; neighborhood_search, neighbor_lists) = search - # Update grid NHS - update!(neighborhood_search, x, y; eachindex_y, points_moving, parallelization_backend) + if eachindex_y != axes(y, 2) + error("this neighborhood search does not support inactive points") + end + + # Update the internal neighborhood search + update!(neighborhood_search, x, y; points_moving, parallelization_backend) # Skip update if both point sets are static if any(points_moving) initialize_neighbor_lists!(neighbor_lists, neighborhood_search, x, y, - parallelization_backend, eachindex_y) + parallelization_backend) end + + return search end function initialize_neighbor_lists!(neighbor_lists, neighborhood_search, x, y, - parallelization_backend, eachindex_y) + parallelization_backend) # Initialize neighbor lists empty!(neighbor_lists) resize!(neighbor_lists, size(x, 2)) @@ -95,12 +126,28 @@ function initialize_neighbor_lists!(neighbor_lists, neighborhood_search, x, y, end # Fill neighbor lists - foreach_point_neighbor(x, y, neighborhood_search; parallelization_backend, - points = eachindex_y) do point, neighbor, _, _ + foreach_point_neighbor(x, y, neighborhood_search; + parallelization_backend) do point, neighbor, _, _ push!(neighbor_lists[point], neighbor) end end +function initialize_neighbor_lists!(neighbor_lists::DynamicVectorOfVectors, + neighborhood_search, x, y, parallelization_backend) + resize!(neighbor_lists, size(x, 2)) + + # `Base.empty!.(neighbor_lists)`, but for all backends + @threaded parallelization_backend for i in eachindex(neighbor_lists) + emptyat!(neighbor_lists, i) + end + + # Fill neighbor lists + foreach_point_neighbor(x, y, neighborhood_search; + parallelization_backend) do point, neighbor, _, _ + pushat!(neighbor_lists, point, neighbor) + end +end + @inline function foreach_neighbor(f, neighbor_system_coords, neighborhood_search::PrecomputedNeighborhoodSearch, point, point_coords, search_radius) @@ -132,8 +179,33 @@ end function copy_neighborhood_search(nhs::PrecomputedNeighborhoodSearch, search_radius, n_points; eachpoint = 1:n_points) - update_strategy_ = nhs.neighborhood_search.update_strategy + update_neighborhood_search = copy_neighborhood_search(nhs.neighborhood_search, + search_radius, n_points; + eachpoint) + max_neighbors = max_inner_length(nhs.neighbor_lists, 4 * ndims(nhs)^4) return PrecomputedNeighborhoodSearch{ndims(nhs)}(; search_radius, n_points, periodic_box = nhs.periodic_box, - update_strategy = update_strategy_) + update_neighborhood_search, + backend = typeof(nhs.neighbor_lists), + max_neighbors) +end + +@inline function freeze_neighborhood_search(search::PrecomputedNeighborhoodSearch) + # Indicate that the neighborhood search is static and will not be updated anymore. + # For the `PrecomputedNeighborhoodSearch`, strip the inner neighborhood search, + # which is used only for initialization and updating. + return PrecomputedNeighborhoodSearch{ndims(search)}(search.neighbor_lists, + search.search_radius, + search.periodic_box, + nothing) +end + +# TODO move to `vector_of_vectors.jl` +function max_inner_length(cells::DynamicVectorOfVectors, fallback) + return size(cells.backend, 1) +end + +# Fallback when backend is a `Vector{Vector{T}}`. Only used for copying the cell list. +function max_inner_length(::Any, fallback) + return fallback end diff --git a/test/neighborhood_search.jl b/test/neighborhood_search.jl index 605f7d07..65d3ed85 100644 --- a/test/neighborhood_search.jl +++ b/test/neighborhood_search.jl @@ -59,6 +59,9 @@ backend = Vector{Vector{Int32}})), PrecomputedNeighborhoodSearch{NDIMS}(; search_radius, n_points, periodic_box = periodic_boxes[i]), + PrecomputedNeighborhoodSearch{NDIMS}(; search_radius, n_points, + periodic_box = periodic_boxes[i], + backend = Vector{Vector{Int32}}), GridNeighborhoodSearch{NDIMS}(; search_radius, n_points, periodic_box = periodic_boxes[i], cell_list = SpatialHashingCellList{NDIMS}(list_size = 2 * @@ -71,6 +74,7 @@ "`GridNeighborhoodSearch` with `FullGridCellList` with `DynamicVectorOfVectors`", "`GridNeighborhoodSearch` with `FullGridCellList` with `Vector{Vector}`", "`PrecomputedNeighborhoodSearch`", + "`PrecomputedNeighborhoodSearch` with `Vector{Vector}`", "`GridNeighborhoodSearch` with `SpatialHashingCellList`" ] @@ -86,6 +90,8 @@ max_corner = periodic_boxes[i].max_corner, backend = Vector{Vector{Int32}})), PrecomputedNeighborhoodSearch{NDIMS}(periodic_box = periodic_boxes[i]), + PrecomputedNeighborhoodSearch{NDIMS}(periodic_box = periodic_boxes[i], + backend = Vector{Vector{Int32}}), GridNeighborhoodSearch{NDIMS}(periodic_box = periodic_boxes[i], cell_list = SpatialHashingCellList{NDIMS}(list_size = 2 * n_points)) @@ -193,6 +199,8 @@ search_radius, backend = Vector{Vector{Int}})), PrecomputedNeighborhoodSearch{NDIMS}(; search_radius, n_points), + PrecomputedNeighborhoodSearch{NDIMS}(; search_radius, n_points, + backend = Vector{Vector{Int}}), GridNeighborhoodSearch{NDIMS}(; search_radius, n_points, cell_list = SpatialHashingCellList{NDIMS}(list_size = 2 * n_points)), @@ -211,6 +219,7 @@ "`GridNeighborhoodSearch` with `FullGridCellList` with `DynamicVectorOfVectors` and `SemiParallelUpdate`", "`GridNeighborhoodSearch` with `FullGridCellList` with `Vector{Vector}`", "`PrecomputedNeighborhoodSearch`", + "`PrecomputedNeighborhoodSearch` with `Vector{Vector}`", "`GridNeighborhoodSearch` with `SpatialHashingCellList` with `DynamicVectorOfVectors`", "`GridNeighborhoodSearch` with `SpatialHashingCellList` with `Vector{Vector}`" ] @@ -232,6 +241,7 @@ max_corner, backend = Vector{Vector{Int32}})), PrecomputedNeighborhoodSearch{NDIMS}(), + PrecomputedNeighborhoodSearch{NDIMS}(backend = Vector{Vector{Int32}}), GridNeighborhoodSearch{NDIMS}(cell_list = SpatialHashingCellList{NDIMS}(list_size = 2 * n_points)), GridNeighborhoodSearch{NDIMS}(cell_list = SpatialHashingCellList{NDIMS}(list_size = 2 * From 447f42bbc366048282081d00ff276156da293e00 Mon Sep 17 00:00:00 2001 From: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> Date: Mon, 27 Oct 2025 11:41:40 +0100 Subject: [PATCH 2/9] Fix --- src/nhs_precomputed.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nhs_precomputed.jl b/src/nhs_precomputed.jl index f470c8cb..5621fd1f 100644 --- a/src/nhs_precomputed.jl +++ b/src/nhs_precomputed.jl @@ -65,7 +65,7 @@ function PrecomputedNeighborhoodSearch{NDIMS}(; search_radius = 0.0, n_points = update_strategy), backend = DynamicVectorOfVectors{Int32}, max_neighbors = 4 * NDIMS^4) where {NDIMS} - neighbor_lists = construct_backend(nothing, backend, n_points, max_neighbors) + neighbor_lists = construct_backend(backend, n_points, max_neighbors) PrecomputedNeighborhoodSearch{NDIMS}(neighbor_lists, search_radius, periodic_box, update_neighborhood_search) From e4d1f4b771604cd69710479746b28608c8fb9ad0 Mon Sep 17 00:00:00 2001 From: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> Date: Mon, 27 Oct 2025 12:03:27 +0100 Subject: [PATCH 3/9] Prepare PR --- benchmarks/n_body.jl | 2 ++ src/cell_lists/full_grid.jl | 3 ++- src/cell_lists/spatial_hashing.jl | 13 +++++++------ src/nhs_precomputed.jl | 16 ++++++---------- src/vector_of_vectors.jl | 9 +++++++++ 5 files changed, 26 insertions(+), 17 deletions(-) diff --git a/benchmarks/n_body.jl b/benchmarks/n_body.jl index 420b5f9e..265409f7 100644 --- a/benchmarks/n_body.jl +++ b/benchmarks/n_body.jl @@ -18,8 +18,10 @@ function benchmark_n_body(neighborhood_search, coordinates_; # Passing a different backend like `CUDA.CUDABackend` # allows us to change the type of the array to run the benchmark on the GPU. coordinates = PointNeighbors.Adapt.adapt(parallelization_backend, coordinates_) + # Remove unnecessary data structures that are only used for initialization neighborhood_search_ = PointNeighbors.freeze_neighborhood_search(neighborhood_search) + nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search_) # This preserves the data type of `coordinates`, which makes it work for GPU types diff --git a/src/cell_lists/full_grid.jl b/src/cell_lists/full_grid.jl index 5e0d6c1b..484b424b 100644 --- a/src/cell_lists/full_grid.jl +++ b/src/cell_lists/full_grid.jl @@ -187,7 +187,8 @@ function copy_cell_list(cell_list::FullGridCellList, search_radius, periodic_box return FullGridCellList(; min_corner, max_corner, search_radius, backend = typeof(cell_list.cells), - max_points_per_cell = max_points_per_cell(cell_list.cells)) + max_points_per_cell = max_inner_length(cell_list.cells.backend, + 100)) end @inline function check_cell_bounds(cell_list::FullGridCellList{<:DynamicVectorOfVectors{<:Any, diff --git a/src/cell_lists/spatial_hashing.jl b/src/cell_lists/spatial_hashing.jl index 0db87130..f37c6a8d 100644 --- a/src/cell_lists/spatial_hashing.jl +++ b/src/cell_lists/spatial_hashing.jl @@ -1,12 +1,12 @@ """ - SpatialHashingCellList{NDIMS}(; list_size, + SpatialHashingCellList{NDIMS}(; list_size, backend = DynamicVectorOfVectors{Int32}, max_points_per_cell = 100) -A basic spatial hashing implementation. Similar to [`DictionaryCellList`](@ref), the domain is discretized into cells, -and the particles in each cell are stored in a hash map. The hash is computed using the spatial location of each cell, -as described by Ihmsen et al. (2011)(@cite Ihmsen2011). By using a hash map that stores entries only for non-empty cells, -the domain is effectively infinite. The size of the hash map is recommended to be approximately twice the number of particles +A basic spatial hashing implementation. Similar to [`DictionaryCellList`](@ref), the domain is discretized into cells, +and the particles in each cell are stored in a hash map. The hash is computed using the spatial location of each cell, +as described by Ihmsen et al. (2011)(@cite Ihmsen2011). By using a hash map that stores entries only for non-empty cells, +the domain is effectively infinite. The size of the hash map is recommended to be approximately twice the number of particles to balance memory consumption against the likelihood of hash collisions. # Arguments @@ -128,7 +128,8 @@ function copy_cell_list(cell_list::SpatialHashingCellList, search_radius, return SpatialHashingCellList{NDIMS}(list_size = list_size, backend = typeof(cell_list.cells), - max_points_per_cell = max_points_per_cell(cell_list.cells)) + max_points_per_cell = max_inner_length(cell_list.cells.backend, + 100)) end @inline function Base.getindex(cell_list::SpatialHashingCellList, cell::Tuple) diff --git a/src/nhs_precomputed.jl b/src/nhs_precomputed.jl index 5621fd1f..021dc94e 100644 --- a/src/nhs_precomputed.jl +++ b/src/nhs_precomputed.jl @@ -10,6 +10,9 @@ slower [`update!`](@ref). A [`GridNeighborhoodSearch`](@ref) is used internally to compute the neighbor lists during initialization and update. +When used on the GPU, use `freeze_neighborhood_search` after the initialization +to strip the internal neighborhood search, which is not needed anymore. + # Arguments - `NDIMS`: Number of dimensions. @@ -182,6 +185,9 @@ function copy_neighborhood_search(nhs::PrecomputedNeighborhoodSearch, update_neighborhood_search = copy_neighborhood_search(nhs.neighborhood_search, search_radius, n_points; eachpoint) + + # For `Vector{Vector}` backend use `4 * ndims(nhs)^4` as fallback. + # This should never be used because this backend doesn't require a `max_neighbors`. max_neighbors = max_inner_length(nhs.neighbor_lists, 4 * ndims(nhs)^4) return PrecomputedNeighborhoodSearch{ndims(nhs)}(; search_radius, n_points, periodic_box = nhs.periodic_box, @@ -199,13 +205,3 @@ end search.periodic_box, nothing) end - -# TODO move to `vector_of_vectors.jl` -function max_inner_length(cells::DynamicVectorOfVectors, fallback) - return size(cells.backend, 1) -end - -# Fallback when backend is a `Vector{Vector{T}}`. Only used for copying the cell list. -function max_inner_length(::Any, fallback) - return fallback -end diff --git a/src/vector_of_vectors.jl b/src/vector_of_vectors.jl index b8fcd09f..9e331c50 100644 --- a/src/vector_of_vectors.jl +++ b/src/vector_of_vectors.jl @@ -161,3 +161,12 @@ end return vov end + +function max_inner_length(cells::DynamicVectorOfVectors, fallback) + return size(cells.backend, 1) +end + +# Fallback when backend is a `Vector{Vector{T}}`. Only used for copying the cell list. +function max_inner_length(::Any, fallback) + return fallback +end From e477a42c33a9f6afb1acd99a53ce0491349223c3 Mon Sep 17 00:00:00 2001 From: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> Date: Mon, 27 Oct 2025 12:08:13 +0100 Subject: [PATCH 4/9] Include precomputed NHS in GPU benchmarks --- benchmarks/run_benchmarks.jl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/benchmarks/run_benchmarks.jl b/benchmarks/run_benchmarks.jl index 54c94ad7..e3c4ff7e 100644 --- a/benchmarks/run_benchmarks.jl +++ b/benchmarks/run_benchmarks.jl @@ -149,6 +149,7 @@ end Shortcut to call [`run_benchmark`](@ref) with all GPU-compatible neighborhood search implementations: - `GridNeighborhoodSearch` with `FullGridCellList` +- `PrecomputedNeighborhoodSearch` # Arguments - `benchmark`: The benchmark function. See [`benchmark_count_neighbors`](@ref), @@ -178,9 +179,11 @@ function run_benchmark_gpu(benchmark, n_points_per_dimension, iterations; kwargs GridNeighborhoodSearch{NDIMS}(search_radius = 0.0f0, cell_list = FullGridCellList(; search_radius = 0.0f0, min_corner, max_corner)) + PrecomputedNeighborhoodSearch{NDIMS}(search_radius = 0.0f0) ] - names = ["GridNeighborhoodSearch with FullGridCellList";;] + names = ["GridNeighborhoodSearch with FullGridCellList";; + "PrecomputedNeighborhoodSearch"] run_benchmark(benchmark, n_points_per_dimension, iterations, neighborhood_searches; names, kwargs...) From 3eade258de667d26bd97b2f4f83006e499563f18 Mon Sep 17 00:00:00 2001 From: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> Date: Mon, 3 Nov 2025 14:32:32 +0100 Subject: [PATCH 5/9] Fix tests --- src/cell_lists/full_grid.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/cell_lists/full_grid.jl b/src/cell_lists/full_grid.jl index 484b424b..e04ddf3a 100644 --- a/src/cell_lists/full_grid.jl +++ b/src/cell_lists/full_grid.jl @@ -187,8 +187,7 @@ function copy_cell_list(cell_list::FullGridCellList, search_radius, periodic_box return FullGridCellList(; min_corner, max_corner, search_radius, backend = typeof(cell_list.cells), - max_points_per_cell = max_inner_length(cell_list.cells.backend, - 100)) + max_points_per_cell = max_inner_length(cell_list.cells, 100)) end @inline function check_cell_bounds(cell_list::FullGridCellList{<:DynamicVectorOfVectors{<:Any, From 18e25092920d8f7ab06d2bf66d0a01231f9d8d13 Mon Sep 17 00:00:00 2001 From: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> Date: Mon, 3 Nov 2025 14:35:47 +0100 Subject: [PATCH 6/9] Reformat --- benchmarks/n_body.jl | 2 +- benchmarks/run_benchmarks.jl | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/benchmarks/n_body.jl b/benchmarks/n_body.jl index 265409f7..1fbbc552 100644 --- a/benchmarks/n_body.jl +++ b/benchmarks/n_body.jl @@ -21,7 +21,7 @@ function benchmark_n_body(neighborhood_search, coordinates_; # Remove unnecessary data structures that are only used for initialization neighborhood_search_ = PointNeighbors.freeze_neighborhood_search(neighborhood_search) - + nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search_) # This preserves the data type of `coordinates`, which makes it work for GPU types diff --git a/benchmarks/run_benchmarks.jl b/benchmarks/run_benchmarks.jl index e3c4ff7e..ffabc805 100644 --- a/benchmarks/run_benchmarks.jl +++ b/benchmarks/run_benchmarks.jl @@ -175,12 +175,12 @@ function run_benchmark_gpu(benchmark, n_points_per_dimension, iterations; kwargs min_corner = 0.0f0 .* n_points_per_dimension max_corner = Float32.(n_points_per_dimension ./ maximum(n_points_per_dimension)) - neighborhood_searches = [ - GridNeighborhoodSearch{NDIMS}(search_radius = 0.0f0, - cell_list = FullGridCellList(; search_radius = 0.0f0, - min_corner, max_corner)) - PrecomputedNeighborhoodSearch{NDIMS}(search_radius = 0.0f0) - ] + neighborhood_searches = [GridNeighborhoodSearch{NDIMS}(search_radius = 0.0f0, + cell_list = FullGridCellList(; + search_radius = 0.0f0, + min_corner, + max_corner)) + PrecomputedNeighborhoodSearch{NDIMS}(search_radius = 0.0f0)] names = ["GridNeighborhoodSearch with FullGridCellList";; "PrecomputedNeighborhoodSearch"] From 01507251135c2eba24afea3c4ebf8a1e5d9bfd7c Mon Sep 17 00:00:00 2001 From: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> Date: Mon, 3 Nov 2025 14:36:16 +0100 Subject: [PATCH 7/9] Fix more tests --- src/cell_lists/spatial_hashing.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cell_lists/spatial_hashing.jl b/src/cell_lists/spatial_hashing.jl index f37c6a8d..e58a9c25 100644 --- a/src/cell_lists/spatial_hashing.jl +++ b/src/cell_lists/spatial_hashing.jl @@ -128,7 +128,7 @@ function copy_cell_list(cell_list::SpatialHashingCellList, search_radius, return SpatialHashingCellList{NDIMS}(list_size = list_size, backend = typeof(cell_list.cells), - max_points_per_cell = max_inner_length(cell_list.cells.backend, + max_points_per_cell = max_inner_length(cell_list.cells, 100)) end From 219e0fa8c652a0992e72009b6b2cf7a5bd61d0b2 Mon Sep 17 00:00:00 2001 From: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> Date: Mon, 3 Nov 2025 15:20:58 +0100 Subject: [PATCH 8/9] Fix 1D tests --- src/nhs_precomputed.jl | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/nhs_precomputed.jl b/src/nhs_precomputed.jl index 021dc94e..481461f5 100644 --- a/src/nhs_precomputed.jl +++ b/src/nhs_precomputed.jl @@ -67,13 +67,26 @@ function PrecomputedNeighborhoodSearch{NDIMS}(; search_radius = 0.0, n_points = periodic_box, update_strategy), backend = DynamicVectorOfVectors{Int32}, - max_neighbors = 4 * NDIMS^4) where {NDIMS} + max_neighbors = max_neighbors(NDIMS)) where {NDIMS} neighbor_lists = construct_backend(backend, n_points, max_neighbors) PrecomputedNeighborhoodSearch{NDIMS}(neighbor_lists, search_radius, periodic_box, update_neighborhood_search) end +# Default values for maximum neighbor count +function max_neighbors(NDIMS) + if NDIMS == 1 + return 32 + elseif NDIMS == 2 + return 64 + elseif NDIMS == 3 + return 320 + end + + throw(ArgumentError("`NDIMS` must be 1, 2, or 3")) +end + @inline Base.ndims(::PrecomputedNeighborhoodSearch{NDIMS}) where {NDIMS} = NDIMS @inline requires_update(::PrecomputedNeighborhoodSearch) = (true, true) @@ -186,9 +199,9 @@ function copy_neighborhood_search(nhs::PrecomputedNeighborhoodSearch, search_radius, n_points; eachpoint) - # For `Vector{Vector}` backend use `4 * ndims(nhs)^4` as fallback. + # For `Vector{Vector}` backend use `max_neighbors(NDIMS)` as fallback. # This should never be used because this backend doesn't require a `max_neighbors`. - max_neighbors = max_inner_length(nhs.neighbor_lists, 4 * ndims(nhs)^4) + max_neighbors = max_inner_length(nhs.neighbor_lists, max_neighbors(ndims(nhs))) return PrecomputedNeighborhoodSearch{ndims(nhs)}(; search_radius, n_points, periodic_box = nhs.periodic_box, update_neighborhood_search, From dc51ce500fb0e2fe5928f54f708484dfbb6dc777 Mon Sep 17 00:00:00 2001 From: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> Date: Mon, 3 Nov 2025 15:46:56 +0100 Subject: [PATCH 9/9] Fix scoping error --- src/nhs_precomputed.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/nhs_precomputed.jl b/src/nhs_precomputed.jl index 481461f5..148e7dc7 100644 --- a/src/nhs_precomputed.jl +++ b/src/nhs_precomputed.jl @@ -201,12 +201,12 @@ function copy_neighborhood_search(nhs::PrecomputedNeighborhoodSearch, # For `Vector{Vector}` backend use `max_neighbors(NDIMS)` as fallback. # This should never be used because this backend doesn't require a `max_neighbors`. - max_neighbors = max_inner_length(nhs.neighbor_lists, max_neighbors(ndims(nhs))) + max_neighbors_ = max_inner_length(nhs.neighbor_lists, max_neighbors(ndims(nhs))) return PrecomputedNeighborhoodSearch{ndims(nhs)}(; search_radius, n_points, periodic_box = nhs.periodic_box, update_neighborhood_search, backend = typeof(nhs.neighbor_lists), - max_neighbors) + max_neighbors = max_neighbors_) end @inline function freeze_neighborhood_search(search::PrecomputedNeighborhoodSearch)