diff --git a/benchmarks/n_body.jl b/benchmarks/n_body.jl index 3c14531..1fbbc55 100644 --- a/benchmarks/n_body.jl +++ b/benchmarks/n_body.jl @@ -18,7 +18,11 @@ function benchmark_n_body(neighborhood_search, coordinates_; # Passing a different backend like `CUDA.CUDABackend` # allows us to change the type of the array to run the benchmark on the GPU. coordinates = PointNeighbors.Adapt.adapt(parallelization_backend, coordinates_) - nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search) + + # Remove unnecessary data structures that are only used for initialization + neighborhood_search_ = PointNeighbors.freeze_neighborhood_search(neighborhood_search) + + nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search_) # This preserves the data type of `coordinates`, which makes it work for GPU types mass = 1e10 * (rand!(similar(coordinates, size(coordinates, 2))) .+ 1) diff --git a/benchmarks/run_benchmarks.jl b/benchmarks/run_benchmarks.jl index 54c94ad..ffabc80 100644 --- a/benchmarks/run_benchmarks.jl +++ b/benchmarks/run_benchmarks.jl @@ -149,6 +149,7 @@ end Shortcut to call [`run_benchmark`](@ref) with all GPU-compatible neighborhood search implementations: - `GridNeighborhoodSearch` with `FullGridCellList` +- `PrecomputedNeighborhoodSearch` # Arguments - `benchmark`: The benchmark function. See [`benchmark_count_neighbors`](@ref), @@ -174,13 +175,15 @@ function run_benchmark_gpu(benchmark, n_points_per_dimension, iterations; kwargs min_corner = 0.0f0 .* n_points_per_dimension max_corner = Float32.(n_points_per_dimension ./ maximum(n_points_per_dimension)) - neighborhood_searches = [ - GridNeighborhoodSearch{NDIMS}(search_radius = 0.0f0, - cell_list = FullGridCellList(; search_radius = 0.0f0, - min_corner, max_corner)) - ] - - names = ["GridNeighborhoodSearch with FullGridCellList";;] + neighborhood_searches = [GridNeighborhoodSearch{NDIMS}(search_radius = 0.0f0, + cell_list = FullGridCellList(; + search_radius = 0.0f0, + min_corner, + max_corner)) + PrecomputedNeighborhoodSearch{NDIMS}(search_radius = 0.0f0)] + + names = ["GridNeighborhoodSearch with FullGridCellList";; + "PrecomputedNeighborhoodSearch"] run_benchmark(benchmark, n_points_per_dimension, iterations, neighborhood_searches; names, kwargs...) diff --git a/benchmarks/smoothed_particle_hydrodynamics.jl b/benchmarks/smoothed_particle_hydrodynamics.jl index 1c3fd4c..2afe34a 100644 --- a/benchmarks/smoothed_particle_hydrodynamics.jl +++ b/benchmarks/smoothed_particle_hydrodynamics.jl @@ -86,7 +86,11 @@ function __benchmark_wcsph_inner(neighborhood_search, initial_condition, state_e density_diffusion = density_diffusion) system = PointNeighbors.Adapt.adapt(parallelization_backend, fluid_system) - nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search) + + # Remove unnecessary data structures that are only used for initialization + neighborhood_search_ = PointNeighbors.freeze_neighborhood_search(neighborhood_search) + + nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search_) semi = DummySemidiscretization(nhs, parallelization_backend, true) v = PointNeighbors.Adapt.adapt(parallelization_backend, diff --git a/src/cell_lists/full_grid.jl b/src/cell_lists/full_grid.jl index 11aaf5e..e04ddf3 100644 --- a/src/cell_lists/full_grid.jl +++ b/src/cell_lists/full_grid.jl @@ -21,7 +21,8 @@ See [`copy_neighborhood_search`](@ref) for more details. - `backend = DynamicVectorOfVectors{Int32}`: Type of the data structure to store the actual cell lists. Can be - `Vector{Vector{Int32}}`: Scattered memory, but very memory-efficient. - - `DynamicVectorOfVectors{Int32}`: Contiguous memory, optimizing cache-hits. + - `DynamicVectorOfVectors{Int32}`: Contiguous memory, optimizing cache-hits + and GPU compatible. - `max_points_per_cell = 100`: Maximum number of points per cell. This will be used to allocate the `DynamicVectorOfVectors`. It is not used with the `Vector{Vector{Int32}}` backend. @@ -186,7 +187,7 @@ function copy_cell_list(cell_list::FullGridCellList, search_radius, periodic_box return FullGridCellList(; min_corner, max_corner, search_radius, backend = typeof(cell_list.cells), - max_points_per_cell = max_points_per_cell(cell_list.cells)) + max_points_per_cell = max_inner_length(cell_list.cells, 100)) end @inline function check_cell_bounds(cell_list::FullGridCellList{<:DynamicVectorOfVectors{<:Any, diff --git a/src/cell_lists/spatial_hashing.jl b/src/cell_lists/spatial_hashing.jl index 0db8713..e58a9c2 100644 --- a/src/cell_lists/spatial_hashing.jl +++ b/src/cell_lists/spatial_hashing.jl @@ -1,12 +1,12 @@ """ - SpatialHashingCellList{NDIMS}(; list_size, + SpatialHashingCellList{NDIMS}(; list_size, backend = DynamicVectorOfVectors{Int32}, max_points_per_cell = 100) -A basic spatial hashing implementation. Similar to [`DictionaryCellList`](@ref), the domain is discretized into cells, -and the particles in each cell are stored in a hash map. The hash is computed using the spatial location of each cell, -as described by Ihmsen et al. (2011)(@cite Ihmsen2011). By using a hash map that stores entries only for non-empty cells, -the domain is effectively infinite. The size of the hash map is recommended to be approximately twice the number of particles +A basic spatial hashing implementation. Similar to [`DictionaryCellList`](@ref), the domain is discretized into cells, +and the particles in each cell are stored in a hash map. The hash is computed using the spatial location of each cell, +as described by Ihmsen et al. (2011)(@cite Ihmsen2011). By using a hash map that stores entries only for non-empty cells, +the domain is effectively infinite. The size of the hash map is recommended to be approximately twice the number of particles to balance memory consumption against the likelihood of hash collisions. # Arguments @@ -128,7 +128,8 @@ function copy_cell_list(cell_list::SpatialHashingCellList, search_radius, return SpatialHashingCellList{NDIMS}(list_size = list_size, backend = typeof(cell_list.cells), - max_points_per_cell = max_points_per_cell(cell_list.cells)) + max_points_per_cell = max_inner_length(cell_list.cells, + 100)) end @inline function Base.getindex(cell_list::SpatialHashingCellList, cell::Tuple) diff --git a/src/gpu.jl b/src/gpu.jl index a6bb29b..99b321c 100644 --- a/src/gpu.jl +++ b/src/gpu.jl @@ -9,6 +9,7 @@ # `Adapt.@adapt_structure` automatically generates the `adapt` function for our custom types. Adapt.@adapt_structure FullGridCellList Adapt.@adapt_structure DynamicVectorOfVectors +Adapt.@adapt_structure GridNeighborhoodSearch # `adapt(CuArray, ::SVector)::SVector`, but `adapt(Array, ::SVector)::Vector`. # We don't want to change the type of the `SVector` here. @@ -22,14 +23,14 @@ function Adapt.adapt_structure(to::typeof(Array), range::UnitRange) return range end -function Adapt.adapt_structure(to, nhs::GridNeighborhoodSearch) - (; search_radius, periodic_box, n_cells, cell_size) = nhs +function Adapt.adapt_structure(to, nhs::PrecomputedNeighborhoodSearch) + neighbor_lists = Adapt.adapt_structure(to, nhs.neighbor_lists) + search_radius = Adapt.adapt_structure(to, nhs.search_radius) + periodic_box = Adapt.adapt_structure(to, nhs.periodic_box) + neighborhood_search = Adapt.adapt_structure(to, nhs.neighborhood_search) - cell_list = Adapt.adapt_structure(to, nhs.cell_list) - update_buffer = Adapt.adapt_structure(to, nhs.update_buffer) - - return GridNeighborhoodSearch(cell_list, search_radius, periodic_box, n_cells, - cell_size, update_buffer, nhs.update_strategy) + return PrecomputedNeighborhoodSearch{ndims(nhs)}(neighbor_lists, search_radius, + periodic_box, neighborhood_search) end function Adapt.adapt_structure(to, cell_list::SpatialHashingCellList{NDIMS}) where {NDIMS} diff --git a/src/neighborhood_search.jl b/src/neighborhood_search.jl index b3ba12c..2641150 100644 --- a/src/neighborhood_search.jl +++ b/src/neighborhood_search.jl @@ -108,6 +108,14 @@ GridNeighborhoodSearch{2, Float64, ...}(...) return nothing end +@inline function freeze_neighborhood_search(search::AbstractNeighborhoodSearch) + # Indicate that the neighborhood search is static and will not be updated anymore. + # Some implementations might use this to strip unnecessary data structures for updating. + # Notably, this is used for the `PrecomputedNeighborhoodSearch` to strip a potentially + # not GPU-compatible inner neighborhood search that is used only for initialization. + return search +end + """ PeriodicBox(; min_corner, max_corner) diff --git a/src/nhs_precomputed.jl b/src/nhs_precomputed.jl index 69749f1..148e7dc 100644 --- a/src/nhs_precomputed.jl +++ b/src/nhs_precomputed.jl @@ -10,6 +10,9 @@ slower [`update!`](@ref). A [`GridNeighborhoodSearch`](@ref) is used internally to compute the neighbor lists during initialization and update. +When used on the GPU, use `freeze_neighborhood_search` after the initialization +to strip the internal neighborhood search, which is not needed anymore. + # Arguments - `NDIMS`: Number of dimensions. @@ -22,71 +25,115 @@ initialization and update. [`PeriodicBox`](@ref). - `update_strategy`: Strategy to parallelize `update!` of the internally used `GridNeighborhoodSearch`. See [`GridNeighborhoodSearch`](@ref) - for available options. + for available options. This is only used for the default value + of `update_neighborhood_search` below. +- `update_neighborhood_search = GridNeighborhoodSearch{NDIMS}(; periodic_box, update_strategy)`: + The neighborhood search used to compute the neighbor lists. + By default, a [`GridNeighborhoodSearch`](@ref) is used. +- `backend = DynamicVectorOfVectors{Int32}`: Type of the data structure to store + the neighbor lists. Can be + - `Vector{Vector{Int32}}`: Scattered memory, but very memory-efficient. + - `DynamicVectorOfVectors{Int32}`: Contiguous memory, optimizing cache-hits + and GPU-compatible. +- `max_neighbors`: Maximum number of neighbors per particle. This will be used to + allocate the `DynamicVectorOfVectors`. It is not used with + other backends. The default is 64 in 2D and 324 in 3D. """ -struct PrecomputedNeighborhoodSearch{NDIMS, NHS, NL, PB} <: AbstractNeighborhoodSearch - neighborhood_search :: NHS +struct PrecomputedNeighborhoodSearch{NDIMS, NL, ELTYPE, PB, NHS} <: + AbstractNeighborhoodSearch neighbor_lists :: NL + search_radius :: ELTYPE periodic_box :: PB + neighborhood_search :: NHS - function PrecomputedNeighborhoodSearch{NDIMS}(; search_radius = 0.0, n_points = 0, - periodic_box = nothing, - update_strategy = nothing) where {NDIMS} - nhs = GridNeighborhoodSearch{NDIMS}(; search_radius, n_points, - periodic_box, update_strategy) + function PrecomputedNeighborhoodSearch{NDIMS}(neighbor_lists, search_radius, + periodic_box, + update_neighborhood_search) where {NDIMS} + return new{NDIMS, typeof(neighbor_lists), + typeof(search_radius), + typeof(periodic_box), + typeof(update_neighborhood_search)}(neighbor_lists, search_radius, + periodic_box, + update_neighborhood_search) + end +end - neighbor_lists = Vector{Vector{Int}}() +function PrecomputedNeighborhoodSearch{NDIMS}(; search_radius = 0.0, n_points = 0, + periodic_box = nothing, + update_strategy = nothing, + update_neighborhood_search = GridNeighborhoodSearch{NDIMS}(; + search_radius, + n_points, + periodic_box, + update_strategy), + backend = DynamicVectorOfVectors{Int32}, + max_neighbors = max_neighbors(NDIMS)) where {NDIMS} + neighbor_lists = construct_backend(backend, n_points, max_neighbors) + + PrecomputedNeighborhoodSearch{NDIMS}(neighbor_lists, search_radius, + periodic_box, update_neighborhood_search) +end - new{NDIMS, typeof(nhs), - typeof(neighbor_lists), - typeof(periodic_box)}(nhs, neighbor_lists, periodic_box) +# Default values for maximum neighbor count +function max_neighbors(NDIMS) + if NDIMS == 1 + return 32 + elseif NDIMS == 2 + return 64 + elseif NDIMS == 3 + return 320 end + + throw(ArgumentError("`NDIMS` must be 1, 2, or 3")) end @inline Base.ndims(::PrecomputedNeighborhoodSearch{NDIMS}) where {NDIMS} = NDIMS @inline requires_update(::PrecomputedNeighborhoodSearch) = (true, true) -@inline function search_radius(search::PrecomputedNeighborhoodSearch) - return search_radius(search.neighborhood_search) -end - function initialize!(search::PrecomputedNeighborhoodSearch, x::AbstractMatrix, y::AbstractMatrix; parallelization_backend = default_backend(x), eachindex_y = axes(y, 2)) (; neighborhood_search, neighbor_lists) = search + if eachindex_y != axes(y, 2) + error("this neighborhood search does not support inactive points") + end + # Initialize grid NHS - initialize!(neighborhood_search, x, y; eachindex_y, parallelization_backend) + initialize!(neighborhood_search, x, y; parallelization_backend) initialize_neighbor_lists!(neighbor_lists, neighborhood_search, x, y, - parallelization_backend, eachindex_y) + parallelization_backend) + + return search end -# WARNING! Experimental feature: -# By default, determine the parallelization backend from the type of `x`. -# Optionally, pass a `KernelAbstractions.Backend` to run the KernelAbstractions.jl code -# on this backend. This can be useful to run GPU kernels on the CPU by passing -# `parallelization_backend = KernelAbstractions.CPU()`, even though `x isa Array`. function update!(search::PrecomputedNeighborhoodSearch, x::AbstractMatrix, y::AbstractMatrix; points_moving = (true, true), parallelization_backend = default_backend(x), eachindex_y = axes(y, 2)) (; neighborhood_search, neighbor_lists) = search - # Update grid NHS - update!(neighborhood_search, x, y; eachindex_y, points_moving, parallelization_backend) + if eachindex_y != axes(y, 2) + error("this neighborhood search does not support inactive points") + end + + # Update the internal neighborhood search + update!(neighborhood_search, x, y; points_moving, parallelization_backend) # Skip update if both point sets are static if any(points_moving) initialize_neighbor_lists!(neighbor_lists, neighborhood_search, x, y, - parallelization_backend, eachindex_y) + parallelization_backend) end + + return search end function initialize_neighbor_lists!(neighbor_lists, neighborhood_search, x, y, - parallelization_backend, eachindex_y) + parallelization_backend) # Initialize neighbor lists empty!(neighbor_lists) resize!(neighbor_lists, size(x, 2)) @@ -95,12 +142,28 @@ function initialize_neighbor_lists!(neighbor_lists, neighborhood_search, x, y, end # Fill neighbor lists - foreach_point_neighbor(x, y, neighborhood_search; parallelization_backend, - points = eachindex_y) do point, neighbor, _, _ + foreach_point_neighbor(x, y, neighborhood_search; + parallelization_backend) do point, neighbor, _, _ push!(neighbor_lists[point], neighbor) end end +function initialize_neighbor_lists!(neighbor_lists::DynamicVectorOfVectors, + neighborhood_search, x, y, parallelization_backend) + resize!(neighbor_lists, size(x, 2)) + + # `Base.empty!.(neighbor_lists)`, but for all backends + @threaded parallelization_backend for i in eachindex(neighbor_lists) + emptyat!(neighbor_lists, i) + end + + # Fill neighbor lists + foreach_point_neighbor(x, y, neighborhood_search; + parallelization_backend) do point, neighbor, _, _ + pushat!(neighbor_lists, point, neighbor) + end +end + @inline function foreach_neighbor(f, neighbor_system_coords, neighborhood_search::PrecomputedNeighborhoodSearch, point, point_coords, search_radius) @@ -132,8 +195,26 @@ end function copy_neighborhood_search(nhs::PrecomputedNeighborhoodSearch, search_radius, n_points; eachpoint = 1:n_points) - update_strategy_ = nhs.neighborhood_search.update_strategy + update_neighborhood_search = copy_neighborhood_search(nhs.neighborhood_search, + search_radius, n_points; + eachpoint) + + # For `Vector{Vector}` backend use `max_neighbors(NDIMS)` as fallback. + # This should never be used because this backend doesn't require a `max_neighbors`. + max_neighbors_ = max_inner_length(nhs.neighbor_lists, max_neighbors(ndims(nhs))) return PrecomputedNeighborhoodSearch{ndims(nhs)}(; search_radius, n_points, periodic_box = nhs.periodic_box, - update_strategy = update_strategy_) + update_neighborhood_search, + backend = typeof(nhs.neighbor_lists), + max_neighbors = max_neighbors_) +end + +@inline function freeze_neighborhood_search(search::PrecomputedNeighborhoodSearch) + # Indicate that the neighborhood search is static and will not be updated anymore. + # For the `PrecomputedNeighborhoodSearch`, strip the inner neighborhood search, + # which is used only for initialization and updating. + return PrecomputedNeighborhoodSearch{ndims(search)}(search.neighbor_lists, + search.search_radius, + search.periodic_box, + nothing) end diff --git a/src/vector_of_vectors.jl b/src/vector_of_vectors.jl index b8fcd09..9e331c5 100644 --- a/src/vector_of_vectors.jl +++ b/src/vector_of_vectors.jl @@ -161,3 +161,12 @@ end return vov end + +function max_inner_length(cells::DynamicVectorOfVectors, fallback) + return size(cells.backend, 1) +end + +# Fallback when backend is a `Vector{Vector{T}}`. Only used for copying the cell list. +function max_inner_length(::Any, fallback) + return fallback +end diff --git a/test/neighborhood_search.jl b/test/neighborhood_search.jl index 605f7d0..65d3ed8 100644 --- a/test/neighborhood_search.jl +++ b/test/neighborhood_search.jl @@ -59,6 +59,9 @@ backend = Vector{Vector{Int32}})), PrecomputedNeighborhoodSearch{NDIMS}(; search_radius, n_points, periodic_box = periodic_boxes[i]), + PrecomputedNeighborhoodSearch{NDIMS}(; search_radius, n_points, + periodic_box = periodic_boxes[i], + backend = Vector{Vector{Int32}}), GridNeighborhoodSearch{NDIMS}(; search_radius, n_points, periodic_box = periodic_boxes[i], cell_list = SpatialHashingCellList{NDIMS}(list_size = 2 * @@ -71,6 +74,7 @@ "`GridNeighborhoodSearch` with `FullGridCellList` with `DynamicVectorOfVectors`", "`GridNeighborhoodSearch` with `FullGridCellList` with `Vector{Vector}`", "`PrecomputedNeighborhoodSearch`", + "`PrecomputedNeighborhoodSearch` with `Vector{Vector}`", "`GridNeighborhoodSearch` with `SpatialHashingCellList`" ] @@ -86,6 +90,8 @@ max_corner = periodic_boxes[i].max_corner, backend = Vector{Vector{Int32}})), PrecomputedNeighborhoodSearch{NDIMS}(periodic_box = periodic_boxes[i]), + PrecomputedNeighborhoodSearch{NDIMS}(periodic_box = periodic_boxes[i], + backend = Vector{Vector{Int32}}), GridNeighborhoodSearch{NDIMS}(periodic_box = periodic_boxes[i], cell_list = SpatialHashingCellList{NDIMS}(list_size = 2 * n_points)) @@ -193,6 +199,8 @@ search_radius, backend = Vector{Vector{Int}})), PrecomputedNeighborhoodSearch{NDIMS}(; search_radius, n_points), + PrecomputedNeighborhoodSearch{NDIMS}(; search_radius, n_points, + backend = Vector{Vector{Int}}), GridNeighborhoodSearch{NDIMS}(; search_radius, n_points, cell_list = SpatialHashingCellList{NDIMS}(list_size = 2 * n_points)), @@ -211,6 +219,7 @@ "`GridNeighborhoodSearch` with `FullGridCellList` with `DynamicVectorOfVectors` and `SemiParallelUpdate`", "`GridNeighborhoodSearch` with `FullGridCellList` with `Vector{Vector}`", "`PrecomputedNeighborhoodSearch`", + "`PrecomputedNeighborhoodSearch` with `Vector{Vector}`", "`GridNeighborhoodSearch` with `SpatialHashingCellList` with `DynamicVectorOfVectors`", "`GridNeighborhoodSearch` with `SpatialHashingCellList` with `Vector{Vector}`" ] @@ -232,6 +241,7 @@ max_corner, backend = Vector{Vector{Int32}})), PrecomputedNeighborhoodSearch{NDIMS}(), + PrecomputedNeighborhoodSearch{NDIMS}(backend = Vector{Vector{Int32}}), GridNeighborhoodSearch{NDIMS}(cell_list = SpatialHashingCellList{NDIMS}(list_size = 2 * n_points)), GridNeighborhoodSearch{NDIMS}(cell_list = SpatialHashingCellList{NDIMS}(list_size = 2 *