Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion benchmarks/n_body.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,11 @@ function benchmark_n_body(neighborhood_search, coordinates_;
# Passing a different backend like `CUDA.CUDABackend`
# allows us to change the type of the array to run the benchmark on the GPU.
coordinates = PointNeighbors.Adapt.adapt(parallelization_backend, coordinates_)
nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search)

# Remove unnecessary data structures that are only used for initialization
neighborhood_search_ = PointNeighbors.freeze_neighborhood_search(neighborhood_search)

nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search_)

# This preserves the data type of `coordinates`, which makes it work for GPU types
mass = 1e10 * (rand!(similar(coordinates, size(coordinates, 2))) .+ 1)
Expand Down
5 changes: 4 additions & 1 deletion benchmarks/run_benchmarks.jl
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ end
Shortcut to call [`run_benchmark`](@ref) with all GPU-compatible neighborhood search
implementations:
- `GridNeighborhoodSearch` with `FullGridCellList`
- `PrecomputedNeighborhoodSearch`

# Arguments
- `benchmark`: The benchmark function. See [`benchmark_count_neighbors`](@ref),
Expand Down Expand Up @@ -178,9 +179,11 @@ function run_benchmark_gpu(benchmark, n_points_per_dimension, iterations; kwargs
GridNeighborhoodSearch{NDIMS}(search_radius = 0.0f0,
cell_list = FullGridCellList(; search_radius = 0.0f0,
min_corner, max_corner))
PrecomputedNeighborhoodSearch{NDIMS}(search_radius = 0.0f0)
]

names = ["GridNeighborhoodSearch with FullGridCellList";;]
names = ["GridNeighborhoodSearch with FullGridCellList";;
"PrecomputedNeighborhoodSearch"]

run_benchmark(benchmark, n_points_per_dimension, iterations,
neighborhood_searches; names, kwargs...)
Expand Down
6 changes: 5 additions & 1 deletion benchmarks/smoothed_particle_hydrodynamics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,11 @@ function __benchmark_wcsph_inner(neighborhood_search, initial_condition, state_e
density_diffusion = density_diffusion)

system = PointNeighbors.Adapt.adapt(parallelization_backend, fluid_system)
nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search)

# Remove unnecessary data structures that are only used for initialization
neighborhood_search_ = PointNeighbors.freeze_neighborhood_search(neighborhood_search)

nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search_)
semi = DummySemidiscretization(nhs, parallelization_backend, true)

v = PointNeighbors.Adapt.adapt(parallelization_backend,
Expand Down
6 changes: 4 additions & 2 deletions src/cell_lists/full_grid.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ See [`copy_neighborhood_search`](@ref) for more details.
- `backend = DynamicVectorOfVectors{Int32}`: Type of the data structure to store the actual
cell lists. Can be
- `Vector{Vector{Int32}}`: Scattered memory, but very memory-efficient.
- `DynamicVectorOfVectors{Int32}`: Contiguous memory, optimizing cache-hits.
- `DynamicVectorOfVectors{Int32}`: Contiguous memory, optimizing cache-hits
and GPU compatible.
- `max_points_per_cell = 100`: Maximum number of points per cell. This will be used to
allocate the `DynamicVectorOfVectors`. It is not used with
the `Vector{Vector{Int32}}` backend.
Expand Down Expand Up @@ -186,7 +187,8 @@ function copy_cell_list(cell_list::FullGridCellList, search_radius, periodic_box

return FullGridCellList(; min_corner, max_corner, search_radius,
backend = typeof(cell_list.cells),
max_points_per_cell = max_points_per_cell(cell_list.cells))
max_points_per_cell = max_inner_length(cell_list.cells.backend,
100))
end

@inline function check_cell_bounds(cell_list::FullGridCellList{<:DynamicVectorOfVectors{<:Any,
Expand Down
13 changes: 7 additions & 6 deletions src/cell_lists/spatial_hashing.jl
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
"""
SpatialHashingCellList{NDIMS}(; list_size,
SpatialHashingCellList{NDIMS}(; list_size,
backend = DynamicVectorOfVectors{Int32},
max_points_per_cell = 100)

A basic spatial hashing implementation. Similar to [`DictionaryCellList`](@ref), the domain is discretized into cells,
and the particles in each cell are stored in a hash map. The hash is computed using the spatial location of each cell,
as described by Ihmsen et al. (2011)(@cite Ihmsen2011). By using a hash map that stores entries only for non-empty cells,
the domain is effectively infinite. The size of the hash map is recommended to be approximately twice the number of particles
A basic spatial hashing implementation. Similar to [`DictionaryCellList`](@ref), the domain is discretized into cells,
and the particles in each cell are stored in a hash map. The hash is computed using the spatial location of each cell,
as described by Ihmsen et al. (2011)(@cite Ihmsen2011). By using a hash map that stores entries only for non-empty cells,
the domain is effectively infinite. The size of the hash map is recommended to be approximately twice the number of particles
to balance memory consumption against the likelihood of hash collisions.

# Arguments
Expand Down Expand Up @@ -128,7 +128,8 @@ function copy_cell_list(cell_list::SpatialHashingCellList, search_radius,

return SpatialHashingCellList{NDIMS}(list_size = list_size,
backend = typeof(cell_list.cells),
max_points_per_cell = max_points_per_cell(cell_list.cells))
max_points_per_cell = max_inner_length(cell_list.cells.backend,
100))
end

@inline function Base.getindex(cell_list::SpatialHashingCellList, cell::Tuple)
Expand Down
15 changes: 8 additions & 7 deletions src/gpu.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
# `Adapt.@adapt_structure` automatically generates the `adapt` function for our custom types.
Adapt.@adapt_structure FullGridCellList
Adapt.@adapt_structure DynamicVectorOfVectors
Adapt.@adapt_structure GridNeighborhoodSearch

# `adapt(CuArray, ::SVector)::SVector`, but `adapt(Array, ::SVector)::Vector`.
# We don't want to change the type of the `SVector` here.
Expand All @@ -22,14 +23,14 @@ function Adapt.adapt_structure(to::typeof(Array), range::UnitRange)
return range
end

function Adapt.adapt_structure(to, nhs::GridNeighborhoodSearch)
(; search_radius, periodic_box, n_cells, cell_size) = nhs
function Adapt.adapt_structure(to, nhs::PrecomputedNeighborhoodSearch)
neighbor_lists = Adapt.adapt_structure(to, nhs.neighbor_lists)
search_radius = Adapt.adapt_structure(to, nhs.search_radius)
periodic_box = Adapt.adapt_structure(to, nhs.periodic_box)
neighborhood_search = Adapt.adapt_structure(to, nhs.neighborhood_search)

cell_list = Adapt.adapt_structure(to, nhs.cell_list)
update_buffer = Adapt.adapt_structure(to, nhs.update_buffer)

return GridNeighborhoodSearch(cell_list, search_radius, periodic_box, n_cells,
cell_size, update_buffer, nhs.update_strategy)
return PrecomputedNeighborhoodSearch{ndims(nhs)}(neighbor_lists, search_radius,
periodic_box, neighborhood_search)
end

function Adapt.adapt_structure(to, cell_list::SpatialHashingCellList{NDIMS}) where {NDIMS}
Expand Down
8 changes: 8 additions & 0 deletions src/neighborhood_search.jl
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,14 @@ GridNeighborhoodSearch{2, Float64, ...}(...)
return nothing
end

@inline function freeze_neighborhood_search(search::AbstractNeighborhoodSearch)
# Indicate that the neighborhood search is static and will not be updated anymore.
# Some implementations might use this to strip unnecessary data structures for updating.
# Notably, this is used for the `PrecomputedNeighborhoodSearch` to strip a potentially
# not GPU-compatible inner neighborhood search that is used only for initialization.
return search
end

"""
PeriodicBox(; min_corner, max_corner)

Expand Down
134 changes: 101 additions & 33 deletions src/nhs_precomputed.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ slower [`update!`](@ref).
A [`GridNeighborhoodSearch`](@ref) is used internally to compute the neighbor lists during
initialization and update.

When used on the GPU, use `freeze_neighborhood_search` after the initialization
to strip the internal neighborhood search, which is not needed anymore.

# Arguments
- `NDIMS`: Number of dimensions.

Expand All @@ -22,71 +25,102 @@ initialization and update.
[`PeriodicBox`](@ref).
- `update_strategy`: Strategy to parallelize `update!` of the internally used
`GridNeighborhoodSearch`. See [`GridNeighborhoodSearch`](@ref)
for available options.
for available options. This is only used for the default value
of `update_neighborhood_search` below.
- `update_neighborhood_search = GridNeighborhoodSearch{NDIMS}(; periodic_box, update_strategy)`:
The neighborhood search used to compute the neighbor lists.
By default, a [`GridNeighborhoodSearch`](@ref) is used.
- `backend = DynamicVectorOfVectors{Int32}`: Type of the data structure to store
the neighbor lists. Can be
- `Vector{Vector{Int32}}`: Scattered memory, but very memory-efficient.
- `DynamicVectorOfVectors{Int32}`: Contiguous memory, optimizing cache-hits
and GPU-compatible.
- `max_neighbors`: Maximum number of neighbors per particle. This will be used to
allocate the `DynamicVectorOfVectors`. It is not used with
other backends. The default is 64 in 2D and 324 in 3D.
"""
struct PrecomputedNeighborhoodSearch{NDIMS, NHS, NL, PB} <: AbstractNeighborhoodSearch
neighborhood_search :: NHS
struct PrecomputedNeighborhoodSearch{NDIMS, NL, ELTYPE, PB, NHS} <:
AbstractNeighborhoodSearch
neighbor_lists :: NL
search_radius :: ELTYPE
periodic_box :: PB
neighborhood_search :: NHS

function PrecomputedNeighborhoodSearch{NDIMS}(; search_radius = 0.0, n_points = 0,
periodic_box = nothing,
update_strategy = nothing) where {NDIMS}
nhs = GridNeighborhoodSearch{NDIMS}(; search_radius, n_points,
periodic_box, update_strategy)

neighbor_lists = Vector{Vector{Int}}()

new{NDIMS, typeof(nhs),
typeof(neighbor_lists),
typeof(periodic_box)}(nhs, neighbor_lists, periodic_box)
function PrecomputedNeighborhoodSearch{NDIMS}(neighbor_lists, search_radius,
periodic_box,
update_neighborhood_search) where {NDIMS}
return new{NDIMS, typeof(neighbor_lists),
typeof(search_radius),
typeof(periodic_box),
typeof(update_neighborhood_search)}(neighbor_lists, search_radius,
periodic_box,
update_neighborhood_search)
end
end

function PrecomputedNeighborhoodSearch{NDIMS}(; search_radius = 0.0, n_points = 0,
periodic_box = nothing,
update_strategy = nothing,
update_neighborhood_search = GridNeighborhoodSearch{NDIMS}(;
search_radius,
n_points,
periodic_box,
update_strategy),
backend = DynamicVectorOfVectors{Int32},
max_neighbors = 4 * NDIMS^4) where {NDIMS}
neighbor_lists = construct_backend(backend, n_points, max_neighbors)

PrecomputedNeighborhoodSearch{NDIMS}(neighbor_lists, search_radius,
periodic_box, update_neighborhood_search)
end

@inline Base.ndims(::PrecomputedNeighborhoodSearch{NDIMS}) where {NDIMS} = NDIMS

@inline requires_update(::PrecomputedNeighborhoodSearch) = (true, true)

@inline function search_radius(search::PrecomputedNeighborhoodSearch)
return search_radius(search.neighborhood_search)
end

function initialize!(search::PrecomputedNeighborhoodSearch,
x::AbstractMatrix, y::AbstractMatrix;
parallelization_backend = default_backend(x),
eachindex_y = axes(y, 2))
(; neighborhood_search, neighbor_lists) = search

if eachindex_y != axes(y, 2)
error("this neighborhood search does not support inactive points")
end

# Initialize grid NHS
initialize!(neighborhood_search, x, y; eachindex_y, parallelization_backend)
initialize!(neighborhood_search, x, y; parallelization_backend)

initialize_neighbor_lists!(neighbor_lists, neighborhood_search, x, y,
parallelization_backend, eachindex_y)
parallelization_backend)

return search
end

# WARNING! Experimental feature:
# By default, determine the parallelization backend from the type of `x`.
# Optionally, pass a `KernelAbstractions.Backend` to run the KernelAbstractions.jl code
# on this backend. This can be useful to run GPU kernels on the CPU by passing
# `parallelization_backend = KernelAbstractions.CPU()`, even though `x isa Array`.
function update!(search::PrecomputedNeighborhoodSearch,
x::AbstractMatrix, y::AbstractMatrix;
points_moving = (true, true), parallelization_backend = default_backend(x),
eachindex_y = axes(y, 2))
(; neighborhood_search, neighbor_lists) = search

# Update grid NHS
update!(neighborhood_search, x, y; eachindex_y, points_moving, parallelization_backend)
if eachindex_y != axes(y, 2)
error("this neighborhood search does not support inactive points")
end

# Update the internal neighborhood search
update!(neighborhood_search, x, y; points_moving, parallelization_backend)

# Skip update if both point sets are static
if any(points_moving)
initialize_neighbor_lists!(neighbor_lists, neighborhood_search, x, y,
parallelization_backend, eachindex_y)
parallelization_backend)
end

return search
end

function initialize_neighbor_lists!(neighbor_lists, neighborhood_search, x, y,
parallelization_backend, eachindex_y)
parallelization_backend)
# Initialize neighbor lists
empty!(neighbor_lists)
resize!(neighbor_lists, size(x, 2))
Expand All @@ -95,12 +129,28 @@ function initialize_neighbor_lists!(neighbor_lists, neighborhood_search, x, y,
end

# Fill neighbor lists
foreach_point_neighbor(x, y, neighborhood_search; parallelization_backend,
points = eachindex_y) do point, neighbor, _, _
foreach_point_neighbor(x, y, neighborhood_search;
parallelization_backend) do point, neighbor, _, _
push!(neighbor_lists[point], neighbor)
end
end

function initialize_neighbor_lists!(neighbor_lists::DynamicVectorOfVectors,
neighborhood_search, x, y, parallelization_backend)
resize!(neighbor_lists, size(x, 2))

# `Base.empty!.(neighbor_lists)`, but for all backends
@threaded parallelization_backend for i in eachindex(neighbor_lists)
emptyat!(neighbor_lists, i)
end

# Fill neighbor lists
foreach_point_neighbor(x, y, neighborhood_search;
parallelization_backend) do point, neighbor, _, _
pushat!(neighbor_lists, point, neighbor)
end
end

@inline function foreach_neighbor(f, neighbor_system_coords,
neighborhood_search::PrecomputedNeighborhoodSearch,
point, point_coords, search_radius)
Expand Down Expand Up @@ -132,8 +182,26 @@ end

function copy_neighborhood_search(nhs::PrecomputedNeighborhoodSearch,
search_radius, n_points; eachpoint = 1:n_points)
update_strategy_ = nhs.neighborhood_search.update_strategy
update_neighborhood_search = copy_neighborhood_search(nhs.neighborhood_search,
search_radius, n_points;
eachpoint)

# For `Vector{Vector}` backend use `4 * ndims(nhs)^4` as fallback.
# This should never be used because this backend doesn't require a `max_neighbors`.
max_neighbors = max_inner_length(nhs.neighbor_lists, 4 * ndims(nhs)^4)
return PrecomputedNeighborhoodSearch{ndims(nhs)}(; search_radius, n_points,
periodic_box = nhs.periodic_box,
update_strategy = update_strategy_)
update_neighborhood_search,
backend = typeof(nhs.neighbor_lists),
max_neighbors)
end

@inline function freeze_neighborhood_search(search::PrecomputedNeighborhoodSearch)
# Indicate that the neighborhood search is static and will not be updated anymore.
# For the `PrecomputedNeighborhoodSearch`, strip the inner neighborhood search,
# which is used only for initialization and updating.
return PrecomputedNeighborhoodSearch{ndims(search)}(search.neighbor_lists,
search.search_radius,
search.periodic_box,
nothing)
end
9 changes: 9 additions & 0 deletions src/vector_of_vectors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -161,3 +161,12 @@ end

return vov
end

function max_inner_length(cells::DynamicVectorOfVectors, fallback)
return size(cells.backend, 1)
end

# Fallback when backend is a `Vector{Vector{T}}`. Only used for copying the cell list.
function max_inner_length(::Any, fallback)
return fallback
end
Loading
Loading