Skip to content

Commit 47efaf3

Browse files
committed
Use contiguous memory layout with PrecomputedNeighborhoodSearch
1 parent 9d93faf commit 47efaf3

File tree

7 files changed

+141
-43
lines changed

7 files changed

+141
-43
lines changed

benchmarks/n_body.jl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@ function benchmark_n_body(neighborhood_search, coordinates_;
1818
# Passing a different backend like `CUDA.CUDABackend`
1919
# allows us to change the type of the array to run the benchmark on the GPU.
2020
coordinates = PointNeighbors.Adapt.adapt(parallelization_backend, coordinates_)
21-
nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search)
21+
# Remove unnecessary data structures that are only used for initialization
22+
neighborhood_search_ = PointNeighbors.freeze_neighborhood_search(neighborhood_search)
23+
nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search_)
2224

2325
# This preserves the data type of `coordinates`, which makes it work for GPU types
2426
mass = 1e10 * (rand!(similar(coordinates, size(coordinates, 2))) .+ 1)

benchmarks/smoothed_particle_hydrodynamics.jl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,11 @@ function __benchmark_wcsph_inner(neighborhood_search, initial_condition, state_e
8686
density_diffusion = density_diffusion)
8787

8888
system = PointNeighbors.Adapt.adapt(parallelization_backend, fluid_system)
89-
nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search)
89+
90+
# Remove unnecessary data structures that are only used for initialization
91+
neighborhood_search_ = PointNeighbors.freeze_neighborhood_search(neighborhood_search)
92+
93+
nhs = PointNeighbors.Adapt.adapt(parallelization_backend, neighborhood_search_)
9094
semi = DummySemidiscretization(nhs, parallelization_backend, true)
9195

9296
v = PointNeighbors.Adapt.adapt(parallelization_backend,

src/cell_lists/full_grid.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ See [`copy_neighborhood_search`](@ref) for more details.
2121
- `backend = DynamicVectorOfVectors{Int32}`: Type of the data structure to store the actual
2222
cell lists. Can be
2323
- `Vector{Vector{Int32}}`: Scattered memory, but very memory-efficient.
24-
- `DynamicVectorOfVectors{Int32}`: Contiguous memory, optimizing cache-hits.
24+
- `DynamicVectorOfVectors{Int32}`: Contiguous memory, optimizing cache-hits
25+
and GPU compatible.
2526
- `max_points_per_cell = 100`: Maximum number of points per cell. This will be used to
2627
allocate the `DynamicVectorOfVectors`. It is not used with
2728
the `Vector{Vector{Int32}}` backend.

src/gpu.jl

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
# `Adapt.@adapt_structure` automatically generates the `adapt` function for our custom types.
1010
Adapt.@adapt_structure FullGridCellList
1111
Adapt.@adapt_structure DynamicVectorOfVectors
12+
Adapt.@adapt_structure GridNeighborhoodSearch
1213

1314
# `adapt(CuArray, ::SVector)::SVector`, but `adapt(Array, ::SVector)::Vector`.
1415
# We don't want to change the type of the `SVector` here.
@@ -22,14 +23,14 @@ function Adapt.adapt_structure(to::typeof(Array), range::UnitRange)
2223
return range
2324
end
2425

25-
function Adapt.adapt_structure(to, nhs::GridNeighborhoodSearch)
26-
(; search_radius, periodic_box, n_cells, cell_size) = nhs
26+
function Adapt.adapt_structure(to, nhs::PrecomputedNeighborhoodSearch)
27+
neighbor_lists = Adapt.adapt_structure(to, nhs.neighbor_lists)
28+
search_radius = Adapt.adapt_structure(to, nhs.search_radius)
29+
periodic_box = Adapt.adapt_structure(to, nhs.periodic_box)
30+
neighborhood_search = Adapt.adapt_structure(to, nhs.neighborhood_search)
2731

28-
cell_list = Adapt.adapt_structure(to, nhs.cell_list)
29-
update_buffer = Adapt.adapt_structure(to, nhs.update_buffer)
30-
31-
return GridNeighborhoodSearch(cell_list, search_radius, periodic_box, n_cells,
32-
cell_size, update_buffer, nhs.update_strategy)
32+
return PrecomputedNeighborhoodSearch{ndims(nhs)}(neighbor_lists, search_radius,
33+
periodic_box, neighborhood_search)
3334
end
3435

3536
function Adapt.adapt_structure(to, cell_list::SpatialHashingCellList{NDIMS}) where {NDIMS}

src/neighborhood_search.jl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,14 @@ GridNeighborhoodSearch{2, Float64, ...}(...)
108108
return nothing
109109
end
110110

111+
@inline function freeze_neighborhood_search(search::AbstractNeighborhoodSearch)
112+
# Indicate that the neighborhood search is static and will not be updated anymore.
113+
# Some implementations might use this to strip unnecessary data structures for updating.
114+
# Notably, this is used for the `PrecomputedNeighborhoodSearch` to strip a potentially
115+
# not GPU-compatible inner neighborhood search that is used only for initialization.
116+
return search
117+
end
118+
111119
"""
112120
PeriodicBox(; min_corner, max_corner)
113121

src/nhs_precomputed.jl

Lines changed: 105 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -22,71 +22,102 @@ initialization and update.
2222
[`PeriodicBox`](@ref).
2323
- `update_strategy`: Strategy to parallelize `update!` of the internally used
2424
`GridNeighborhoodSearch`. See [`GridNeighborhoodSearch`](@ref)
25-
for available options.
25+
for available options. This is only used for the default value
26+
of `update_neighborhood_search` below.
27+
- `update_neighborhood_search = GridNeighborhoodSearch{NDIMS}(; periodic_box, update_strategy)`:
28+
The neighborhood search used to compute the neighbor lists.
29+
By default, a [`GridNeighborhoodSearch`](@ref) is used.
30+
- `backend = DynamicVectorOfVectors{Int32}`: Type of the data structure to store
31+
the neighbor lists. Can be
32+
- `Vector{Vector{Int32}}`: Scattered memory, but very memory-efficient.
33+
- `DynamicVectorOfVectors{Int32}`: Contiguous memory, optimizing cache-hits
34+
and GPU-compatible.
35+
- `max_neighbors`: Maximum number of neighbors per particle. This will be used to
36+
allocate the `DynamicVectorOfVectors`. It is not used with
37+
other backends. The default is 64 in 2D and 324 in 3D.
2638
"""
27-
struct PrecomputedNeighborhoodSearch{NDIMS, NHS, NL, PB} <: AbstractNeighborhoodSearch
28-
neighborhood_search :: NHS
39+
struct PrecomputedNeighborhoodSearch{NDIMS, NL, ELTYPE, PB, NHS} <:
40+
AbstractNeighborhoodSearch
2941
neighbor_lists :: NL
42+
search_radius :: ELTYPE
3043
periodic_box :: PB
44+
neighborhood_search :: NHS
3145

32-
function PrecomputedNeighborhoodSearch{NDIMS}(; search_radius = 0.0, n_points = 0,
33-
periodic_box = nothing,
34-
update_strategy = nothing) where {NDIMS}
35-
nhs = GridNeighborhoodSearch{NDIMS}(; search_radius, n_points,
36-
periodic_box, update_strategy)
37-
38-
neighbor_lists = Vector{Vector{Int}}()
39-
40-
new{NDIMS, typeof(nhs),
41-
typeof(neighbor_lists),
42-
typeof(periodic_box)}(nhs, neighbor_lists, periodic_box)
46+
function PrecomputedNeighborhoodSearch{NDIMS}(neighbor_lists, search_radius,
47+
periodic_box,
48+
update_neighborhood_search) where {NDIMS}
49+
return new{NDIMS, typeof(neighbor_lists),
50+
typeof(search_radius),
51+
typeof(periodic_box),
52+
typeof(update_neighborhood_search)}(neighbor_lists, search_radius,
53+
periodic_box,
54+
update_neighborhood_search)
4355
end
4456
end
4557

58+
function PrecomputedNeighborhoodSearch{NDIMS}(; search_radius = 0.0, n_points = 0,
59+
periodic_box = nothing,
60+
update_strategy = nothing,
61+
update_neighborhood_search = GridNeighborhoodSearch{NDIMS}(;
62+
search_radius,
63+
n_points,
64+
periodic_box,
65+
update_strategy),
66+
backend = DynamicVectorOfVectors{Int32},
67+
max_neighbors = 4 * NDIMS^4) where {NDIMS}
68+
neighbor_lists = construct_backend(nothing, backend, n_points, max_neighbors)
69+
70+
PrecomputedNeighborhoodSearch{NDIMS}(neighbor_lists, search_radius,
71+
periodic_box, update_neighborhood_search)
72+
end
73+
4674
@inline Base.ndims(::PrecomputedNeighborhoodSearch{NDIMS}) where {NDIMS} = NDIMS
4775

4876
@inline requires_update(::PrecomputedNeighborhoodSearch) = (true, true)
4977

50-
@inline function search_radius(search::PrecomputedNeighborhoodSearch)
51-
return search_radius(search.neighborhood_search)
52-
end
53-
5478
function initialize!(search::PrecomputedNeighborhoodSearch,
5579
x::AbstractMatrix, y::AbstractMatrix;
5680
parallelization_backend = default_backend(x),
5781
eachindex_y = axes(y, 2))
5882
(; neighborhood_search, neighbor_lists) = search
5983

84+
if eachindex_y != axes(y, 2)
85+
error("this neighborhood search does not support inactive points")
86+
end
87+
6088
# Initialize grid NHS
61-
initialize!(neighborhood_search, x, y; eachindex_y, parallelization_backend)
89+
initialize!(neighborhood_search, x, y; parallelization_backend)
6290

6391
initialize_neighbor_lists!(neighbor_lists, neighborhood_search, x, y,
64-
parallelization_backend, eachindex_y)
92+
parallelization_backend)
93+
94+
return search
6595
end
6696

67-
# WARNING! Experimental feature:
68-
# By default, determine the parallelization backend from the type of `x`.
69-
# Optionally, pass a `KernelAbstractions.Backend` to run the KernelAbstractions.jl code
70-
# on this backend. This can be useful to run GPU kernels on the CPU by passing
71-
# `parallelization_backend = KernelAbstractions.CPU()`, even though `x isa Array`.
7297
function update!(search::PrecomputedNeighborhoodSearch,
7398
x::AbstractMatrix, y::AbstractMatrix;
7499
points_moving = (true, true), parallelization_backend = default_backend(x),
75100
eachindex_y = axes(y, 2))
76101
(; neighborhood_search, neighbor_lists) = search
77102

78-
# Update grid NHS
79-
update!(neighborhood_search, x, y; eachindex_y, points_moving, parallelization_backend)
103+
if eachindex_y != axes(y, 2)
104+
error("this neighborhood search does not support inactive points")
105+
end
106+
107+
# Update the internal neighborhood search
108+
update!(neighborhood_search, x, y; points_moving, parallelization_backend)
80109

81110
# Skip update if both point sets are static
82111
if any(points_moving)
83112
initialize_neighbor_lists!(neighbor_lists, neighborhood_search, x, y,
84-
parallelization_backend, eachindex_y)
113+
parallelization_backend)
85114
end
115+
116+
return search
86117
end
87118

88119
function initialize_neighbor_lists!(neighbor_lists, neighborhood_search, x, y,
89-
parallelization_backend, eachindex_y)
120+
parallelization_backend)
90121
# Initialize neighbor lists
91122
empty!(neighbor_lists)
92123
resize!(neighbor_lists, size(x, 2))
@@ -95,12 +126,28 @@ function initialize_neighbor_lists!(neighbor_lists, neighborhood_search, x, y,
95126
end
96127

97128
# Fill neighbor lists
98-
foreach_point_neighbor(x, y, neighborhood_search; parallelization_backend,
99-
points = eachindex_y) do point, neighbor, _, _
129+
foreach_point_neighbor(x, y, neighborhood_search;
130+
parallelization_backend) do point, neighbor, _, _
100131
push!(neighbor_lists[point], neighbor)
101132
end
102133
end
103134

135+
function initialize_neighbor_lists!(neighbor_lists::DynamicVectorOfVectors,
136+
neighborhood_search, x, y, parallelization_backend)
137+
resize!(neighbor_lists, size(x, 2))
138+
139+
# `Base.empty!.(neighbor_lists)`, but for all backends
140+
@threaded parallelization_backend for i in eachindex(neighbor_lists)
141+
emptyat!(neighbor_lists, i)
142+
end
143+
144+
# Fill neighbor lists
145+
foreach_point_neighbor(x, y, neighborhood_search;
146+
parallelization_backend) do point, neighbor, _, _
147+
pushat!(neighbor_lists, point, neighbor)
148+
end
149+
end
150+
104151
@inline function foreach_neighbor(f, neighbor_system_coords,
105152
neighborhood_search::PrecomputedNeighborhoodSearch,
106153
point, point_coords, search_radius)
@@ -132,8 +179,33 @@ end
132179

133180
function copy_neighborhood_search(nhs::PrecomputedNeighborhoodSearch,
134181
search_radius, n_points; eachpoint = 1:n_points)
135-
update_strategy_ = nhs.neighborhood_search.update_strategy
182+
update_neighborhood_search = copy_neighborhood_search(nhs.neighborhood_search,
183+
search_radius, n_points;
184+
eachpoint)
185+
max_neighbors = max_inner_length(nhs.neighbor_lists, 4 * ndims(nhs)^4)
136186
return PrecomputedNeighborhoodSearch{ndims(nhs)}(; search_radius, n_points,
137187
periodic_box = nhs.periodic_box,
138-
update_strategy = update_strategy_)
188+
update_neighborhood_search,
189+
backend = typeof(nhs.neighbor_lists),
190+
max_neighbors)
191+
end
192+
193+
@inline function freeze_neighborhood_search(search::PrecomputedNeighborhoodSearch)
194+
# Indicate that the neighborhood search is static and will not be updated anymore.
195+
# For the `PrecomputedNeighborhoodSearch`, strip the inner neighborhood search,
196+
# which is used only for initialization and updating.
197+
return PrecomputedNeighborhoodSearch{ndims(search)}(search.neighbor_lists,
198+
search.search_radius,
199+
search.periodic_box,
200+
nothing)
201+
end
202+
203+
# TODO move to `vector_of_vectors.jl`
204+
function max_inner_length(cells::DynamicVectorOfVectors, fallback)
205+
return size(cells.backend, 1)
206+
end
207+
208+
# Fallback when backend is a `Vector{Vector{T}}`. Only used for copying the cell list.
209+
function max_inner_length(::Any, fallback)
210+
return fallback
139211
end

test/neighborhood_search.jl

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,9 @@
5959
backend = Vector{Vector{Int32}})),
6060
PrecomputedNeighborhoodSearch{NDIMS}(; search_radius, n_points,
6161
periodic_box = periodic_boxes[i]),
62+
PrecomputedNeighborhoodSearch{NDIMS}(; search_radius, n_points,
63+
periodic_box = periodic_boxes[i],
64+
backend = Vector{Vector{Int32}}),
6265
GridNeighborhoodSearch{NDIMS}(; search_radius, n_points,
6366
periodic_box = periodic_boxes[i],
6467
cell_list = SpatialHashingCellList{NDIMS}(list_size = 2 *
@@ -71,6 +74,7 @@
7174
"`GridNeighborhoodSearch` with `FullGridCellList` with `DynamicVectorOfVectors`",
7275
"`GridNeighborhoodSearch` with `FullGridCellList` with `Vector{Vector}`",
7376
"`PrecomputedNeighborhoodSearch`",
77+
"`PrecomputedNeighborhoodSearch` with `Vector{Vector}`",
7478
"`GridNeighborhoodSearch` with `SpatialHashingCellList`"
7579
]
7680

@@ -86,6 +90,8 @@
8690
max_corner = periodic_boxes[i].max_corner,
8791
backend = Vector{Vector{Int32}})),
8892
PrecomputedNeighborhoodSearch{NDIMS}(periodic_box = periodic_boxes[i]),
93+
PrecomputedNeighborhoodSearch{NDIMS}(periodic_box = periodic_boxes[i],
94+
backend = Vector{Vector{Int32}}),
8995
GridNeighborhoodSearch{NDIMS}(periodic_box = periodic_boxes[i],
9096
cell_list = SpatialHashingCellList{NDIMS}(list_size = 2 *
9197
n_points))
@@ -193,6 +199,8 @@
193199
search_radius,
194200
backend = Vector{Vector{Int}})),
195201
PrecomputedNeighborhoodSearch{NDIMS}(; search_radius, n_points),
202+
PrecomputedNeighborhoodSearch{NDIMS}(; search_radius, n_points,
203+
backend = Vector{Vector{Int}}),
196204
GridNeighborhoodSearch{NDIMS}(; search_radius, n_points,
197205
cell_list = SpatialHashingCellList{NDIMS}(list_size = 2 *
198206
n_points)),
@@ -211,6 +219,7 @@
211219
"`GridNeighborhoodSearch` with `FullGridCellList` with `DynamicVectorOfVectors` and `SemiParallelUpdate`",
212220
"`GridNeighborhoodSearch` with `FullGridCellList` with `Vector{Vector}`",
213221
"`PrecomputedNeighborhoodSearch`",
222+
"`PrecomputedNeighborhoodSearch` with `Vector{Vector}`",
214223
"`GridNeighborhoodSearch` with `SpatialHashingCellList` with `DynamicVectorOfVectors`",
215224
"`GridNeighborhoodSearch` with `SpatialHashingCellList` with `Vector{Vector}`"
216225
]
@@ -232,6 +241,7 @@
232241
max_corner,
233242
backend = Vector{Vector{Int32}})),
234243
PrecomputedNeighborhoodSearch{NDIMS}(),
244+
PrecomputedNeighborhoodSearch{NDIMS}(backend = Vector{Vector{Int32}}),
235245
GridNeighborhoodSearch{NDIMS}(cell_list = SpatialHashingCellList{NDIMS}(list_size = 2 *
236246
n_points)),
237247
GridNeighborhoodSearch{NDIMS}(cell_list = SpatialHashingCellList{NDIMS}(list_size = 2 *

0 commit comments

Comments
 (0)