Skip to content

Commit 26c98ba

Browse files
authored
Make benchmarks run on the GPU (#77)
* Make WCSPH benchmark run on the GPU
* Add FP32 WCSPH benchmark
* Add GPU benchmark of N-Body as well
* Fix typo
1 parent bc64b76 commit 26c98ba

File tree

2 files changed

+80
-10
lines changed

2 files changed

+80
-10
lines changed

benchmarks/n_body.jl

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,15 @@ This is a more realistic benchmark for particle-based simulations than
1212
However, due to the higher computational cost, differences between neighborhood search
1313
implementations are less pronounced.
1414
"""
15-
function benchmark_n_body(neighborhood_search, coordinates; parallel = true)
16-
mass = 1e10 * (rand(size(coordinates, 2)) .+ 1)
15+
function benchmark_n_body(neighborhood_search, coordinates_; parallel = true)
16+
# Passing a different backend like `CUDA.CUDABackend`
17+
# allows us to change the type of the array to run the benchmark on the GPU.
18+
# Passing `parallel = true` or `parallel = false` will not change anything here.
19+
coordinates = PointNeighbors.Adapt.adapt(parallel, coordinates_)
20+
nhs = PointNeighbors.Adapt.adapt(parallel, neighborhood_search)
21+
22+
# This preserves the data type of `coordinates`, which makes it work for GPU types
23+
mass = 1e10 * (rand!(similar(coordinates, size(coordinates, 2))) .+ 1)
1724
G = 6.6743e-11
1825

1926
dv = similar(coordinates)
@@ -36,6 +43,5 @@ function benchmark_n_body(neighborhood_search, coordinates; parallel = true)
3643
return dv
3744
end
3845

39-
return @belapsed $compute_acceleration!($dv, $coordinates, $mass, $G,
40-
$neighborhood_search, $parallel)
46+
return @belapsed $compute_acceleration!($dv, $coordinates, $mass, $G, $nhs, $parallel)
4147
end

benchmarks/smoothed_particle_hydrodynamics.jl

Lines changed: 70 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,16 +34,80 @@ function benchmark_wcsph(neighborhood_search, coordinates; parallel = true)
3434
smoothing_length, viscosity = viscosity,
3535
density_diffusion = density_diffusion)
3636

37-
v = vcat(fluid.velocity, fluid.density')
38-
u = copy(fluid.coordinates)
37+
# Note that we cannot just disable parallelism in TrixiParticles.
38+
# But passing a different backend like `CUDA.CUDABackend`
39+
# allows us to change the type of the array to run the benchmark on the GPU.
40+
if parallel isa Bool
41+
system = fluid_system
42+
nhs = neighborhood_search
43+
else
44+
system = PointNeighbors.Adapt.adapt(parallel, fluid_system)
45+
nhs = PointNeighbors.Adapt.adapt(parallel, neighborhood_search)
46+
end
47+
48+
v = PointNeighbors.Adapt.adapt(parallel, vcat(fluid.velocity, fluid.density'))
49+
u = PointNeighbors.Adapt.adapt(parallel, coordinates)
3950
dv = zero(v)
4051

4152
# Initialize the system
42-
TrixiParticles.initialize!(fluid_system, neighborhood_search)
43-
TrixiParticles.compute_pressure!(fluid_system, v)
53+
TrixiParticles.initialize!(system, nhs)
54+
TrixiParticles.compute_pressure!(system, v)
4455

45-
return @belapsed TrixiParticles.interact!($dv, $v, $u, $v, $u, $neighborhood_search,
46-
$fluid_system, $fluid_system)
56+
return @belapsed TrixiParticles.interact!($dv, $v, $u, $v, $u, $nhs, $system, $system)
57+
end
58+
59+
"""
60+
benchmark_wcsph_fp32(neighborhood_search, coordinates; parallel = true)
61+
62+
Like [`benchmark_wcsph`](@ref), but using single precision floating point numbers.
63+
"""
64+
function benchmark_wcsph_fp32(neighborhood_search, coordinates_; parallel = true)
    # Single-precision variant of the WCSPH benchmark: particle data and all physical
    # parameters are `Float32`.
    #
    # `parallel` is either a `Bool` (system and neighborhood search are used as passed in)
    # or a backend like `CUDA.CUDABackend`, in which case the system, the neighborhood
    # search and the arrays are adapted to that backend.
    #
    # Returns the value of `@belapsed` for a single `TrixiParticles.interact!` call.
    n_dims = ndims(neighborhood_search)

    coordinates = convert(Matrix{Float32}, coordinates_)
    density = 1000.0f0
    fluid = InitialCondition(; coordinates, density, mass = 0.1f0)

    # Compact support == smoothing length for the Wendland kernel
    smoothing_length = convert(Float32, PointNeighbors.search_radius(neighborhood_search))
    if n_dims == 1
        smoothing_kernel = SchoenbergCubicSplineKernel{1}()
    else
        smoothing_kernel = WendlandC2Kernel{n_dims}()
    end

    sound_speed = 10.0f0
    state_equation = StateEquationCole(; sound_speed, reference_density = density,
                                       exponent = 1)

    fluid_density_calculator = ContinuityDensity()
    viscosity = ArtificialViscosityMonaghan(alpha = 0.02f0, beta = 0.0f0)
    density_diffusion = DensityDiffusionMolteniColagrossi(delta = 0.1f0)

    # The acceleration is passed explicitly to keep it `Float32`. It needs one component
    # per spatial dimension, so build it from the dimension of the neighborhood search
    # instead of hard-coding a 3-tuple (which restricted this benchmark to 3D).
    acceleration = ntuple(_ -> 0.0f0, n_dims)

    fluid_system = WeaklyCompressibleSPHSystem(fluid, fluid_density_calculator,
                                               state_equation, smoothing_kernel,
                                               smoothing_length, viscosity = viscosity,
                                               acceleration = acceleration,
                                               density_diffusion = density_diffusion)

    # Note that we cannot just disable parallelism in TrixiParticles.
    # But passing a different backend like `CUDA.CUDABackend`
    # allows us to change the type of the array to run the benchmark on the GPU.
    if parallel isa Bool
        system = fluid_system
        nhs = neighborhood_search
    else
        system = PointNeighbors.Adapt.adapt(parallel, fluid_system)
        nhs = PointNeighbors.Adapt.adapt(parallel, neighborhood_search)
    end

    # `v` stacks the velocity rows and the density as one additional row
    v = PointNeighbors.Adapt.adapt(parallel, vcat(fluid.velocity, fluid.density'))
    u = PointNeighbors.Adapt.adapt(parallel, coordinates)
    dv = zero(v)

    # Initialize the system
    TrixiParticles.initialize!(system, nhs)
    TrixiParticles.compute_pressure!(system, v)

    return @belapsed TrixiParticles.interact!($dv, $v, $u, $v, $u, $nhs, $system, $system)
end
48112

49113
"""

0 commit comments

Comments
 (0)