Skip to content

Commit 45c4e03

Browse files
committed
Improve TLSPH performance on GPUs
1 parent f314552 commit 45c4e03

File tree

5 files changed

+69
-45
lines changed

5 files changed

+69
-45
lines changed

src/general/abstract_system.jl

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -53,26 +53,30 @@ end
5353
initialize!(system, semi) = system
5454

5555
# This should not be dispatched by system type. We always expect to get a column of `A`.
56-
@propagate_inbounds function extract_svector(A, system, i)
57-
extract_svector(A, Val(ndims(system)), i)
56+
@propagate_inbounds function extract_svector(A, system, i...)
57+
extract_svector(A, Val(ndims(system)), i...)
5858
end
5959

6060
# Return the `i`-th column of the array `A` as an `SVector`.
61-
@inline function extract_svector(A, ::Val{NDIMS}, i) where {NDIMS}
61+
@inline function extract_svector(A, ::Val{NDIMS}, i...) where {NDIMS}
6262
# Explicit bounds check, which can be removed by calling this function with `@inbounds`
63-
@boundscheck checkbounds(A, NDIMS, i)
63+
@boundscheck checkbounds(A, NDIMS, i...)
6464

6565
# Assume inbounds access now
66-
return SVector(ntuple(@inline(dim->@inbounds A[dim, i]), NDIMS))
66+
return SVector(ntuple(@inline(dim->@inbounds A[dim, i...]), NDIMS))
6767
end
6868

6969
# Return `A[:, :, i]` as an `SMatrix`.
7070
@inline function extract_smatrix(A, system, particle)
71+
@boundscheck checkbounds(A, ndims(system), ndims(system), particle)
72+
7173
# Extract the matrix elements for this particle as a tuple to pass to SMatrix
7274
return SMatrix{ndims(system),
73-
ndims(system)}(ntuple(@inline(i->A[mod(i - 1, ndims(system)) + 1,
74-
div(i - 1, ndims(system)) + 1,
75-
particle]),
75+
ndims(system)}(ntuple(@inline(i->@inbounds A[mod(i - 1,
76+
ndims(system)) + 1,
77+
div(i - 1,
78+
ndims(system)) + 1,
79+
particle]),
7680
Val(ndims(system)^2)))
7781
end
7882

@@ -86,7 +90,7 @@ end
8690
@inline current_coordinates(u, system) = u
8791

8892
# Specifically get the initial coordinates of a particle for all system types
89-
@inline function initial_coords(system, particle)
93+
@propagate_inbounds function initial_coords(system, particle)
9094
return extract_svector(initial_coordinates(system), system, particle)
9195
end
9296

@@ -102,7 +106,7 @@ end
102106
# By default, try to extract it from `v`.
103107
@inline current_velocity(v, system) = v
104108

105-
@inline function current_density(v, system::AbstractSystem, particle)
109+
@propagate_inbounds function current_density(v, system::AbstractSystem, particle)
106110
return current_density(v, system)[particle]
107111
end
108112

src/schemes/fluid/weakly_compressible_sph/system.jl

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ end
244244

245245
system_correction(system::WeaklyCompressibleSPHSystem) = system.correction
246246

247-
@inline function current_velocity(v, system::WeaklyCompressibleSPHSystem)
247+
@propagate_inbounds function current_velocity(v, system::WeaklyCompressibleSPHSystem)
248248
return current_velocity(v, system.density_calculator, system)
249249
end
250250

@@ -254,14 +254,14 @@ end
254254
return v
255255
end
256256

257-
@inline function current_velocity(v, ::ContinuityDensity,
258-
system::WeaklyCompressibleSPHSystem)
257+
@propagate_inbounds function current_velocity(v, ::ContinuityDensity,
258+
system::WeaklyCompressibleSPHSystem)
259259
# When using `ContinuityDensity`, the velocity is stored
260260
# in the first `ndims(system)` rows of `v`.
261261
return view(v, 1:ndims(system), :)
262262
end
263263

264-
@inline function current_density(v, system::WeaklyCompressibleSPHSystem)
264+
@propagate_inbounds function current_density(v, system::WeaklyCompressibleSPHSystem)
265265
return current_density(v, system.density_calculator, system)
266266
end
267267

@@ -271,8 +271,8 @@ end
271271
return system.cache.density
272272
end
273273

274-
@inline function current_density(v, ::ContinuityDensity,
275-
system::WeaklyCompressibleSPHSystem)
274+
@propagate_inbounds function current_density(v, ::ContinuityDensity,
275+
system::WeaklyCompressibleSPHSystem)
276276
# When using `ContinuityDensity`, the density is stored in the last row of `v`
277277
return view(v, size(v, 1), :)
278278
end

src/schemes/structure/total_lagrangian_sph/penalty_force.jl

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ end
2121
return zero(initial_pos_diff)
2222
end
2323

24-
@inline function dv_penalty_force(penalty_force::PenaltyForceGanzenmueller,
24+
@propagate_inbounds function dv_penalty_force(penalty_force::PenaltyForceGanzenmueller,
2525
particle, neighbor, initial_pos_diff, initial_distance,
2626
current_pos_diff, current_distance,
2727
system, m_a, m_b, rho_a, rho_b)
@@ -33,15 +33,17 @@ end
3333
F_a = deformation_gradient(system, particle)
3434
F_b = deformation_gradient(system, neighbor)
3535

36+
inv_current_distance = 1 / current_distance
37+
3638
# Use the symmetry of epsilon to simplify computations
3739
eps_sum = (F_a + F_b) * initial_pos_diff - 2 * current_pos_diff
38-
delta_sum = dot(eps_sum, current_pos_diff) / current_distance
40+
delta_sum = dot(eps_sum, current_pos_diff) * inv_current_distance
3941

4042
E = young_modulus(system, particle)
4143

4244
f = (penalty_force.alpha / 2) * volume_a * volume_b *
43-
kernel_weight / initial_distance^2 * E * delta_sum * current_pos_diff /
44-
current_distance
45+
kernel_weight / initial_distance^2 * E * delta_sum * current_pos_diff *
46+
inv_current_distance
4547

4648
# Divide force by mass to obtain acceleration
4749
return f / m_a

src/schemes/structure/total_lagrangian_sph/rhs.jl

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@ end
2626
points=each_integrated_particle(system)) do particle, neighbor,
2727
initial_pos_diff,
2828
initial_distance
29-
# Only consider particles with a distance > 0. See `src/general/smoothing_kernels.jl` for more details.
29+
# Only consider particles with a distance > 0.
30+
# See `src/general/smoothing_kernels.jl` for more details.
3031
initial_distance^2 < eps(initial_smoothing_length(system)^2) && return
3132

3233
rho_a = @inbounds system.material_density[particle]
@@ -40,18 +41,18 @@ end
4041

4142
current_pos_diff = @inbounds current_coords(system, particle) -
4243
current_coords(system, neighbor)
43-
current_distance = norm(current_pos_diff)
44+
current_distance = sqrt(dot(current_pos_diff, current_pos_diff))
4445

45-
dv_stress = m_b *
46-
(pk1_corrected(system, particle) / rho_a^2 +
47-
pk1_corrected(system, neighbor) / rho_b^2) * grad_kernel
46+
dv_stress = @inbounds m_b *
47+
(pk1_corrected(system, particle) +
48+
pk1_corrected(system, neighbor)) * grad_kernel
4849

49-
dv_penalty_force_ = dv_penalty_force(penalty_force, particle, neighbor,
50+
dv_penalty_force_ = @inbounds dv_penalty_force(penalty_force, particle, neighbor,
5051
initial_pos_diff, initial_distance,
5152
current_pos_diff, current_distance,
5253
system, m_a, m_b, rho_a, rho_b)
5354

54-
dv_viscosity = dv_viscosity_tlsph(system, v_system, particle, neighbor,
55+
dv_viscosity = @inbounds dv_viscosity_tlsph(system, v_system, particle, neighbor,
5556
current_pos_diff, current_distance,
5657
m_a, m_b, rho_a, rho_b, grad_kernel)
5758

src/schemes/structure/total_lagrangian_sph/system.jl

Lines changed: 35 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,23 @@ function TotalLagrangianSPHSystem(initial_condition, smoothing_kernel, smoothing
118118
pk1_corrected = Array{ELTYPE, 3}(undef, NDIMS, NDIMS, n_particles)
119119
deformation_grad = Array{ELTYPE, 3}(undef, NDIMS, NDIMS, n_particles)
120120

121+
# initial_coordinates_ = copy(initial_condition.coordinates)
122+
# current_coordinates_ = copy(initial_condition.coordinates)
123+
# initial_coordinates = PermutedDimsArray(permutedims(initial_coordinates_, (2, 1)),
124+
# (2, 1))
125+
# current_coordinates = PermutedDimsArray(permutedims(current_coordinates_, (2, 1)),
126+
# (2, 1))
127+
# # mass = copy(initial_condition.mass)
128+
# # material_density = copy(initial_condition.density)
129+
# correction_matrix_ = Array{ELTYPE, 3}(undef, NDIMS, NDIMS, n_particles)
130+
# correction_matrix = PermutedDimsArray(permutedims(correction_matrix_, (3, 2, 1)),
131+
# (3, 2, 1))
132+
# pk1_corrected_ = Array{ELTYPE, 3}(undef, NDIMS, NDIMS, n_particles)
133+
# pk1_corrected = PermutedDimsArray(permutedims(pk1_corrected_, (3, 2, 1)), (3, 2, 1))
134+
# deformation_grad_ = Array{ELTYPE, 3}(undef, NDIMS, NDIMS, n_particles)
135+
# deformation_grad = PermutedDimsArray(permutedims(deformation_grad_, (3, 2, 1)),
136+
# (3, 2, 1))
137+
121138
n_integrated_particles = n_particles - n_clamped_particles
122139

123140
lame_lambda = @. young_modulus * poisson_ratio /
@@ -171,14 +188,14 @@ end
171188
return system.current_coordinates
172189
end
173190

174-
@inline function current_coords(system::TotalLagrangianSPHSystem, particle)
191+
@propagate_inbounds function current_coords(system::TotalLagrangianSPHSystem, particle)
175192
# For this system, the current coordinates are stored in the system directly,
176193
# so we don't need a `u` array. This function is only to be used in this file
177194
# when no `u` is available.
178195
current_coords(nothing, system, particle)
179196
end
180197

181-
@inline function current_velocity(v, system::TotalLagrangianSPHSystem, particle)
198+
@propagate_inbounds function current_velocity(v, system::TotalLagrangianSPHSystem, particle)
182199
if particle <= system.n_integrated_particles
183200
return extract_svector(v, system, particle)
184201
end
@@ -204,58 +221,58 @@ end
204221
error("`current_velocity(v, system)` is not implemented for `TotalLagrangianSPHSystem`")
205222
end
206223

207-
@inline function viscous_velocity(v, system::TotalLagrangianSPHSystem, particle)
224+
@propagate_inbounds function viscous_velocity(v, system::TotalLagrangianSPHSystem, particle)
208225
return extract_svector(system.boundary_model.cache.wall_velocity, system, particle)
209226
end
210227

211-
@inline function current_density(v, system::TotalLagrangianSPHSystem)
228+
@propagate_inbounds function current_density(v, system::TotalLagrangianSPHSystem)
212229
return current_density(v, system.boundary_model, system)
213230
end
214231

215232
# In fluid-structure interaction, use the "hydrodynamic pressure" of the structure particles
216233
# corresponding to the chosen boundary model.
217-
@inline function current_pressure(v, system::TotalLagrangianSPHSystem)
234+
@propagate_inbounds function current_pressure(v, system::TotalLagrangianSPHSystem)
218235
return current_pressure(v, system.boundary_model, system)
219236
end
220237

221-
@inline function hydrodynamic_mass(system::TotalLagrangianSPHSystem, particle)
238+
@propagate_inbounds function hydrodynamic_mass(system::TotalLagrangianSPHSystem, particle)
222239
return system.boundary_model.hydrodynamic_mass[particle]
223240
end
224241

225-
@inline function correction_matrix(system, particle)
242+
@propagate_inbounds function correction_matrix(system, particle)
226243
extract_smatrix(system.correction_matrix, system, particle)
227244
end
228245

229-
@inline function deformation_gradient(system, particle)
246+
@propagate_inbounds function deformation_gradient(system, particle)
230247
extract_smatrix(system.deformation_grad, system, particle)
231248
end
232-
@inline function pk1_corrected(system, particle)
249+
@propagate_inbounds function pk1_corrected(system, particle)
233250
extract_smatrix(system.pk1_corrected, system, particle)
234251
end
235252

236-
function young_modulus(system::TotalLagrangianSPHSystem, particle)
253+
@propagate_inbounds function young_modulus(system::TotalLagrangianSPHSystem, particle)
237254
return young_modulus(system, system.young_modulus, particle)
238255
end
239256

240-
function young_modulus(::TotalLagrangianSPHSystem, young_modulus, particle)
257+
@inline function young_modulus(::TotalLagrangianSPHSystem, young_modulus, particle)
241258
return young_modulus
242259
end
243260

244-
function young_modulus(::TotalLagrangianSPHSystem,
245-
young_modulus::AbstractVector, particle)
261+
@propagate_inbounds function young_modulus(::TotalLagrangianSPHSystem,
262+
young_modulus::AbstractVector, particle)
246263
return young_modulus[particle]
247264
end
248265

249-
function poisson_ratio(system::TotalLagrangianSPHSystem, particle)
266+
@propagate_inbounds function poisson_ratio(system::TotalLagrangianSPHSystem, particle)
250267
return poisson_ratio(system, system.poisson_ratio, particle)
251268
end
252269

253-
function poisson_ratio(::TotalLagrangianSPHSystem, poisson_ratio, particle)
270+
@inline function poisson_ratio(::TotalLagrangianSPHSystem, poisson_ratio, particle)
254271
return poisson_ratio
255272
end
256273

257-
function poisson_ratio(::TotalLagrangianSPHSystem,
258-
poisson_ratio::AbstractVector, particle)
274+
@inline function poisson_ratio(::TotalLagrangianSPHSystem,
275+
poisson_ratio::AbstractVector, particle)
259276
return poisson_ratio[particle]
260277
end
261278

@@ -325,7 +342,7 @@ end
325342
pk1_particle_corrected = pk1_particle * correction_matrix(system, particle)
326343

327344
@inbounds for j in 1:ndims(system), i in 1:ndims(system)
328-
system.pk1_corrected[i, j, particle] = pk1_particle_corrected[i, j]
345+
system.pk1_corrected[i, j, particle] = pk1_particle_corrected[i, j] / system.material_density[particle]^2
329346
end
330347
end
331348
end

0 commit comments

Comments
 (0)