Skip to content

Commit a510ff6

Browse files
committed
Improve TLSPH performance on GPUs
1 parent f314552 commit a510ff6

File tree

7 files changed

+91
-71
lines changed

7 files changed

+91
-71
lines changed

src/general/abstract_system.jl

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -53,26 +53,30 @@ end
5353
initialize!(system, semi) = system
5454

5555
# This should not be dispatched by system type. We always expect to get a column of `A`.
56-
@propagate_inbounds function extract_svector(A, system, i)
57-
extract_svector(A, Val(ndims(system)), i)
56+
@propagate_inbounds function extract_svector(A, system, i...)
57+
extract_svector(A, Val(ndims(system)), i...)
5858
end
5959

6060
# Return the `i`-th column of the array `A` as an `SVector`.
61-
@inline function extract_svector(A, ::Val{NDIMS}, i) where {NDIMS}
61+
@inline function extract_svector(A, ::Val{NDIMS}, i...) where {NDIMS}
6262
# Explicit bounds check, which can be removed by calling this function with `@inbounds`
63-
@boundscheck checkbounds(A, NDIMS, i)
63+
@boundscheck checkbounds(A, NDIMS, i...)
6464

6565
# Assume inbounds access now
66-
return SVector(ntuple(@inline(dim->@inbounds A[dim, i]), NDIMS))
66+
return SVector(ntuple(@inline(dim->@inbounds A[dim, i...]), NDIMS))
6767
end
6868

6969
# Return `A[:, :, i]` as an `SMatrix`.
7070
@inline function extract_smatrix(A, system, particle)
71+
@boundscheck checkbounds(A, ndims(system), ndims(system), particle)
72+
7173
# Extract the matrix elements for this particle as a tuple to pass to SMatrix
7274
return SMatrix{ndims(system),
73-
ndims(system)}(ntuple(@inline(i->A[mod(i - 1, ndims(system)) + 1,
74-
div(i - 1, ndims(system)) + 1,
75-
particle]),
75+
ndims(system)}(ntuple(@inline(i->@inbounds A[mod(i - 1,
76+
ndims(system)) + 1,
77+
div(i - 1,
78+
ndims(system)) + 1,
79+
particle]),
7680
Val(ndims(system)^2)))
7781
end
7882

@@ -86,7 +90,7 @@ end
8690
@inline current_coordinates(u, system) = u
8791

8892
# Specifically get the initial coordinates of a particle for all system types
89-
@inline function initial_coords(system, particle)
93+
@propagate_inbounds function initial_coords(system, particle)
9094
return extract_svector(initial_coordinates(system), system, particle)
9195
end
9296

@@ -102,7 +106,7 @@ end
102106
# By default, try to extract it from `v`.
103107
@inline current_velocity(v, system) = v
104108

105-
@inline function current_density(v, system::AbstractSystem, particle)
109+
@propagate_inbounds function current_density(v, system::AbstractSystem, particle)
106110
return current_density(v, system)[particle]
107111
end
108112

src/schemes/fluid/weakly_compressible_sph/system.jl

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ end
244244

245245
system_correction(system::WeaklyCompressibleSPHSystem) = system.correction
246246

247-
@inline function current_velocity(v, system::WeaklyCompressibleSPHSystem)
247+
@propagate_inbounds function current_velocity(v, system::WeaklyCompressibleSPHSystem)
248248
return current_velocity(v, system.density_calculator, system)
249249
end
250250

@@ -254,14 +254,14 @@ end
254254
return v
255255
end
256256

257-
@inline function current_velocity(v, ::ContinuityDensity,
258-
system::WeaklyCompressibleSPHSystem)
257+
@propagate_inbounds function current_velocity(v, ::ContinuityDensity,
258+
system::WeaklyCompressibleSPHSystem)
259259
# When using `ContinuityDensity`, the velocity is stored
260260
# in the first `ndims(system)` rows of `v`.
261261
return view(v, 1:ndims(system), :)
262262
end
263263

264-
@inline function current_density(v, system::WeaklyCompressibleSPHSystem)
264+
@propagate_inbounds function current_density(v, system::WeaklyCompressibleSPHSystem)
265265
return current_density(v, system.density_calculator, system)
266266
end
267267

@@ -271,8 +271,8 @@ end
271271
return system.cache.density
272272
end
273273

274-
@inline function current_density(v, ::ContinuityDensity,
275-
system::WeaklyCompressibleSPHSystem)
274+
@propagate_inbounds function current_density(v, ::ContinuityDensity,
275+
system::WeaklyCompressibleSPHSystem)
276276
# When using `ContinuityDensity`, the density is stored in the last row of `v`
277277
return view(v, size(v, 1), :)
278278
end

src/schemes/structure/total_lagrangian_sph/penalty_force.jl

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,11 @@ end
2121
return zero(initial_pos_diff)
2222
end
2323

24-
@inline function dv_penalty_force(penalty_force::PenaltyForceGanzenmueller,
25-
particle, neighbor, initial_pos_diff, initial_distance,
26-
current_pos_diff, current_distance,
27-
system, m_a, m_b, rho_a, rho_b)
24+
@propagate_inbounds function dv_penalty_force(penalty_force::PenaltyForceGanzenmueller,
25+
particle, neighbor, initial_pos_diff,
26+
initial_distance,
27+
current_pos_diff, current_distance,
28+
system, m_a, m_b, rho_a, rho_b)
2829
volume_a = m_a / rho_a
2930
volume_b = m_b / rho_b
3031

@@ -33,15 +34,17 @@ end
3334
F_a = deformation_gradient(system, particle)
3435
F_b = deformation_gradient(system, neighbor)
3536

37+
inv_current_distance = 1 / current_distance
38+
3639
# Use the symmetry of epsilon to simplify computations
3740
eps_sum = (F_a + F_b) * initial_pos_diff - 2 * current_pos_diff
38-
delta_sum = dot(eps_sum, current_pos_diff) / current_distance
41+
delta_sum = dot(eps_sum, current_pos_diff) * inv_current_distance
3942

4043
E = young_modulus(system, particle)
4144

4245
f = (penalty_force.alpha / 2) * volume_a * volume_b *
43-
kernel_weight / initial_distance^2 * E * delta_sum * current_pos_diff /
44-
current_distance
46+
kernel_weight / initial_distance^2 * E * delta_sum * current_pos_diff *
47+
inv_current_distance
4548

4649
# Divide force by mass to obtain acceleration
4750
return f / m_a

src/schemes/structure/total_lagrangian_sph/rhs.jl

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@ end
2626
points=each_integrated_particle(system)) do particle, neighbor,
2727
initial_pos_diff,
2828
initial_distance
29-
# Only consider particles with a distance > 0. See `src/general/smoothing_kernels.jl` for more details.
29+
# Only consider particles with a distance > 0.
30+
# See `src/general/smoothing_kernels.jl` for more details.
3031
initial_distance^2 < eps(initial_smoothing_length(system)^2) && return
3132

3233
rho_a = @inbounds system.material_density[particle]
@@ -38,22 +39,24 @@ end
3839
m_a = @inbounds system.mass[particle]
3940
m_b = @inbounds system.mass[neighbor]
4041

42+
# PK1 / rho^2
43+
pk1_rho2_a = @inbounds pk1_rho2(system, particle)
44+
pk1_rho2_b = @inbounds pk1_rho2(system, neighbor)
45+
4146
current_pos_diff = @inbounds current_coords(system, particle) -
4247
current_coords(system, neighbor)
43-
current_distance = norm(current_pos_diff)
48+
current_distance = sqrt(dot(current_pos_diff, current_pos_diff))
4449

45-
dv_stress = m_b *
46-
(pk1_corrected(system, particle) / rho_a^2 +
47-
pk1_corrected(system, neighbor) / rho_b^2) * grad_kernel
50+
dv_stress = m_b * (pk1_rho2_a + pk1_rho2_b) * grad_kernel
4851

49-
dv_penalty_force_ = dv_penalty_force(penalty_force, particle, neighbor,
50-
initial_pos_diff, initial_distance,
51-
current_pos_diff, current_distance,
52-
system, m_a, m_b, rho_a, rho_b)
52+
dv_penalty_force_ = @inbounds dv_penalty_force(penalty_force, particle, neighbor,
53+
initial_pos_diff, initial_distance,
54+
current_pos_diff, current_distance,
55+
system, m_a, m_b, rho_a, rho_b)
5356

54-
dv_viscosity = dv_viscosity_tlsph(system, v_system, particle, neighbor,
55-
current_pos_diff, current_distance,
56-
m_a, m_b, rho_a, rho_b, grad_kernel)
57+
dv_viscosity = @inbounds dv_viscosity_tlsph(system, v_system, particle, neighbor,
58+
current_pos_diff, current_distance,
59+
m_a, m_b, rho_a, rho_b, grad_kernel)
5760

5861
dv_particle = dv_stress + dv_penalty_force_ + dv_viscosity
5962

src/schemes/structure/total_lagrangian_sph/system.jl

Lines changed: 33 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ struct TotalLagrangianSPHSystem{BM, NDIMS, ELTYPE <: Real, IC, ARRAY1D, ARRAY2D,
6868
current_coordinates :: ARRAY2D # Array{ELTYPE, 2}: [dimension, particle]
6969
mass :: ARRAY1D # Array{ELTYPE, 1}: [particle]
7070
correction_matrix :: ARRAY3D # Array{ELTYPE, 3}: [i, j, particle]
71-
pk1_corrected :: ARRAY3D # Array{ELTYPE, 3}: [i, j, particle]
71+
pk1_rho2 :: ARRAY3D # PK1 corrected divided by rho^2: [i, j, particle]
7272
deformation_grad :: ARRAY3D # Array{ELTYPE, 3}: [i, j, particle]
7373
material_density :: ARRAY1D # Array{ELTYPE, 1}: [particle]
7474
n_integrated_particles :: Int64
@@ -115,7 +115,7 @@ function TotalLagrangianSPHSystem(initial_condition, smoothing_kernel, smoothing
115115
mass = copy(initial_condition.mass)
116116
material_density = copy(initial_condition.density)
117117
correction_matrix = Array{ELTYPE, 3}(undef, NDIMS, NDIMS, n_particles)
118-
pk1_corrected = Array{ELTYPE, 3}(undef, NDIMS, NDIMS, n_particles)
118+
pk1_rho2 = Array{ELTYPE, 3}(undef, NDIMS, NDIMS, n_particles)
119119
deformation_grad = Array{ELTYPE, 3}(undef, NDIMS, NDIMS, n_particles)
120120

121121
n_integrated_particles = n_particles - n_clamped_particles
@@ -132,7 +132,7 @@ function TotalLagrangianSPHSystem(initial_condition, smoothing_kernel, smoothing
132132

133133
return TotalLagrangianSPHSystem(initial_condition, initial_coordinates,
134134
current_coordinates, mass, correction_matrix,
135-
pk1_corrected, deformation_grad, material_density,
135+
pk1_rho2, deformation_grad, material_density,
136136
n_integrated_particles, young_modulus, poisson_ratio,
137137
lame_lambda, lame_mu, smoothing_kernel,
138138
smoothing_length, acceleration_, boundary_model,
@@ -171,14 +171,14 @@ end
171171
return system.current_coordinates
172172
end
173173

174-
@inline function current_coords(system::TotalLagrangianSPHSystem, particle)
174+
@propagate_inbounds function current_coords(system::TotalLagrangianSPHSystem, particle)
175175
# For this system, the current coordinates are stored in the system directly,
176176
# so we don't need a `u` array. This function is only to be used in this file
177177
# when no `u` is available.
178178
current_coords(nothing, system, particle)
179179
end
180180

181-
@inline function current_velocity(v, system::TotalLagrangianSPHSystem, particle)
181+
@propagate_inbounds function current_velocity(v, system::TotalLagrangianSPHSystem, particle)
182182
if particle <= system.n_integrated_particles
183183
return extract_svector(v, system, particle)
184184
end
@@ -204,58 +204,58 @@ end
204204
error("`current_velocity(v, system)` is not implemented for `TotalLagrangianSPHSystem`")
205205
end
206206

207-
@inline function viscous_velocity(v, system::TotalLagrangianSPHSystem, particle)
207+
@propagate_inbounds function viscous_velocity(v, system::TotalLagrangianSPHSystem, particle)
208208
return extract_svector(system.boundary_model.cache.wall_velocity, system, particle)
209209
end
210210

211-
@inline function current_density(v, system::TotalLagrangianSPHSystem)
211+
@propagate_inbounds function current_density(v, system::TotalLagrangianSPHSystem)
212212
return current_density(v, system.boundary_model, system)
213213
end
214214

215215
# In fluid-structure interaction, use the "hydrodynamic pressure" of the structure particles
216216
# corresponding to the chosen boundary model.
217-
@inline function current_pressure(v, system::TotalLagrangianSPHSystem)
217+
@propagate_inbounds function current_pressure(v, system::TotalLagrangianSPHSystem)
218218
return current_pressure(v, system.boundary_model, system)
219219
end
220220

221-
@inline function hydrodynamic_mass(system::TotalLagrangianSPHSystem, particle)
221+
@propagate_inbounds function hydrodynamic_mass(system::TotalLagrangianSPHSystem, particle)
222222
return system.boundary_model.hydrodynamic_mass[particle]
223223
end
224224

225-
@inline function correction_matrix(system, particle)
225+
@propagate_inbounds function correction_matrix(system, particle)
226226
extract_smatrix(system.correction_matrix, system, particle)
227227
end
228228

229-
@inline function deformation_gradient(system, particle)
229+
@propagate_inbounds function deformation_gradient(system, particle)
230230
extract_smatrix(system.deformation_grad, system, particle)
231231
end
232-
@inline function pk1_corrected(system, particle)
233-
extract_smatrix(system.pk1_corrected, system, particle)
232+
@propagate_inbounds function pk1_rho2(system, particle)
233+
extract_smatrix(system.pk1_rho2, system, particle)
234234
end
235235

236-
function young_modulus(system::TotalLagrangianSPHSystem, particle)
236+
@propagate_inbounds function young_modulus(system::TotalLagrangianSPHSystem, particle)
237237
return young_modulus(system, system.young_modulus, particle)
238238
end
239239

240-
function young_modulus(::TotalLagrangianSPHSystem, young_modulus, particle)
240+
@inline function young_modulus(::TotalLagrangianSPHSystem, young_modulus, particle)
241241
return young_modulus
242242
end
243243

244-
function young_modulus(::TotalLagrangianSPHSystem,
245-
young_modulus::AbstractVector, particle)
244+
@propagate_inbounds function young_modulus(::TotalLagrangianSPHSystem,
245+
young_modulus::AbstractVector, particle)
246246
return young_modulus[particle]
247247
end
248248

249-
function poisson_ratio(system::TotalLagrangianSPHSystem, particle)
249+
@propagate_inbounds function poisson_ratio(system::TotalLagrangianSPHSystem, particle)
250250
return poisson_ratio(system, system.poisson_ratio, particle)
251251
end
252252

253-
function poisson_ratio(::TotalLagrangianSPHSystem, poisson_ratio, particle)
253+
@inline function poisson_ratio(::TotalLagrangianSPHSystem, poisson_ratio, particle)
254254
return poisson_ratio
255255
end
256256

257-
function poisson_ratio(::TotalLagrangianSPHSystem,
258-
poisson_ratio::AbstractVector, particle)
257+
@inline function poisson_ratio(::TotalLagrangianSPHSystem,
258+
poisson_ratio::AbstractVector, particle)
259259
return poisson_ratio[particle]
260260
end
261261

@@ -316,16 +316,18 @@ function update_boundary_interpolation!(system::TotalLagrangianSPHSystem, v, u,
316316
end
317317

318318
@inline function compute_pk1_corrected!(system, semi)
319-
(; deformation_grad) = system
319+
(; deformation_grad, pk1_rho2, material_density) = system
320320

321321
calc_deformation_grad!(deformation_grad, system, semi)
322322

323323
@threaded semi for particle in eachparticle(system)
324324
pk1_particle = pk1_stress_tensor(system, particle)
325325
pk1_particle_corrected = pk1_particle * correction_matrix(system, particle)
326+
rho2_inv = 1 / material_density[particle]^2
326327

327-
@inbounds for j in 1:ndims(system), i in 1:ndims(system)
328-
system.pk1_corrected[i, j, particle] = pk1_particle_corrected[i, j]
328+
for j in 1:ndims(system), i in 1:ndims(system)
329+
# Precompute PK1 / rho^2 to avoid repeated divisions in the interaction loop
330+
@inbounds pk1_rho2[i, j, particle] = pk1_particle_corrected[i, j] * rho2_inv
329331
end
330332
end
331333
end
@@ -446,7 +448,7 @@ end
446448
# The von-Mises stress is one form of equivalent stress, where sigma is the deviatoric stress.
447449
# See pages 32 and 123.
448450
function von_mises_stress(system)
449-
von_mises_stress_vector = zeros(eltype(system.pk1_corrected), nparticles(system))
451+
von_mises_stress_vector = zeros(eltype(system.pk1_rho2), nparticles(system))
450452

451453
@threaded default_backend(von_mises_stress_vector) for particle in
452454
each_integrated_particle(system)
@@ -463,7 +465,7 @@ end
463465
@inline function von_mises_stress(system, particle::Integer)
464466
F = deformation_gradient(system, particle)
465467
J = det(F)
466-
P = pk1_corrected(system, particle)
468+
P = pk1_rho2(system, particle) * system.material_density[particle]^2
467469
sigma = (1.0 / J) * P * F'
468470

469471
# Calculate deviatoric stress tensor
@@ -479,14 +481,14 @@ end
479481
function cauchy_stress(system::TotalLagrangianSPHSystem)
480482
NDIMS = ndims(system)
481483

482-
cauchy_stress_tensors = zeros(eltype(system.pk1_corrected), NDIMS, NDIMS,
484+
cauchy_stress_tensors = zeros(eltype(system.pk1_rho2), NDIMS, NDIMS,
483485
nparticles(system))
484486

485487
@threaded default_backend(cauchy_stress_tensors) for particle in
486488
each_integrated_particle(system)
487489
F = deformation_gradient(system, particle)
488490
J = det(F)
489-
P = pk1_corrected(system, particle)
491+
P = pk1_rho2(system, particle) * system.material_density[particle]^2
490492
sigma = (1.0 / J) * P * F'
491493
cauchy_stress_tensors[:, :, particle] = sigma
492494
end
@@ -507,7 +509,7 @@ end
507509
end
508510

509511
function system_data(system::TotalLagrangianSPHSystem, dv_ode, du_ode, v_ode, u_ode, semi)
510-
(; mass, material_density, deformation_grad, pk1_corrected, young_modulus,
512+
(; mass, material_density, deformation_grad, young_modulus,
511513
poisson_ratio, lame_lambda, lame_mu) = system
512514

513515
dv = wrap_v(dv_ode, system, semi)
@@ -518,6 +520,8 @@ function system_data(system::TotalLagrangianSPHSystem, dv_ode, du_ode, v_ode, u_
518520
initial_coordinates_ = initial_coordinates(system)
519521
velocity = [current_velocity(v, system, particle) for particle in eachparticle(system)]
520522
acceleration = system_data_acceleration(dv, system, system.clamped_particles_motion)
523+
pk1_corrected = [pk1_rho2(system, particle) * system.material_density[particle]^2
524+
for particle in eachparticle(system)]
521525

522526
return (; coordinates, initial_coordinates=initial_coordinates_, velocity, mass,
523527
material_density, deformation_grad, pk1_corrected, young_modulus, poisson_ratio,

0 commit comments

Comments
 (0)