Skip to content
Draft
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ LibGit2 = "76f85450-5226-5b5a-8eaa-529ad045b433"
LightXML = "9c8b4983-aa76-5018-a973-4c85ecc9e179"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
PointNeighbors = "1c4d5385-0a27-49de-8e2c-43b175c8985c"
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
Expand Down
96 changes: 96 additions & 0 deletions benchmark.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
using Peridynamics

RESPATH_ROOT::String = length(ARGS) ≥ 1 ? ARGS[1] : joinpath("results")
RESPATH::String = joinpath(RESPATH_ROOT, "mode_I_perf_test")

function get_job(N::Int)
l, Δx, δ, a = 1.0, 1/N, 3.015/N, 0.5
pos, vol = uniform_box(l, l, 0.1l, Δx)
ids = sortperm(pos[2,:])
mat = BBMaterial()
b = Body(mat, pos[:, ids], vol[ids])
material!(b; horizon=3.015Δx, E=2.1e5, rho=8e-6, Gc=2.7)
point_set!(p -> p[1] ≤ -l/2+a && 0 ≤ p[2] ≤ 2δ, b, :set_a)
point_set!(p -> p[1] ≤ -l/2+a && -2δ ≤ p[2] < 0, b, :set_b)
precrack!(b, :set_a, :set_b)
point_set!(p -> p[2] > l/2-Δx, b, :set_top)
point_set!(p -> p[2] < -l/2+Δx, b, :set_bottom)
velocity_bc!(t -> -30, b, :set_bottom, :y)
velocity_bc!(t -> 30, b, :set_top, :y)
vv = VelocityVerlet(steps=2000)
job = Job(b, vv; path=RESPATH)
return job
end

function main()
# @rootdo println("-- compilation --")
# job = get_job(40)
# @mpitime submit(job)

@rootdo println("-- work --")
job = get_job(40)
@mpitime submit(job)
return nothing
end

using BenchmarkTools, PointNeighbors, Plots

function benchmark_peridynamics(neighborhood_search, job)
point_decomp = Peridynamics.PointDecomposition(job.spatial_setup, 1)
tdh = Peridynamics.ThreadsDataHandler(job.spatial_setup, job.time_solver, point_decomp)

return @belapsed Peridynamics.calc_force_density!($(tdh.chunks[1]), $neighborhood_search);
end

function plot_benchmarks(N_start, iterations; title = "")
neighborhood_searches_names = [
"Original Code";;
"GridNeighborhoodSearch";;
"NeighborListsNeighborhoodSearch";;
"NeighborListsNHS contiguous"
]

# Multiply number of points in each iteration (roughly) by this factor
scaling_factor = 4
per_dimension_factor = scaling_factor^(1 / 3)
N = [round(Int, N_start * per_dimension_factor^(iter - 1))
for iter in 1:iterations]
n_points = zero(N)

times = zeros(iterations, length(neighborhood_searches_names))

for iter in 1:iterations
job = get_job(N[iter])
n_points[iter] = job.spatial_setup.n_points
search_radius = Peridynamics.maximum_horizon(job.spatial_setup)

neighborhood_searches = [
nothing,
GridNeighborhoodSearch{3}(search_radius, job.spatial_setup.n_points),
NeighborListsNeighborhoodSearch{3}(search_radius, job.spatial_setup.n_points,
backend = Vector{Vector{Int}}),
NeighborListsNeighborhoodSearch{3}(search_radius, job.spatial_setup.n_points),
]

for i in eachindex(neighborhood_searches)
neighborhood_search = neighborhood_searches[i]
if !isnothing(neighborhood_search)
initialize!(neighborhood_search,
job.spatial_setup.position, job.spatial_setup.position)
end

time = benchmark_peridynamics(neighborhood_search, job)
times[iter, i] = time
time_string = BenchmarkTools.prettytime(time * 1e9)
println("$(neighborhood_searches_names[i])")
println("with $(n_points[iter]) points finished in $time_string\n")
end
end

plot(n_points, times,
xaxis = :log, yaxis = :log,
xticks = (n_points, n_points),
xlabel = "#particles", ylabel = "Runtime [s]",
legend = :outerright, size = (750, 400), dpi = 200,
label = neighborhood_searches_names, title = title)
end
50 changes: 49 additions & 1 deletion src/physics/force_density.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,56 @@ function _calc_force_density!(storage::AbstractStorage, system::AbstractSystem,
params::AbstractPointParameters, each_point_idx)
storage.b_int .= 0
storage.n_active_bonds .= 0
for point_id in each_point_idx
Threads.@threads :static for point_id in each_point_idx
Copy link
Author

@efaulhaber efaulhaber Jun 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just wanted to ignore the chunks and only have one chunk, over which I run a threaded loop. The performance is probably similar, I just wanted to have more threads and therefore less cache per thread, and this was easier than looking at the chunks stuff.

force_density_point!(storage, system, mat, params, point_id)
end
return nothing
end

function calc_force_density!(b::AbstractBodyChunk, nhs)
_calc_force_density!(b.storage, b.system, b.mat, b.paramsetup, each_point_idx(b), nhs)
return nothing
end

function calc_force_density!(b::AbstractBodyChunk, nhs::Nothing)
# Call original function
_calc_force_density!(b.storage, b.system, b.mat, b.paramsetup, each_point_idx(b))
return nothing
end

function _calc_force_density!(storage::AbstractStorage, system::AbstractSystem,
mat::AbstractMaterial,
params::AbstractPointParameters, each_point_idx, nhs)
storage.b_int .= 0
storage.n_active_bonds .= 0

# for_particle_neighbor(storage.position, storage.position, nhs,
# particles=each_point_idx, parallel=false) do i, j, _, L
Threads.@threads :static for point_id in each_point_idx
foreach_neighbor(storage.position, storage.position, nhs, point_id) do i, j, _, L
# current bond length
Δxijx = storage.position[1, j] - storage.position[1, i]
Δxijy = storage.position[2, j] - storage.position[2, i]
Δxijz = storage.position[3, j] - storage.position[3, i]
l = sqrt(Δxijx * Δxijx + Δxijy * Δxijy + Δxijz * Δxijz)

# bond strain
ε = (l - L) / L

# failure mechanism
# if ε > params.εc && bond.fail_permit
# storage.bond_active[bond_id] = false
# end
# storage.n_active_bonds[i] += storage.bond_active[bond_id]

# update of force density
scfactor = 1
bond_fail = true
temp = bond_fail * scfactor * params.bc * ε / l * system.volume[j]
storage.b_int[1, i] += temp * Δxijx
storage.b_int[2, i] += temp * Δxijy
storage.b_int[3, i] += temp * Δxijz
end
end
return nothing
end
2 changes: 1 addition & 1 deletion src/time_solvers/velocity_verlet.jl
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ function solve_timestep!(dh::AbstractThreadsDataHandler, options::AbstractOption
apply_bcs!(chunk, t)
update_disp_and_pos!(chunk, Δt)
end
@threads :static for chunk_id in eachindex(dh.chunks)
for chunk_id in eachindex(dh.chunks)
exchange_loc_to_halo!(dh, chunk_id)
calc_force_density!(dh.chunks[chunk_id])
end
Expand Down