Skip to content

Commit 000494e

Browse files
ufechner71-Bart-1
andauthored
Reduce allocation (#126)
* Reduce allocations * Use normalize! * Reduce allocations * Use \cdot * Cleanup * Replace cross with \times * Add bench2.jl * Update comments * Add P to VSMSolution and bench2.jl * Make use of panel_width_array * Now 112, 600 allocations * Now 596 allocations * Down to 590 allocations * Cleanup * Update comment * Fix tests * Fix examples * Cleanup * Down to 586 allocations * Refactor update_effective_angle_of_attack_if_VSM() * Now 424 allocations * Now 422 allocations * Now 420 allocations * Change comment * Cleanup * Add PreallocationTools * Now 408 allocations * Now 398 allocations * Refactoring * Refactoring * Refactoring * Refactoring * Cleanup * Refactoring * Refactoring * Update comment * Now 394 allocations * Refactoring * Add BaseResult * Improve LoopResult * Refactoring, use LoopResult * Refactoring * Add comment * Refactoring * Refactoring * Update docstring * Refactoring * Cleanup * Refactoring * Refactoring * Refactoring * Refactoring * Update struct BaseResult * Add minimal working example * Improve mwe_01.jl * Now 390 allocations * Refactoring * Add TODO * Now 350 allocations * Now 348 allocations * Now 346 allocations * Now 344 allocations * Now 342 allocations * Add comment * Refactoring * Refactoring * Update comment * Update comments * Refactoring * Now 340 allocations * Add comment * Now 337 allocations * Update comments * Now 328 allocations * Cleanup * Update comment * Update comment * Update comment * Add mwe_02.jl * Cleanup * Add file to .gitignore * Use Julia 1.10 to create bin files * Address remarks * Remove print statement * Add comments --------- Co-authored-by: Uwe Fechner <[email protected]> Co-authored-by: 1-Bart-1 <[email protected]>
1 parent d9667c4 commit 000494e

15 files changed

+1752
-313
lines changed

Manifest-v1.10.toml

Lines changed: 1300 additions & 0 deletions
Large diffs are not rendered by default.

Project.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ Measures = "442fdcdd-2543-5da2-b0f3-8c86c306513e"
1515
NonlinearSolve = "8913a72c-1f9b-4ce2-8d82-65094dcecaec"
1616
Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
1717
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
18+
PreallocationTools = "d236fae5-4411-538c-8e31-a6e3d9e00b46"
1819
Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
1920
SharedArrays = "1a1011a3-84de-559e-8e89-a11a2f7dc383"
2021
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
@@ -47,6 +48,7 @@ Measures = "0.3"
4748
NonlinearSolve = "4"
4849
Parameters = "0.12"
4950
Pkg = "1"
51+
PreallocationTools = "0.4.25"
5052
Serialization = "1"
5153
SharedArrays = "1"
5254
StaticArrays = "1"

examples/bench.jl

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -39,18 +39,18 @@ vel_app = [cos(alpha), 0.0, sin(alpha)] .* v_a
3939
set_va!(wa, vel_app)
4040

4141
# Step 4: Initialize solvers for both LLT and VSM methods
42-
llt_solver = Solver(aerodynamic_model_type=LLT)
43-
vsm_solver = Solver(aerodynamic_model_type=VSM)
42+
P = length(wa.panels)
43+
llt_solver = Solver{P}(aerodynamic_model_type=LLT)
44+
vsm_solver = Solver{P}(aerodynamic_model_type=VSM)
4445

4546
# Step 5: Solve using both methods
4647
results_vsm = solve(vsm_solver, wa)
4748
sol = solve!(vsm_solver, wa)
48-
results_vsm_base = solve_base(vsm_solver, wa)
49-
println("Rectangular wing, solve_base:")
50-
@time results_vsm_base = solve_base(vsm_solver, wa)
51-
# time Python: 32.0 ms Ryzen 7950x
52-
# time Julia: 0.6 ms Ryzen 7950x
53-
# 0.8 ms laptop, performance mode, battery
49+
results_vsm_base = solve_base!(vsm_solver, wa)
50+
println("Rectangular wing, solve_base!:")
51+
@time results_vsm_base = solve_base!(vsm_solver, wa)
52+
# time Python: 32.0 ms Ryzen 7950x
53+
# time Julia: 0.42 ms Ryzen 7950x
5454
println("Rectangular wing, solve!:")
5555
@time sol = solve!(vsm_solver, wa)
5656
println("Rectangular wing, solve:")
@@ -61,7 +61,8 @@ wing = RamAirWing("data/ram_air_kite_body.obj", "data/ram_air_kite_foil.dat")
6161
body_aero = BodyAerodynamics([wing])
6262

6363
# Create solvers
64-
vsm_solver = Solver(
64+
P = length(wa.panels)
65+
vsm_solver = Solver{P}(
6566
aerodynamic_model_type=VSM,
6667
is_with_artificial_damping=false
6768
)
@@ -81,8 +82,8 @@ set_va!(body_aero, vel_app)
8182

8283
# Solving and plotting distributions
8384
results = solve(vsm_solver, body_aero)
84-
results_base = solve_base(vsm_solver, body_aero)
85+
results_base = solve_base!(vsm_solver, body_aero)
8586
println("RAM-air kite:")
86-
@time results_base = solve_base(vsm_solver, body_aero)
87+
@time results_base = solve_base!(vsm_solver, body_aero)
8788

8889
nothing

examples/ram_air_kite.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@ if DEFORM
2424
end
2525

2626
# Create solvers
27-
vsm_solver = Solver(
27+
P = length(body_aero.panels)
28+
vsm_solver = Solver{P}(
2829
aerodynamic_model_type=VSM,
2930
is_with_artificial_damping=false
3031
)

examples/rectangular_wing.jl

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,9 @@ vel_app = [cos(alpha), 0.0, sin(alpha)] .* v_a
3535
set_va!(wa, vel_app, [0, 0, 0.1])
3636

3737
# Step 4: Initialize solvers for both LLT and VSM methods
38-
llt_solver = Solver(aerodynamic_model_type=LLT)
39-
vsm_solver = Solver(aerodynamic_model_type=VSM)
38+
P = length(wa.panels)
39+
llt_solver = Solver{P}(aerodynamic_model_type=LLT)
40+
vsm_solver = Solver{P}(aerodynamic_model_type=VSM)
4041

4142
# Step 5: Solve using both methods
4243
results_llt = solve(llt_solver, wa)

examples/stall_model.jl

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,12 @@ end
4545
body_aero_CAD_19ribs = BodyAerodynamics([CAD_wing])
4646

4747
# Create solvers
48-
vsm_solver = Solver(
48+
P = length(wa.panels)
49+
vsm_solver = Solver{P}(
4950
aerodynamic_model_type=VSM,
5051
is_with_artificial_damping=false
5152
)
52-
VSM_with_stall_correction = Solver(
53+
VSM_with_stall_correction = Solver{P}(
5354
aerodynamic_model_type=VSM,
5455
is_with_artificial_damping=true
5556
)

mwes/mwe_01.jl

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# Replace va_norm_array = norm.(eachrow(solver.sol.va_array)) with a for loop
2+
3+
# Testcase that shows that the new function is equivalent to the old, allocating line of code.
4+
using Test
5+
using LinearAlgebra # for the norm function
6+
7+
# Define a mock Solver struct
8+
struct MockSolver
9+
sol::NamedTuple
10+
end
11+
12+
function calc_norm_array!(va_norm_array, va_array)
13+
for i in 1:size(va_array, 1)
14+
va_norm_array[i] = norm(view(va_array, i, :))
15+
end
16+
end
17+
18+
@testset "va_norm_array calculation" begin
19+
global va_norm_array
20+
21+
# Create a sample 2D array
22+
sample_va_array = [
23+
1.0 2.0 3.0;
24+
4.0 5.0 6.0;
25+
7.0 8.0 9.0
26+
]
27+
28+
# Create a mock solver with the sample array
29+
mock_solver = MockSolver((va_array = sample_va_array,))
30+
31+
# Calculate va_norm_array
32+
n = @allocated va_norm_array = norm.(eachrow(mock_solver.sol.va_array))
33+
println(n)
34+
35+
va_norm_array2 = zeros(3)
36+
m = @allocated calc_norm_array!(va_norm_array2, sample_va_array)
37+
println(m)
38+
39+
# Expected results (calculated manually)
40+
expected_norms = [
41+
sqrt(1^2 + 2^2 + 3^2),
42+
sqrt(4^2 + 5^2 + 6^2),
43+
sqrt(7^2 + 8^2 + 9^2)
44+
]
45+
46+
# Test the results
47+
@test length(va_norm_array) == size(sample_va_array, 1)
48+
@test va_norm_array expected_norms atol=1e-10
49+
50+
# Test individual values
51+
@test va_norm_array[1] norm(sample_va_array[1, :]) atol=1e-10
52+
@test va_norm_array[2] norm(sample_va_array[2, :]) atol=1e-10
53+
@test va_norm_array[3] norm(sample_va_array[3, :]) atol=1e-10
54+
55+
@test length(va_norm_array2) == size(sample_va_array, 1)
56+
@test va_norm_array2 expected_norms atol=1e-10
57+
58+
# Test individual values
59+
@test va_norm_array2[1] norm(sample_va_array[1, :]) atol=1e-10
60+
@test va_norm_array2[2] norm(sample_va_array[2, :]) atol=1e-10
61+
@test va_norm_array2[3] norm(sample_va_array[3, :]) atol=1e-10
62+
end
63+
nothing

mwes/mwe_02.jl

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
2+
# I tried to put a lazy cache (to avoid allocations for temporary vectors)
3+
# into a struct. This does not work at all with the macro provided by the package
4+
# Parameters. It works (no syntax error) with the macro Base.@kwdef, but if you
5+
# make use of this cache it is extremely slow.
6+
#
7+
# This mwe can serve as starting point for further investigations. Currently the only
8+
# cache that works is a cache in a global const variable.
9+
10+
using PreallocationTools, Parameters
11+
12+
Base.@kwdef mutable struct MockSolver
13+
const ca::NTuple{11, LazyBufferCache{typeof(identity), typeof(identity)}} = ([LazyBufferCache() for _ in 1:11])
14+
end

src/VortexStepMethod.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,13 @@ using Interpolations: Extrapolation
1515
using Parameters
1616
using Serialization
1717
using SharedArrays
18+
using PreallocationTools
1819
using Pkg
1920

2021
# Export public interface
2122
export Wing, Section, RamAirWing
2223
export BodyAerodynamics
23-
export Solver, solve, solve_base, solve!, VSMSolution
24+
export Solver, solve, solve_base!, solve!, VSMSolution
2425
export calculate_results
2526
export add_section!, set_va!
2627
export calculate_span, calculate_projected_area

src/body_aerodynamics.jl

Lines changed: 69 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -297,16 +297,15 @@ Calculate Aerodynamic Influence Coefficient matrices.
297297
298298
See also: [BodyAerodynamics](@ref), [Model](@ref)
299299
300-
Returns:
301-
Tuple of (`AIC_x`, `AIC_y`, `AIC_z`) matrices
300+
Returns: nothing
302301
"""
303-
function calculate_AIC_matrices!(body_aero::BodyAerodynamics, model::Model,
302+
@inline function calculate_AIC_matrices!(body_aero::BodyAerodynamics, model::Model,
304303
core_radius_fraction::Float64,
305304
va_norm_array::Vector{Float64},
306305
va_unit_array::Matrix{Float64})
307306
# Determine evaluation point based on model
308-
evaluation_point = model === VSM ? :control_point : :aero_center
309-
evaluation_point_on_bound = model === LLT
307+
evaluation_point = model == VSM ? :control_point : :aero_center
308+
evaluation_point_on_bound = model == LLT
310309

311310
# Initialize AIC matrices
312311
velocity_induced, tempvel, va_unit, U_2D = zeros(MVec3), zeros(MVec3), zeros(MVec3), zeros(MVec3)
@@ -330,13 +329,13 @@ function calculate_AIC_matrices!(body_aero::BodyAerodynamics, model::Model,
330329
core_radius_fraction,
331330
body_aero.work_vectors
332331
)
333-
body_aero.AIC[:, icp, jring] .= velocity_induced
334-
332+
335333
# Subtract 2D induced velocity for VSM
336-
if icp == jring && model === VSM
334+
if icp == jring && model == VSM
337335
calculate_velocity_induced_bound_2D!(U_2D, body_aero.panels[jring], ep, body_aero.work_vectors)
338-
body_aero.AIC[:, icp, jring] .-= U_2D
336+
velocity_induced .-= U_2D
339337
end
338+
body_aero.AIC[:, icp, jring] .= velocity_induced
340339
end
341340
end
342341
return nothing
@@ -347,24 +346,23 @@ end
347346
348347
Calculate circulation distribution for an elliptical wing.
349348
350-
Returns:
351-
Vector{Float64}: Circulation distribution along the wing
349+
Returns: nothing
352350
"""
353-
function calculate_circulation_distribution_elliptical_wing(body_aero::BodyAerodynamics, gamma_0::Float64=1.0)
351+
function calculate_circulation_distribution_elliptical_wing(gamma_i, body_aero::BodyAerodynamics, gamma_0::Float64=1.0)
354352
length(body_aero.wings) == 1 || throw(ArgumentError("Multiple wings not yet implemented"))
355353

356354
wing_span = body_aero.wings[1].span
357355
@debug "Wing span: $wing_span"
358356

359357
# Calculate y-coordinates of control points
358+
# TODO: pre-allocate y
360359
y = [panel.control_point[2] for panel in body_aero.panels]
361360

362361
# Calculate elliptical distribution
363-
gamma_i = gamma_0 * sqrt.(1 .- (2 .* y ./ wing_span).^2)
362+
gamma_i .= gamma_0 * sqrt.(1 .- (2 .* y ./ wing_span).^2)
364363

365364
@debug "Calculated circulation distribution: $gamma_i"
366-
367-
return gamma_i
365+
nothing
368366
end
369367

370368
"""
@@ -413,6 +411,8 @@ function calculate_stall_angle_list(panels::Vector{Panel};
413411
return stall_angles
414412
end
415413

414+
const cache_body = [LazyBufferCache() for _ in 1:5]
415+
416416
"""
417417
update_effective_angle_of_attack_if_VSM(body_aero::BodyAerodynamics, gamma::Vector{Float64},
418418
core_radius_fraction::Float64,
@@ -427,7 +427,8 @@ Update angle of attack at aerodynamic center for VSM method.
427427
Returns:
428428
Vector{Float64}: Updated angles of attack
429429
"""
430-
function update_effective_angle_of_attack_if_VSM(body_aero::BodyAerodynamics,
430+
function update_effective_angle_of_attack!(alpha_corrected,
431+
body_aero::BodyAerodynamics,
431432
gamma::Vector{Float64},
432433
core_radius_fraction::Float64,
433434
z_airf_array::Matrix{Float64},
@@ -436,26 +437,53 @@ function update_effective_angle_of_attack_if_VSM(body_aero::BodyAerodynamics,
436437
va_norm_array::Vector{Float64},
437438
va_unit_array::Matrix{Float64})
438439

439-
# Calculate AIC matrices at aerodynamic center using LLT method
440-
calculate_AIC_matrices!(
441-
body_aero, LLT, core_radius_fraction, va_norm_array, va_unit_array
442-
)
443-
AIC_x, AIC_y, AIC_z = @views body_aero.AIC[1, :, :], body_aero.AIC[2, :, :], body_aero.AIC[3, :, :]
444-
445-
# Calculate induced velocities
446-
induced_velocity = [
447-
AIC_x * gamma,
448-
AIC_y * gamma,
449-
AIC_z * gamma
450-
]
451-
induced_velocity = hcat(induced_velocity...)
440+
# Calculate AIC matrices (keep existing optimized view)
441+
calculate_AIC_matrices!(body_aero, LLT, core_radius_fraction, va_norm_array, va_unit_array)
442+
443+
# Get dimensions from existing data
444+
n_rows = size(body_aero.AIC, 2)
445+
n_cols = size(body_aero.AIC, 3)
446+
447+
# Preallocate induced velocity array
448+
induced_velocity = cache_body[1][va_array]
449+
450+
# Calculate each component with explicit loops
451+
for j in 1:3 # For each x/y/z component
452+
for i in 1:n_rows
453+
acc = zero(eltype(induced_velocity)) # Type-stable accumulator
454+
for k in 1:n_cols
455+
acc += body_aero.AIC[j, i, k] * gamma[k]
456+
end
457+
induced_velocity[i, j] = acc
458+
end
459+
end
460+
461+
# In-place relative velocity calculation
462+
relative_velocity = cache_body[2][va_array]
463+
relative_velocity .= va_array .+ induced_velocity
464+
465+
# Preallocate and compute dot products manually
466+
n = size(relative_velocity, 1)
467+
v_normal = cache_body[3][relative_velocity]
468+
v_tangential = cache_body[4][relative_velocity]
452469

453-
# Calculate relative velocities and angles
454-
relative_velocity = va_array + induced_velocity
455-
v_normal = sum(z_airf_array .* relative_velocity, dims=2)
456-
v_tangential = sum(x_airf_array .* relative_velocity, dims=2)
457-
alpha_array = atan.(v_normal ./ v_tangential)
458-
return alpha_array
470+
@inbounds for i in 1:n
471+
vn = 0.0
472+
vt = 0.0
473+
for j in 1:3
474+
vn += z_airf_array[i, j] * relative_velocity[i, j]
475+
vt += x_airf_array[i, j] * relative_velocity[i, j]
476+
end
477+
v_normal[i] = vn
478+
v_tangential[i] = vt
479+
end
480+
481+
# Direct angle calculation without temporary arrays
482+
@inbounds for i in 1:n
483+
alpha_corrected[i] = atan(v_normal[i], v_tangential[i])
484+
end
485+
486+
nothing
459487
end
460488

461489
"""
@@ -501,6 +529,7 @@ function calculate_results(
501529
cd_array = zeros(n_panels)
502530
cm_array = zeros(n_panels)
503531
panel_width_array = zeros(n_panels)
532+
alpha_corrected = zeros(n_panels)
504533

505534
# Calculate coefficients for each panel
506535
for (i, panel) in enumerate(panels)
@@ -515,8 +544,9 @@ function calculate_results(
515544
moment = reshape((cm_array .* 0.5 .* density .* v_a_array.^2 .* chord_array), :, 1)
516545

517546
# Calculate alpha corrections based on model type
518-
alpha_corrected = if aerodynamic_model_type === VSM
519-
update_effective_angle_of_attack_if_VSM(
547+
if aerodynamic_model_type === VSM
548+
update_effective_angle_of_attack!(
549+
alpha_corrected,
520550
body_aero,
521551
gamma_new,
522552
core_radius_fraction,
@@ -526,10 +556,8 @@ function calculate_results(
526556
va_norm_array,
527557
va_unit_array
528558
)
529-
elseif aerodynamic_model_type === LLT
530-
alpha_array
531-
else
532-
throw(ArgumentError("Unknown aerodynamic model type, should be LLT or VSM"))
559+
elseif aerodynamic_model_type == LLT
560+
alpha_corrected .= alpha_array
533561
end
534562

535563
# Verify va is not distributed

0 commit comments

Comments
 (0)