Skip to content

Commit ff44833

Browse files
authored
Benchmark simplify operation (#250)
* Add bench_simplify.jl * Add comment * Make sure sam.sys is defined * Calc rel performance * Run in a separate process
1 parent 805358a commit ff44833

File tree

5 files changed

+224
-30
lines changed

5 files changed

+224
-30
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,3 +60,4 @@ docs/src/.$kite_power_tools.drawio.bkp
6060
data/linear_model.arrow
6161
data/nonlinear_model.arrow
6262
coverage/lcov.info
63+
temp_benchmark.jl

src/mtk_model.jl

Lines changed: 8 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -876,7 +876,7 @@ function linear_vsm_eqs!(s, eqs, guesses; aero_force_b, aero_moment_b, group_aer
876876
return eqs, guesses
877877
end
878878

879-
function create_sys!(s::SymbolicAWEModel, system::SystemStructure; init_va_b)
879+
function create_sys!(s::SymbolicAWEModel, system::SystemStructure; init_va_b, bench=false)
880880
eqs = []
881881
defaults = Pair{Num, Any}[]
882882
guesses = Pair{Num, Any}[]
@@ -916,31 +916,15 @@ function create_sys!(s::SymbolicAWEModel, system::SystemStructure; init_va_b)
916916
eqs, defaults = wing_eqs!(s, eqs, defaults; tether_wing_force, tether_wing_moment, aero_force_b, aero_moment_b,
917917
ω_b, α_b, R_b_w, wing_pos, wing_vel, wing_acc, stabilize, fix_nonstiff)
918918
eqs = scalar_eqs!(s, eqs; R_b_w, wind_vec_gnd, va_wing_b, wing_pos, wing_vel, wing_acc, twist_angle, twist_ω, ω_b, α_b)
919-
920-
# te_I = (1/3 * (s.set.mass/8) * te_length^2)
921-
# # -damping / I * ω = α_damping
922-
# # solve for c: (c * (k*m/s^2) / (k*m^2)) * (m/s)=m/s^2 in wolframalpha
923-
# # damping should be in N*m*s
924-
# rot_damping = 0.1s.damping * te_length
925-
926-
# eqs = [
927-
# eqs
928-
# trailing_edge_α[1] ~ (force[:, s.i_A]) ⋅ e_te_A * te_length / te_I - (rot_damping[1] / te_I) * trailing_edge_ω[1] # TODO: add trailing edge
929-
# trailing_edge_α[2] ~ (force[:, s.i_B]) ⋅ e_te_B * te_length / te_I - (rot_damping[2] / te_I) * trailing_edge_ω[2]
930-
# ]
931-
919+
932920
eqs = Symbolics.scalarize.(reduce(vcat, Symbolics.scalarize.(eqs)))
933921

934-
# discrete_events = [
935-
# true => [
936-
# [Q_b_w[i] ~ normalize(Q_b_w)[i] for i in 1:4]
937-
# [twist_angle[i] ~ clamp(twist_angle[i], -π/2, π/2) for i in eachindex(s.point_groups)]
938-
# ]
939-
# ]
940-
941-
@info "Creating ODESystem"
942-
# @named sys = ODESystem(eqs, t; discrete_events)
943-
@time @named sys = ODESystem(eqs, t)
922+
! bench && @info "Creating ODESystem"
923+
if bench
924+
@named sys = ODESystem(eqs, t)
925+
else
926+
@time @named sys = ODESystem(eqs, t)
927+
end
944928

945929
defaults = [
946930
defaults

src/symbolic_awe_model.jl

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -301,7 +301,7 @@ and only update the state variables. Otherwise, it will create a new model from
301301
"""
302302
function init!(s::SymbolicAWEModel;
303303
solver=nothing, stiffness_factor = nothing, delta = nothing, adaptive=true, prn=true,
304-
precompile=false, remake=false, reload=false,
304+
precompile=false, remake=false, reload=false, bench=false,
305305
lin_outputs=Num[]
306306
)
307307
if isnothing(solver)
@@ -322,11 +322,19 @@ function init!(s::SymbolicAWEModel;
322322
init_Q_b_w, R_b_w, init_va_b = initial_orient(s)
323323
init!(s.sys_struct, s.set)
324324

325-
inputs = create_sys!(s, s.sys_struct; init_va_b)
325+
inputs = create_sys!(s, s.sys_struct; init_va_b, bench)
326326
prn && @info "Simplifying the system"
327327
@suppress_err begin
328-
prn ? (@time (sys, _) = structural_simplify(s.full_sys, (inputs, []))) :
329-
((sys, _) = structural_simplify(s.full_sys, (inputs, [])))
328+
if prn && !bench
329+
@time (sys, _) = structural_simplify(s.full_sys, (inputs, []))
330+
elseif bench
331+
local elapsed, sys
332+
elapsed = @elapsed (sys, _) = structural_simplify(s.full_sys, (inputs, []))
333+
s.sys = sys
334+
return elapsed
335+
else
336+
(sys, _) = structural_simplify(s.full_sys, (inputs, []))
337+
end
330338
s.sys = sys
331339
end
332340
dt = SimFloat(1/s.set.sample_freq)
@@ -353,13 +361,19 @@ function init!(s::SymbolicAWEModel;
353361
end
354362
model_path = joinpath(KiteUtils.get_data_path(), get_model_name(s.set; precompile))
355363
if !ispath(model_path) || remake
356-
init(s)
364+
res = init(s)
365+
if bench
366+
return res
367+
end
357368
end
358369
_, success = reinit!(s, solver; adaptive, precompile, reload, lin_outputs, prn)
359370
if !success
360371
rm(model_path)
361372
@info "Rebuilding the system. This can take some minutes..."
362-
init(s)
373+
res = init(s)
374+
if bench
375+
return res
376+
end
363377
reinit!(s, solver; adaptive, precompile, lin_outputs, prn, reload=true)
364378
end
365379
return s.integrator

test/bench_ref.jl

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
# SPDX-FileCopyrightText: 2022 Uwe Fechner
2+
# SPDX-License-Identifier: MIT
3+
4+
const reference = 4.826620521958565e7 # AMD Ryzen 7840U, Julia 1.11, 1 thread
5+
6+
# Scalar floating-point workload shared by the calibration and the main benchmark run.
# Every iteration reads the `x`/`y` values produced by the previous one, and the
# accumulated sum is returned so that the caller can consume it.
function _scalar_workload(n::Integer)
    result = 0.0
    x = 1.23456789
    y = 9.87654321
    for i in 1:n
        x = x * 1.000001 + sin(y * 0.001)
        y = y * 0.999999 + cos(x * 0.001)
        result += sqrt(abs(x + y))

        # Every 1000th iteration scale the state down slightly (original intent:
        # prevent overflow).
        if i % 1000 == 0
            x = x / (1.0 + 1e-10)
            y = y / (1.0 + 1e-10)
        end
    end
    return result
end

"""
    cpu_benchmark_scalar(target_time=1.0)

Run a scalar floating-point workload for approximately `target_time` seconds and return
the measured throughput.

A calibration run of 1,000,000 iterations is timed first to estimate how many iterations
fit into `target_time`. The main run then executes that many iterations and is timed
separately; only the main run contributes to the returned value.

# Arguments
- `target_time`: desired duration of the main run in seconds. Must be positive and finite.

# Returns
The throughput of the main run in loop iterations per second, as a `Float64`.

# Throws
- `ArgumentError` if `target_time` is not positive and finite.

The accumulated result of the main run is printed to `stdout` so that the computation
has an observable use.
"""
function cpu_benchmark_scalar(target_time=1.0)
    (isfinite(target_time) && target_time > 0) ||
        throw(ArgumentError("target_time must be positive and finite, got $target_time"))

    # Calibration: time a fixed number of iterations to estimate the iteration rate.
    # `time_ns` is used for all intervals because it is monotonic.
    calibrate_ops = 1_000_000
    cal_start = time_ns()
    _scalar_workload(calibrate_ops)
    cal_time = (time_ns() - cal_start) / 1e9
    ops_per_second = calibrate_ops / cal_time

    # Number of iterations expected to fill `target_time`; always run at least one.
    target_ops = max(1, round(Int, ops_per_second * target_time))

    # Main benchmark run
    start_time = time_ns()
    result = _scalar_workload(target_ops)
    elapsed_time = (time_ns() - start_time) / 1e9

    # Use the result so that the work cannot be discarded as dead code.
    println("Benchmark result (ignore): $(result)")

    return target_ops / elapsed_time
end
64+
65+
"""
    rel_cpu_performance()

Measure the scalar CPU throughput of this machine and relate it to the reference machine.

Calls `cpu_benchmark_scalar` with a target time of one second, prints the measured
throughput in million operations per second, and returns the throughput divided by the
`reference` constant defined in this file.
"""
function rel_cpu_performance()
    throughput = cpu_benchmark_scalar(1.0)
    mega_ops = round(throughput / 1e6, digits=1)
    println("CPU Performance: $(mega_ops) million operations per second")
    return throughput / reference
end

test/bench_simplify.jl

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
# Copyright (c) 2024, 2025 Bart van de Lint, Uwe Fechner
2+
# SPDX-License-Identifier: MIT
3+
4+
SIMPLE = false
5+
T_REF = 48.0 # AMD Ryzen 7840U, Julia 1.11, no sys image [s]
6+
# 37s with sys image
7+
8+
using Pkg
9+
if ! ("Test" ∈ keys(Pkg.project().dependencies))
10+
using TestEnv; TestEnv.activate()
11+
end
12+
using KiteModels, LinearAlgebra, Statistics, Test, Distributed
13+
include("bench_ref.jl")
14+
15+
# Simulation parameters
# NOTE(review): none of the globals defined in this section or in the steering section
# below are referenced again in this file; the benchmark script run in the subprocess
# does not use them. Presumably left over from a simulation example - confirm before removing.
16+
dt = 0.05
17+
total_time = 10.0 # Longer simulation to see oscillations
18+
vsm_interval = 3
19+
steps = Int(round(total_time / dt))
20+
21+
# Steering parameters
22+
steering_freq = 1/2 # Hz - full left-right cycle frequency
23+
steering_magnitude = 10.0 # Magnitude of steering input [Nm]
24+
25+
# Function to run benchmark in separate Julia process
26+
# Run the simplify benchmark in a separate Julia process.
#
# A temporary script is written to `temp_benchmark.jl` in the current directory and run
# with `julia --project=.`. The script calls `init!(sam; ..., bench=true)`, takes the
# returned value as the elapsed time of the simplify step, computes the performance
# relative to `T_REF` scaled by `rel_cpu_performance()`, and writes both numbers to
# `benchmark_results.tmp`. This function reads them back, logs them, tests that the
# relative performance is above 0.8 and returns `(time_, rel_performance)`.
#
# Both temporary files are removed on every exit path. An error is raised if the child
# process fails or if the results file is missing or incomplete.
function run_benchmark_subprocess()
    temp_script = "temp_benchmark.jl"
    results_file = "benchmark_results.tmp"

    # Script for the child process. `SIMPLE`, `T_REF` and the results file name are
    # interpolated here; `\$time_` is escaped so that the child interpolates it.
    benchmark_script = """
    using Pkg
    if ! ("Test" ∈ keys(Pkg.project().dependencies))
        using TestEnv; TestEnv.activate()
    end
    using KiteModels, LinearAlgebra, Statistics
    include("test/bench_ref.jl")

    SIMPLE = $SIMPLE
    T_REF = $T_REF

    # Initialize model
    set = load_settings("system_ram.yaml")
    set.segments = 3
    set_values = [-50, 0.0, 0.0] # Set values of the torques of the three winches. [Nm]
    set.quasi_static = false
    set.physical_model = SIMPLE ? "simple_ram" : "ram"

    sam = SymbolicAWEModel(set)
    sam.set.abs_tol = 1e-2
    sam.set.rel_tol = 1e-2
    rm("data/model_1.11_ram_dynamic_3_seg.bin"; force=true)

    # Initialize at elevation
    set.l_tethers[2] += 0.2
    set.l_tethers[3] += 0.2
    time_ = init!(sam; remake=false, reload=true, bench=true)
    @info "Simplify took \$time_ seconds"
    rel_performance = (T_REF / rel_cpu_performance())/time_

    # Write results to file for parent process to read
    open("$(results_file)", "w") do f
        println(f, time_)
        println(f, rel_performance)
    end
    """

    # Remove a results file left behind by an earlier, aborted run so that stale numbers
    # can never be mistaken for the outcome of this run.
    rm(results_file; force=true)
    write(temp_script, benchmark_script)

    try
        # `run` throws on a non-zero exit status by default; `ignorestatus` suppresses
        # that so the exit code can be reported explicitly below.
        result = run(ignorestatus(`julia --project=. $temp_script`))
        success(result) ||
            error("Benchmark process failed with exit code $(result.exitcode)")
        isfile(results_file) || error("Benchmark results file not found")

        lines = readlines(results_file)
        length(lines) >= 2 ||
            error("Benchmark results file is incomplete: expected 2 lines, got $(length(lines))")
        time_ = parse(Float64, lines[1])
        rel_performance = parse(Float64, lines[2])

        @info "Simplify took $time_ seconds"
        @info "Relative performance: $rel_performance"
        @test rel_performance > 0.8

        return time_, rel_performance
    finally
        # Clean up temporary files on success and on failure alike.
        rm(results_file; force=true)
        rm(temp_script; force=true)
    end
end
104+
105+
# Run the benchmark in a separate process
106+
time_, rel_performance = run_benchmark_subprocess()
107+
108+
# Note: sys object is not available when running in separate process
109+
# If you need sys, you would need to serialize it or run parts in the main process
110+
nothing
111+
112+
# Desktop, AMD Ryzen 9 7950X, Julia 1.11:
113+
# - first run 34.5 seconds
114+
# - second run 21.1 seconds
115+
116+
# Laptop, AMD Ryzen 7 7840U, Julia 1.11:
117+
# - first run 35.0 seconds
118+
# - second run 24.0 seconds
119+
120+

0 commit comments

Comments
 (0)