Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,4 @@ docs/src/.$kite_power_tools.drawio.bkp
data/linear_model.arrow
data/nonlinear_model.arrow
coverage/lcov.info
temp_benchmark.jl
32 changes: 8 additions & 24 deletions src/mtk_model.jl
Original file line number Diff line number Diff line change
Expand Up @@ -876,7 +876,7 @@ function linear_vsm_eqs!(s, eqs, guesses; aero_force_b, aero_moment_b, group_aer
return eqs, guesses
end

function create_sys!(s::SymbolicAWEModel, system::SystemStructure; init_va_b)
function create_sys!(s::SymbolicAWEModel, system::SystemStructure; init_va_b, bench=false)
eqs = []
defaults = Pair{Num, Any}[]
guesses = Pair{Num, Any}[]
Expand Down Expand Up @@ -916,31 +916,15 @@ function create_sys!(s::SymbolicAWEModel, system::SystemStructure; init_va_b)
eqs, defaults = wing_eqs!(s, eqs, defaults; tether_wing_force, tether_wing_moment, aero_force_b, aero_moment_b,
ω_b, α_b, R_b_w, wing_pos, wing_vel, wing_acc, stabilize, fix_nonstiff)
eqs = scalar_eqs!(s, eqs; R_b_w, wind_vec_gnd, va_wing_b, wing_pos, wing_vel, wing_acc, twist_angle, twist_ω, ω_b, α_b)

# te_I = (1/3 * (s.set.mass/8) * te_length^2)
# # -damping / I * ω = α_damping
# # solve for c: (c * (k*m/s^2) / (k*m^2)) * (m/s)=m/s^2 in wolframalpha
# # damping should be in N*m*s
# rot_damping = 0.1s.damping * te_length

# eqs = [
# eqs
# trailing_edge_α[1] ~ (force[:, s.i_A]) ⋅ e_te_A * te_length / te_I - (rot_damping[1] / te_I) * trailing_edge_ω[1] # TODO: add trailing edge
# trailing_edge_α[2] ~ (force[:, s.i_B]) ⋅ e_te_B * te_length / te_I - (rot_damping[2] / te_I) * trailing_edge_ω[2]
# ]


eqs = Symbolics.scalarize.(reduce(vcat, Symbolics.scalarize.(eqs)))

# discrete_events = [
# true => [
# [Q_b_w[i] ~ normalize(Q_b_w)[i] for i in 1:4]
# [twist_angle[i] ~ clamp(twist_angle[i], -π/2, π/2) for i in eachindex(s.point_groups)]
# ]
# ]

@info "Creating ODESystem"
# @named sys = ODESystem(eqs, t; discrete_events)
@time @named sys = ODESystem(eqs, t)
! bench && @info "Creating ODESystem"
if bench
@named sys = ODESystem(eqs, t)
else
@time @named sys = ODESystem(eqs, t)
end

defaults = [
defaults
Expand Down
26 changes: 20 additions & 6 deletions src/symbolic_awe_model.jl
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ and only update the state variables. Otherwise, it will create a new model from
"""
function init!(s::SymbolicAWEModel;
solver=nothing, stiffness_factor = nothing, delta = nothing, adaptive=true, prn=true,
precompile=false, remake=false, reload=false,
precompile=false, remake=false, reload=false, bench=false,
lin_outputs=Num[]
)
if isnothing(solver)
Expand All @@ -322,11 +322,19 @@ function init!(s::SymbolicAWEModel;
init_Q_b_w, R_b_w, init_va_b = initial_orient(s)
init!(s.sys_struct, s.set)

inputs = create_sys!(s, s.sys_struct; init_va_b)
inputs = create_sys!(s, s.sys_struct; init_va_b, bench)
prn && @info "Simplifying the system"
@suppress_err begin
prn ? (@time (sys, _) = structural_simplify(s.full_sys, (inputs, []))) :
((sys, _) = structural_simplify(s.full_sys, (inputs, [])))
if prn && !bench
@time (sys, _) = structural_simplify(s.full_sys, (inputs, []))
elseif bench
local elapsed, sys
elapsed = @elapsed (sys, _) = structural_simplify(s.full_sys, (inputs, []))
s.sys = sys
return elapsed
else
(sys, _) = structural_simplify(s.full_sys, (inputs, []))
end
s.sys = sys
end
dt = SimFloat(1/s.set.sample_freq)
Expand All @@ -353,13 +361,19 @@ function init!(s::SymbolicAWEModel;
end
model_path = joinpath(KiteUtils.get_data_path(), get_model_name(s.set; precompile))
if !ispath(model_path) || remake
init(s)
res = init(s)
if bench
return res
end
end
_, success = reinit!(s, solver; adaptive, precompile, reload, lin_outputs, prn)
if !success
rm(model_path)
@info "Rebuilding the system. This can take some minutes..."
init(s)
res = init(s)
if bench
return res
end
reinit!(s, solver; adaptive, precompile, lin_outputs, prn, reload=true)
end
return s.integrator
Expand Down
75 changes: 75 additions & 0 deletions test/bench_ref.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# SPDX-FileCopyrightText: 2022 Uwe Fechner
# SPDX-License-Identifier: MIT

const reference = 4.826620521958565e7 # AMD Ryzen 7840U, Julia 1.11, 1 thread

"""
cpu_benchmark_scalar(target_time=1.0)

Performs scalar CPU-intensive operations without SIMD for approximately `target_time` seconds.
This function performs basic arithmetic operations on scalar values to measure CPU performance
without utilizing vector/SIMD instructions.

Returns the number of operations performed and the actual elapsed time.
"""
function cpu_benchmark_scalar(target_time=1.0)
# First, do a short calibration run to estimate operations per second
calibrate_ops = 1_000_000
start_cal = Base.time()

# Scalar operations that avoid SIMD optimization
result = 0.0
x = 1.23456789
y = 9.87654321

for i in 1:calibrate_ops
# Mix of operations to avoid compiler optimizations
x = x * 1.000001 + sin(y * 0.001)
y = y * 0.999999 + cos(x * 0.001)
result += sqrt(abs(x + y))
end

cal_time = Base.time() - start_cal
ops_per_second = calibrate_ops / cal_time

# Estimate total operations needed for target time
target_ops = Int(round(ops_per_second * target_time))

# Main benchmark run
start_time = Base.time()
result = 0.0
x = 1.23456789
y = 9.87654321

@inbounds for i in 1:target_ops
# Scalar arithmetic operations that are hard to vectorize
x = x * 1.000001 + sin(y * 0.001)
y = y * 0.999999 + cos(x * 0.001)
result += sqrt(abs(x + y))

# Additional scalar operations to increase workload
if i % 1000 == 0
x = x / (1.0 + 1e-10) # Prevent overflow
y = y / (1.0 + 1e-10)
end
end

elapsed_time = Base.time() - start_time

# Force the compiler to use the result (prevent dead code elimination)
println("Benchmark result (ignore): $(result)")

return target_ops / elapsed_time
end

"""
rel_cpu_performance()

A simple CPU benchmark that performs scalar operations for approximately 1 second.
This is a standalone function that doesn't depend on any external packages.
"""
function rel_cpu_performance()
ops = cpu_benchmark_scalar(1.0)
println("CPU Performance: $(round(ops/1e6, digits=1)) million operations per second")
return ops/reference
end
120 changes: 120 additions & 0 deletions test/bench_simplify.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
# Copyright (c) 2024, 2025 Bart van de Lint, Uwe Fechner
# SPDX-License-Identifier: MIT

SIMPLE = false
T_REF = 48.0 # AMD Ryzen 7840U, Julia 1.11, no sys image [s]
# 37s with sys image

using Pkg
if ! ("Test" ∈ keys(Pkg.project().dependencies))
using TestEnv; TestEnv.activate()
end
using KiteModels, LinearAlgebra, Statistics, Test, Distributed
include("bench_ref.jl")

# Simulation parameters
dt = 0.05
total_time = 10.0 # Longer simulation to see oscillations
vsm_interval = 3
steps = Int(round(total_time / dt))

# Steering parameters
steering_freq = 1/2 # Hz - full left-right cycle frequency
steering_magnitude = 10.0 # Magnitude of steering input [Nm]

# Function to run benchmark in separate Julia process
function run_benchmark_subprocess()
# Create a temporary script file for the benchmark
benchmark_script = """
using Pkg
if ! ("Test" ∈ keys(Pkg.project().dependencies))
using TestEnv; TestEnv.activate()
end
using KiteModels, LinearAlgebra, Statistics
include("test/bench_ref.jl")

SIMPLE = $SIMPLE
T_REF = $T_REF

# Initialize model
set = load_settings("system_ram.yaml")
set.segments = 3
set_values = [-50, 0.0, 0.0] # Set values of the torques of the three winches. [Nm]
set.quasi_static = false
set.physical_model = SIMPLE ? "simple_ram" : "ram"

sam = SymbolicAWEModel(set)
sam.set.abs_tol = 1e-2
sam.set.rel_tol = 1e-2
rm("data/model_1.11_ram_dynamic_3_seg.bin"; force=true)

# Initialize at elevation
set.l_tethers[2] += 0.2
set.l_tethers[3] += 0.2
time_ = init!(sam; remake=false, reload=true, bench=true)
@info "Simplify took \$time_ seconds"
rel_performance = (T_REF / rel_cpu_performance())/time_

# Write results to file for parent process to read
open("benchmark_results.tmp", "w") do f
println(f, time_)
println(f, rel_performance)
end
"""

# Write the script to a temporary file
temp_script = "temp_benchmark.jl"
open(temp_script, "w") do f
write(f, benchmark_script)
end

try
# Run the benchmark in a separate Julia process
result = run(`julia --project=. $temp_script`)

if result.exitcode == 0
# Read results from temporary file
if isfile("benchmark_results.tmp")
lines = readlines("benchmark_results.tmp")
time_ = parse(Float64, lines[1])
rel_performance = parse(Float64, lines[2])

@info "Simplify took $time_ seconds"
@info "Relative performance: $rel_performance"
@test rel_performance > 0.8

# Clean up temporary files
rm("benchmark_results.tmp", force=true)
rm(temp_script, force=true)

return time_, rel_performance
else
error("Benchmark results file not found")
end
else
error("Benchmark process failed with exit code $(result.exitcode)")
end
catch e
# Clean up temporary files in case of error
rm("benchmark_results.tmp", force=true)
rm(temp_script, force=true)
rethrow(e)
end
end

# Run the benchmark in a separate process
time_, rel_performance = run_benchmark_subprocess()

# Note: sys object is not available when running in separate process
# If you need sys, you would need to serialize it or run parts in the main process
nothing

# Desktop, AMD Ryzen 9 7950X, Julia 1.11:
# - first run 34.5 seconds
# - second run 21.1 seconds

# Laptop, AMD Ryzen 7 7840U, Julia 1.11:
# - first run 35.0 seconds
# - second run 24.0 seconds


Loading