diff --git a/.gitignore b/.gitignore index 11531c35..0f68bce6 100644 --- a/.gitignore +++ b/.gitignore @@ -60,3 +60,4 @@ docs/src/.$kite_power_tools.drawio.bkp data/linear_model.arrow data/nonlinear_model.arrow coverage/lcov.info +temp_benchmark.jl diff --git a/src/mtk_model.jl b/src/mtk_model.jl index 6b34c5a9..9d14a5d2 100644 --- a/src/mtk_model.jl +++ b/src/mtk_model.jl @@ -876,7 +876,7 @@ function linear_vsm_eqs!(s, eqs, guesses; aero_force_b, aero_moment_b, group_aer return eqs, guesses end -function create_sys!(s::SymbolicAWEModel, system::SystemStructure; init_va_b) +function create_sys!(s::SymbolicAWEModel, system::SystemStructure; init_va_b, bench=false) eqs = [] defaults = Pair{Num, Any}[] guesses = Pair{Num, Any}[] @@ -916,31 +916,15 @@ function create_sys!(s::SymbolicAWEModel, system::SystemStructure; init_va_b) eqs, defaults = wing_eqs!(s, eqs, defaults; tether_wing_force, tether_wing_moment, aero_force_b, aero_moment_b, ω_b, α_b, R_b_w, wing_pos, wing_vel, wing_acc, stabilize, fix_nonstiff) eqs = scalar_eqs!(s, eqs; R_b_w, wind_vec_gnd, va_wing_b, wing_pos, wing_vel, wing_acc, twist_angle, twist_ω, ω_b, α_b) - - # te_I = (1/3 * (s.set.mass/8) * te_length^2) - # # -damping / I * ω = α_damping - # # solve for c: (c * (k*m/s^2) / (k*m^2)) * (m/s)=m/s^2 in wolframalpha - # # damping should be in N*m*s - # rot_damping = 0.1s.damping * te_length - - # eqs = [ - # eqs - # trailing_edge_α[1] ~ (force[:, s.i_A]) ⋅ e_te_A * te_length / te_I - (rot_damping[1] / te_I) * trailing_edge_ω[1] # TODO: add trailing edge - # trailing_edge_α[2] ~ (force[:, s.i_B]) ⋅ e_te_B * te_length / te_I - (rot_damping[2] / te_I) * trailing_edge_ω[2] - # ] - + eqs = Symbolics.scalarize.(reduce(vcat, Symbolics.scalarize.(eqs))) - # discrete_events = [ - # true => [ - # [Q_b_w[i] ~ normalize(Q_b_w)[i] for i in 1:4] - # [twist_angle[i] ~ clamp(twist_angle[i], -π/2, π/2) for i in eachindex(s.point_groups)] - # ] - # ] - - @info "Creating ODESystem" - # @named sys = ODESystem(eqs, t; discrete_events) - @time @named sys = ODESystem(eqs, t) + ! bench && @info "Creating ODESystem" + if bench + @named sys = ODESystem(eqs, t) + else + @time @named sys = ODESystem(eqs, t) + end defaults = [ defaults diff --git a/src/symbolic_awe_model.jl b/src/symbolic_awe_model.jl index 38521001..eb1ffead 100644 --- a/src/symbolic_awe_model.jl +++ b/src/symbolic_awe_model.jl @@ -301,7 +301,7 @@ and only update the state variables. Otherwise, it will create a new model from """ function init!(s::SymbolicAWEModel; solver=nothing, stiffness_factor = nothing, delta = nothing, adaptive=true, prn=true, - precompile=false, remake=false, reload=false, + precompile=false, remake=false, reload=false, bench=false, lin_outputs=Num[] ) if isnothing(solver) @@ -322,11 +322,19 @@ function init!(s::SymbolicAWEModel; init_Q_b_w, R_b_w, init_va_b = initial_orient(s) init!(s.sys_struct, s.set) - inputs = create_sys!(s, s.sys_struct; init_va_b) + inputs = create_sys!(s, s.sys_struct; init_va_b, bench) prn && @info "Simplifying the system" @suppress_err begin - prn ? (@time (sys, _) = structural_simplify(s.full_sys, (inputs, []))) : - ((sys, _) = structural_simplify(s.full_sys, (inputs, []))) + if prn && !bench + @time (sys, _) = structural_simplify(s.full_sys, (inputs, [])) + elseif bench + local elapsed, sys + elapsed = @elapsed (sys, _) = structural_simplify(s.full_sys, (inputs, [])) + s.sys = sys + return elapsed + else + (sys, _) = structural_simplify(s.full_sys, (inputs, [])) + end s.sys = sys end dt = SimFloat(1/s.set.sample_freq) @@ -353,13 +361,19 @@ function init!(s::SymbolicAWEModel; end model_path = joinpath(KiteUtils.get_data_path(), get_model_name(s.set; precompile)) if !ispath(model_path) || remake - init(s) + res = init(s) + if bench + return res + end end _, success = reinit!(s, solver; adaptive, precompile, reload, lin_outputs, prn) if !success rm(model_path) @info "Rebuilding the system. This can take some minutes..." - init(s) + res = init(s) + if bench + return res + end reinit!(s, solver; adaptive, precompile, lin_outputs, prn, reload=true) end return s.integrator diff --git a/test/bench_ref.jl b/test/bench_ref.jl new file mode 100644 index 00000000..67e85f63 --- /dev/null +++ b/test/bench_ref.jl @@ -0,0 +1,75 @@ +# SPDX-FileCopyrightText: 2022 Uwe Fechner +# SPDX-License-Identifier: MIT + +const reference = 4.826620521958565e7 # AMD Ryzen 7840U, Julia 1.11, 1 thread + +""" + cpu_benchmark_scalar(target_time=1.0) + +Performs scalar CPU-intensive operations without SIMD for approximately `target_time` seconds. +This function performs basic arithmetic operations on scalar values to measure CPU performance +without utilizing vector/SIMD instructions. + +Returns the number of operations performed and the actual elapsed time. +""" +function cpu_benchmark_scalar(target_time=1.0) + # First, do a short calibration run to estimate operations per second + calibrate_ops = 1_000_000 + start_cal = Base.time() + + # Scalar operations that avoid SIMD optimization + result = 0.0 + x = 1.23456789 + y = 9.87654321 + + for i in 1:calibrate_ops + # Mix of operations to avoid compiler optimizations + x = x * 1.000001 + sin(y * 0.001) + y = y * 0.999999 + cos(x * 0.001) + result += sqrt(abs(x + y)) + end + + cal_time = Base.time() - start_cal + ops_per_second = calibrate_ops / cal_time + + # Estimate total operations needed for target time + target_ops = Int(round(ops_per_second * target_time)) + + # Main benchmark run + start_time = Base.time() + result = 0.0 + x = 1.23456789 + y = 9.87654321 + + @inbounds for i in 1:target_ops + # Scalar arithmetic operations that are hard to vectorize + x = x * 1.000001 + sin(y * 0.001) + y = y * 0.999999 + cos(x * 0.001) + result += sqrt(abs(x + y)) + + # Additional scalar operations to increase workload + if i % 1000 == 0 + x = x / (1.0 + 1e-10) # Prevent overflow + y = y / (1.0 + 1e-10) + end + end + + elapsed_time = Base.time() - start_time + + # Force the compiler to use the result (prevent dead code elimination) + println("Benchmark result (ignore): $(result)") + + return target_ops / elapsed_time +end + +""" + rel_cpu_performance() + +A simple CPU benchmark that performs scalar operations for approximately 1 second. +This is a standalone function that doesn't depend on any external packages. +""" +function rel_cpu_performance() + ops = cpu_benchmark_scalar(1.0) + println("CPU Performance: $(round(ops/1e6, digits=1)) million operations per second") + return ops/reference +end \ No newline at end of file diff --git a/test/bench_simplify.jl b/test/bench_simplify.jl new file mode 100644 index 00000000..9615af92 --- /dev/null +++ b/test/bench_simplify.jl @@ -0,0 +1,120 @@ +# Copyright (c) 2024, 2025 Bart van de Lint, Uwe Fechner +# SPDX-License-Identifier: MIT + +SIMPLE = false +T_REF = 48.0 # AMD Ryzen 7840U, Julia 1.11, no sys image [s] + # 37s with sys image + +using Pkg +if ! ("Test" ∈ keys(Pkg.project().dependencies)) + using TestEnv; TestEnv.activate() +end +using KiteModels, LinearAlgebra, Statistics, Test, Distributed +include("bench_ref.jl") + +# Simulation parameters +dt = 0.05 +total_time = 10.0 # Longer simulation to see oscillations +vsm_interval = 3 +steps = Int(round(total_time / dt)) + +# Steering parameters +steering_freq = 1/2 # Hz - full left-right cycle frequency +steering_magnitude = 10.0 # Magnitude of steering input [Nm] + +# Function to run benchmark in separate Julia process +function run_benchmark_subprocess() + # Create a temporary script file for the benchmark + benchmark_script = """ + using Pkg + if ! ("Test" ∈ keys(Pkg.project().dependencies)) + using TestEnv; TestEnv.activate() + end + using KiteModels, LinearAlgebra, Statistics + include("test/bench_ref.jl") + + SIMPLE = $SIMPLE + T_REF = $T_REF + + # Initialize model + set = load_settings("system_ram.yaml") + set.segments = 3 + set_values = [-50, 0.0, 0.0] # Set values of the torques of the three winches. [Nm] + set.quasi_static = false + set.physical_model = SIMPLE ? "simple_ram" : "ram" + + sam = SymbolicAWEModel(set) + sam.set.abs_tol = 1e-2 + sam.set.rel_tol = 1e-2 + rm("data/model_1.11_ram_dynamic_3_seg.bin"; force=true) + + # Initialize at elevation + set.l_tethers[2] += 0.2 + set.l_tethers[3] += 0.2 + time_ = init!(sam; remake=false, reload=true, bench=true) + @info "Simplify took \$time_ seconds" + rel_performance = (T_REF / rel_cpu_performance())/time_ + + # Write results to file for parent process to read + open("benchmark_results.tmp", "w") do f + println(f, time_) + println(f, rel_performance) + end + """ + + # Write the script to a temporary file + temp_script = "temp_benchmark.jl" + open(temp_script, "w") do f + write(f, benchmark_script) + end + + try + # Run the benchmark in a separate Julia process + result = run(`julia --project=. $temp_script`) + + if result.exitcode == 0 + # Read results from temporary file + if isfile("benchmark_results.tmp") + lines = readlines("benchmark_results.tmp") + time_ = parse(Float64, lines[1]) + rel_performance = parse(Float64, lines[2]) + + @info "Simplify took $time_ seconds" + @info "Relative performance: $rel_performance" + @test rel_performance > 0.8 + + # Clean up temporary files + rm("benchmark_results.tmp", force=true) + rm(temp_script, force=true) + + return time_, rel_performance + else + error("Benchmark results file not found") + end + else + error("Benchmark process failed with exit code $(result.exitcode)") + end + catch e + # Clean up temporary files in case of error + rm("benchmark_results.tmp", force=true) + rm(temp_script, force=true) + rethrow(e) + end +end + +# Run the benchmark in a separate process +time_, rel_performance = run_benchmark_subprocess() + +# Note: sys object is not available when running in separate process +# If you need sys, you would need to serialize it or run parts in the main process +nothing + +# Desktop, AMD Ryzen 9 7950X, Julia 1.11: +# - first run 34.5 seconds +# - second run 21.1 seconds + +# Laptop, AMD Ryzen 7 7840U, Julia 1.11: +# - first run 35.0 seconds +# - second run 24.0 seconds + +