diff --git a/docs/src/tutorials/autotune.md b/docs/src/tutorials/autotune.md
index ad87d6354..f829360a7 100644
--- a/docs/src/tutorials/autotune.md
+++ b/docs/src/tutorials/autotune.md
@@ -132,6 +132,47 @@ results = autotune_setup(
 )
 ```
 
+### Time Limits for Algorithm Tests
+
+Control the maximum time allowed for each algorithm test (including the accuracy check):
+
+```julia
+# Default: 100 seconds maximum per algorithm test
+results = autotune_setup() # maxtime = 100.0
+
+# Quick timeout for fast exploration
+results = autotune_setup(maxtime = 10.0)
+
+# Extended timeout for slow algorithms or large matrices
+results = autotune_setup(
+    maxtime = 300.0, # 5 minutes per test
+    sizes = [:large, :big]
+)
+
+# Conservative timeout for production benchmarking
+results = autotune_setup(
+    maxtime = 200.0,
+    samples = 10,
+    seconds = 2.0
+)
+```
+
+When an algorithm exceeds the `maxtime` limit:
+- The test is skipped to prevent hanging
+- The result is recorded as `NaN` in the benchmark data
+- A warning is displayed indicating the timeout
+- The benchmark continues with the next algorithm
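+
+Timed-out runs stay in the results table with `gflops = NaN`, so they are easy
+to list afterwards. A sketch, assuming the returned object exposes its
+`results_df` DataFrame with `gflops` and `algorithm` columns:
+
+```julia
+using DataFrames
+timed_out = filter(row -> isnan(row.gflops), results.results_df)
+unique(timed_out.algorithm) # algorithms that hit the limit
+```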
+
 ### Missing Algorithm Handling
 
 By default, autotune expects all algorithms to be available to ensure complete benchmarking. You can relax this requirement:
diff --git a/lib/LinearSolveAutotune/src/LinearSolveAutotune.jl b/lib/LinearSolveAutotune/src/LinearSolveAutotune.jl
index f698de377..39276726c 100644
--- a/lib/LinearSolveAutotune/src/LinearSolveAutotune.jl
+++ b/lib/LinearSolveAutotune/src/LinearSolveAutotune.jl
@@ -78,13 +78,13 @@ function Base.show(io::IO, results::AutotuneResults)
     println(io, "  • Julia: ", get(results.sysinfo, "julia_version", "Unknown"))
     println(io, "  • Threads: ", get(results.sysinfo, "num_threads", "Unknown"),
         " (BLAS: ", get(results.sysinfo, "blas_num_threads", "Unknown"), ")")
 
-    # Results summary
-    successful_results = filter(row -> row.success, results.results_df)
+    # Results summary - filter out NaN values
+    successful_results = filter(row -> row.success && !isnan(row.gflops), results.results_df)
     if nrow(successful_results) > 0
         println(io, "\nšŸ† Top Performing Algorithms:")
         summary = combine(groupby(successful_results, :algorithm),
-            :gflops => mean => :avg_gflops,
-            :gflops => maximum => :max_gflops,
+            :gflops => (x -> mean(filter(!isnan, x))) => :avg_gflops,
+            :gflops => (x -> maximum(filter(!isnan, x))) => :max_gflops,
             nrow => :num_tests)
         sort!(summary, :avg_gflops, rev = true)
@@ -104,6 +104,12 @@ function Base.show(io::IO, results::AutotuneResults)
     println(io, "šŸ“ Matrix Sizes: ", minimum(sizes), "Ɨ", minimum(sizes),
         " to ", maximum(sizes), "Ɨ", maximum(sizes))
 
+    # Report timeouts if any
+    timeout_results = filter(row -> isnan(row.gflops), results.results_df)
+    if nrow(timeout_results) > 0
+        println(io, "ā±ļø Timed Out: ", nrow(timeout_results), " tests exceeded time limit")
+    end
+
     # Call to action - reordered
     println(io, "\n" * "="^60)
     println(io, "šŸš€ For comprehensive results, consider running:")
@@ -158,7 +164,8 @@ end
         seconds::Float64 = 0.5,
         eltypes = (Float32, Float64, ComplexF32, ComplexF64),
         skip_missing_algs::Bool = false,
-        include_fastlapack::Bool = false)
+        include_fastlapack::Bool = false,
+        maxtime::Float64 = 100.0)
 
 Run a comprehensive benchmark of all available LU factorization methods and optionally:
@@ -182,6 +189,8 @@ Run a comprehensive benchmark of all available LU factorization methods and opti
   - `eltypes = (Float32, Float64, ComplexF32, ComplexF64)`: Element types to benchmark
   - `skip_missing_algs::Bool = false`: If false, error when expected
     algorithms are missing; if true, warn instead
   - `include_fastlapack::Bool = false`: If true, includes FastLUFactorization in benchmarks
+  - `maxtime::Float64 = 100.0`: Maximum time in seconds for each algorithm test (including the accuracy check).
+    If exceeded, the run is skipped and recorded as NaN
 
 # Returns
@@ -216,7 +225,8 @@ function autotune_setup(;
         seconds::Float64 = 0.5,
         eltypes = (Float64,),
         skip_missing_algs::Bool = false,
-        include_fastlapack::Bool = false)
+        include_fastlapack::Bool = false,
+        maxtime::Float64 = 100.0)
     @info "Starting LinearSolve.jl autotune setup..."
     @info "Configuration: sizes=$sizes, set_preferences=$set_preferences"
     @info "Element types to benchmark: $(join(eltypes, ", "))"
@@ -249,18 +259,25 @@ function autotune_setup(;
     # Run benchmarks
     @info "Running benchmarks (this may take several minutes)..."
+    @info "Maximum time per algorithm test: $(maxtime)s"
     results_df = benchmark_algorithms(matrix_sizes, all_algs, all_names, eltypes;
-        samples = samples, seconds = seconds, sizes = sizes)
+        samples = samples, seconds = seconds, sizes = sizes, maxtime = maxtime)
 
-    # Display results table
-    successful_results = filter(row -> row.success, results_df)
+    # Display results table - filter out NaN values
+    successful_results = filter(row -> row.success && !isnan(row.gflops), results_df)
+    timeout_results = filter(row -> isnan(row.gflops), results_df)
+
+    if nrow(timeout_results) > 0
+        @info "$(nrow(timeout_results)) tests timed out (exceeded $(maxtime)s limit)"
+    end
+
     if nrow(successful_results) > 0
         @info "Benchmark completed successfully!"
 
-        # Create summary table for display
+        # Create summary table for display - handle NaN values
         summary = combine(groupby(successful_results, :algorithm),
-            :gflops => mean => :avg_gflops,
-            :gflops => maximum => :max_gflops,
+            :gflops => (x -> mean(filter(!isnan, x))) => :avg_gflops,
+            :gflops => (x -> maximum(filter(!isnan, x))) => :max_gflops,
             nrow => :num_tests)
         sort!(summary, :avg_gflops, rev = true)
diff --git a/lib/LinearSolveAutotune/src/benchmarking.jl b/lib/LinearSolveAutotune/src/benchmarking.jl
index 2b2362f74..21d868647 100644
--- a/lib/LinearSolveAutotune/src/benchmarking.jl
+++ b/lib/LinearSolveAutotune/src/benchmarking.jl
@@ -73,14 +73,27 @@ end
 """
     benchmark_algorithms(matrix_sizes, algorithms, alg_names, eltypes;
-        samples=5, seconds=0.5, sizes=[:small, :medium])
+        samples=5, seconds=0.5, sizes=[:small, :medium],
+        maxtime=100.0)
 
 Benchmark the given algorithms across different matrix sizes and element types.
 Returns a DataFrame with results including element type information.
+
+# Arguments
+- `maxtime::Float64 = 100.0`: Maximum time in seconds for each algorithm test (including the accuracy check).
+  If the warmup solve or accuracy check exceeds this time, the run is skipped and recorded as NaN.
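+
+# Example
+
+```julia
+# Sketch only: `algs` and `names` stand in for the vectors of algorithm
+# instances and display names that the caller (e.g. autotune_setup) assembles.
+df = benchmark_algorithms([128, 256], algs, names, (Float64,); maxtime = 30.0)
+```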
""" function benchmark_algorithms(matrix_sizes, algorithms, alg_names, eltypes; samples = 5, seconds = 0.5, sizes = [:tiny, :small, :medium, :large], - check_correctness = true, correctness_tol = 1e0) + check_correctness = true, correctness_tol = 1e0, maxtime = 100.0) # Set benchmark parameters old_params = BenchmarkTools.DEFAULT_PARAMETERS @@ -136,10 +141,11 @@ function benchmark_algorithms(matrix_sizes, algorithms, alg_names, eltypes; ProgressMeter.update!(progress, desc="Benchmarking $name on $(n)Ɨ$(n) $eltype matrix: ") - gflops = 0.0 + gflops = NaN # Use NaN for timed out runs success = true error_msg = "" passed_correctness = true + timed_out = false try # Create the linear problem for this test @@ -147,41 +153,108 @@ function benchmark_algorithms(matrix_sizes, algorithms, alg_names, eltypes; u0 = copy(u0), alias = LinearAliasSpecifier(alias_A = true, alias_b = true)) - # Warmup run and correctness check - warmup_sol = solve(prob, alg) + # Time the warmup run and correctness check + start_time = time() - # Check correctness if reference solution is available - if check_correctness && reference_solution !== nothing - # Compute relative error - rel_error = norm(warmup_sol.u - reference_solution.u) / norm(reference_solution.u) - - if rel_error > correctness_tol - passed_correctness = false - @warn "Algorithm $name failed correctness check for size $n, eltype $eltype. " * - "Relative error: $(round(rel_error, sigdigits=3)) > tolerance: $correctness_tol. " * - "Algorithm will be excluded from results." - success = false - error_msg = "Failed correctness check (rel_error = $(round(rel_error, sigdigits=3)))" + # Create a channel for communication between tasks + result_channel = Channel(1) + + # Warmup run and correctness check with timeout + warmup_task = @async begin + try + result = solve(prob, alg) + put!(result_channel, result) + catch e + put!(result_channel, e) + end + end + + # Timer task to enforce timeout + timer_task = @async begin + sleep(maxtime) + if !istaskdone(warmup_task) + try + Base.throwto(warmup_task, InterruptException()) + catch + # Task might be in non-interruptible state + end + put!(result_channel, :timeout) + end + end + + # Wait for result or timeout + warmup_sol = nothing + result = take!(result_channel) + + # Clean up timer task if still running + if !istaskdone(timer_task) + try + Base.throwto(timer_task, InterruptException()) + catch + # Timer task might have already finished end end - # Only benchmark if correctness check passed - if passed_correctness - # Actual benchmark - bench = @benchmark solve($prob, $alg) setup=(prob = LinearProblem( - copy($A), copy($b); - u0 = copy($u0), - alias = LinearAliasSpecifier(alias_A = true, alias_b = true))) - - # Calculate GFLOPs - min_time_sec = minimum(bench.times) / 1e9 - flops = luflop(n, n) - gflops = flops / min_time_sec / 1e9 + if result === :timeout + # Task timed out + timed_out = true + @warn "Algorithm $name timed out (exceeded $(maxtime)s) for size $n, eltype $eltype. Recording as NaN." 
+                elseif result isa Exception
+                    # Task threw an error
+                    throw(result)
+                else
+                    # Successful completion
+                    warmup_sol = result
+                    elapsed_time = time() - start_time
+
+                    # Check correctness if reference solution is available
+                    if check_correctness && reference_solution !== nothing
+                        # Compute relative error
+                        rel_error = norm(warmup_sol.u - reference_solution.u) / norm(reference_solution.u)
+
+                        if rel_error > correctness_tol
+                            passed_correctness = false
+                            @warn "Algorithm $name failed correctness check for size $n, eltype $eltype. " *
+                                  "Relative error: $(round(rel_error, sigdigits=3)) > tolerance: $correctness_tol. " *
+                                  "Algorithm will be excluded from results."
+                            success = false
+                            error_msg = "Failed correctness check (rel_error = $(round(rel_error, sigdigits=3)))"
+                            gflops = 0.0
+                        end
+                    end
+
+                    # Only benchmark if correctness check passed and we have time remaining
+                    if passed_correctness && !timed_out
+                        # Check if we have enough time remaining for benchmarking
+                        # Allow at least 2x the warmup time for benchmarking
+                        remaining_time = maxtime - elapsed_time
+                        if remaining_time < 2 * elapsed_time
+                            @warn "Algorithm $name: insufficient time remaining for benchmarking (warmup took $(round(elapsed_time, digits=2))s). Recording as NaN."
+                            gflops = NaN
+                            success = false
+                            error_msg = "Insufficient time for benchmarking"
+                        else
+                            # Actual benchmark
+                            bench = @benchmark solve($prob, $alg) setup=(prob = LinearProblem(
+                                copy($A), copy($b);
+                                u0 = copy($u0),
+                                alias = LinearAliasSpecifier(alias_A = true, alias_b = true)))
+
+                            # Calculate GFLOPs
+                            min_time_sec = minimum(bench.times) / 1e9
+                            flops = luflop(n, n)
+                            gflops = flops / min_time_sec / 1e9
+                        end
+                    end
+                end
             catch e
                 success = false
                 error_msg = string(e)
+                gflops = NaN
                 # Don't warn for each failure, just record it
             end
 
@@ -252,8 +337,10 @@ Categorize the benchmark results into size ranges and find the best algorithm fo
 For complex types, avoids RFLUFactorization if possible due to known issues.
 """
 function categorize_results(df::DataFrame)
-    # Filter successful results
-    successful_df = filter(row -> row.success, df)
+    # Filter successful results and exclude NaN values
+    successful_df = filter(row -> row.success && !isnan(row.gflops), df)
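+    # Timed-out runs carry gflops = NaN in addition to success = false; dropping
+    # them here keeps the per-range averages below NaN-free.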
 
     if nrow(successful_df) == 0
         @warn "No successful benchmark results found!"
@@ -293,8 +380,9 @@ function categorize_results(df::DataFrame)
             continue
         end
 
-        # Calculate average GFLOPs for each algorithm in this range
-        avg_results = combine(groupby(range_df, :algorithm), :gflops => mean => :avg_gflops)
+        # Calculate average GFLOPs for each algorithm in this range, excluding NaN values
+        avg_results = combine(groupby(range_df, :algorithm),
+            :gflops => (x -> mean(filter(!isnan, x))) => :avg_gflops)
 
         # Sort by performance
         sort!(avg_results, :avg_gflops, rev=true)
diff --git a/lib/LinearSolveAutotune/src/plotting.jl b/lib/LinearSolveAutotune/src/plotting.jl
index 02e1c7a8c..ec841cd79 100644
--- a/lib/LinearSolveAutotune/src/plotting.jl
+++ b/lib/LinearSolveAutotune/src/plotting.jl
@@ -44,7 +44,7 @@ function create_benchmark_plots(df::DataFrame; title_base = "LinearSolve.jl LU F
 
     # Plot each algorithm for this element type
     for alg in algorithms
-        alg_df = filter(row -> row.algorithm == alg, eltype_df)
+        alg_df = filter(row -> row.algorithm == alg && !isnan(row.gflops), eltype_df)
         if nrow(alg_df) > 0
             # Sort by size for proper line plotting
             sort!(alg_df, :size)
diff --git a/lib/LinearSolveAutotune/src/telemetry.jl b/lib/LinearSolveAutotune/src/telemetry.jl
index f41634d67..ab6e66513 100644
--- a/lib/LinearSolveAutotune/src/telemetry.jl
+++ b/lib/LinearSolveAutotune/src/telemetry.jl
@@ -365,9 +365,10 @@ function format_detailed_results_markdown(df::DataFrame)
     end
 
     # Create a summary table with average performance per algorithm for this element type
+    # Filter out NaN values when computing statistics, guarding against all-NaN groups
     summary = combine(groupby(eltype_df, :algorithm),
-        :gflops => mean => :avg_gflops,
-        :gflops => std => :std_gflops,
+        :gflops => (x -> (v = filter(!isnan, x); isempty(v) ? NaN : mean(v))) => :avg_gflops,
+        :gflops => (x -> (v = filter(!isnan, x); length(v) < 2 ? NaN : std(v))) => :std_gflops,
         nrow => :num_tests)
     sort!(summary, :avg_gflops, rev = true)