From dbe9b156199306a42e177539cc94388f45d14f19 Mon Sep 17 00:00:00 2001 From: ChrisRackauckas Date: Mon, 11 Aug 2025 11:16:50 -0400 Subject: [PATCH 1/4] Add maxtime parameter to LinearSolveAutotune - Added maxtime parameter with 100s default to autotune_setup() and benchmark_algorithms() - Implements timeout handling during accuracy checks and benchmarking - Records timed out runs as NaN in results - Updated docstrings and documentation to explain the new parameter - Prevents hanging on slow algorithms or large matrices --- docs/src/tutorials/autotune.md | 31 ++++++ .../src/LinearSolveAutotune.jl | 11 +- lib/LinearSolveAutotune/src/benchmarking.jl | 103 +++++++++++++----- 3 files changed, 112 insertions(+), 33 deletions(-) diff --git a/docs/src/tutorials/autotune.md b/docs/src/tutorials/autotune.md index ad87d6354..f829360a7 100644 --- a/docs/src/tutorials/autotune.md +++ b/docs/src/tutorials/autotune.md @@ -132,6 +132,37 @@ results = autotune_setup( ) ``` +### Time Limits for Algorithm Tests + +Control the maximum time allowed for each algorithm test (including accuracy check): + +```julia +# Default: 100 seconds maximum per algorithm test +results = autotune_setup() # maxtime = 100.0 + +# Quick timeout for fast exploration +results = autotune_setup(maxtime = 10.0) + +# Extended timeout for slow algorithms or large matrices +results = autotune_setup( + maxtime = 300.0, # 5 minutes per test + sizes = [:large, :big] +) + +# Conservative timeout for production benchmarking +results = autotune_setup( + maxtime = 200.0, + samples = 10, + seconds = 2.0 +) +``` + +When an algorithm exceeds the `maxtime` limit: +- The test is skipped to prevent hanging +- The result is recorded as `NaN` in the benchmark data +- A warning is displayed indicating the timeout +- The benchmark continues with the next algorithm + ### Missing Algorithm Handling By default, autotune expects all algorithms to be available to ensure complete benchmarking. 
You can relax this requirement: diff --git a/lib/LinearSolveAutotune/src/LinearSolveAutotune.jl b/lib/LinearSolveAutotune/src/LinearSolveAutotune.jl index f698de377..42d89cbeb 100644 --- a/lib/LinearSolveAutotune/src/LinearSolveAutotune.jl +++ b/lib/LinearSolveAutotune/src/LinearSolveAutotune.jl @@ -158,7 +158,8 @@ end seconds::Float64 = 0.5, eltypes = (Float32, Float64, ComplexF32, ComplexF64), skip_missing_algs::Bool = false, - include_fastlapack::Bool = false) + include_fastlapack::Bool = false, + maxtime::Float64 = 100.0) Run a comprehensive benchmark of all available LU factorization methods and optionally: @@ -182,6 +183,8 @@ Run a comprehensive benchmark of all available LU factorization methods and opti - `eltypes = (Float32, Float64, ComplexF32, ComplexF64)`: Element types to benchmark - `skip_missing_algs::Bool = false`: If false, error when expected algorithms are missing; if true, warn instead - `include_fastlapack::Bool = false`: If true, includes FastLUFactorization in benchmarks + - `maxtime::Float64 = 100.0`: Maximum time in seconds for each algorithm test (including accuracy check). + If exceeded, the run is skipped and recorded as NaN # Returns @@ -216,7 +219,8 @@ function autotune_setup(; seconds::Float64 = 0.5, eltypes = (Float64,), skip_missing_algs::Bool = false, - include_fastlapack::Bool = false) + include_fastlapack::Bool = false, + maxtime::Float64 = 100.0) @info "Starting LinearSolve.jl autotune setup..." @info "Configuration: sizes=$sizes, set_preferences=$set_preferences" @info "Element types to benchmark: $(join(eltypes, ", "))" @@ -249,8 +253,9 @@ function autotune_setup(; # Run benchmarks @info "Running benchmarks (this may take several minutes)..." 
+ @info "Maximum time per algorithm test: $(maxtime)s" results_df = benchmark_algorithms(matrix_sizes, all_algs, all_names, eltypes; - samples = samples, seconds = seconds, sizes = sizes) + samples = samples, seconds = seconds, sizes = sizes, maxtime = maxtime) # Display results table successful_results = filter(row -> row.success, results_df) diff --git a/lib/LinearSolveAutotune/src/benchmarking.jl b/lib/LinearSolveAutotune/src/benchmarking.jl index 2b2362f74..1dad9d0b6 100644 --- a/lib/LinearSolveAutotune/src/benchmarking.jl +++ b/lib/LinearSolveAutotune/src/benchmarking.jl @@ -73,14 +73,19 @@ end """ benchmark_algorithms(matrix_sizes, algorithms, alg_names, eltypes; - samples=5, seconds=0.5, sizes=[:small, :medium]) + samples=5, seconds=0.5, sizes=[:small, :medium], + maxtime=100.0) Benchmark the given algorithms across different matrix sizes and element types. Returns a DataFrame with results including element type information. + +# Arguments +- `maxtime::Float64 = 100.0`: Maximum time in seconds for each algorithm test (including accuracy check). + If the accuracy check exceeds this time, the run is skipped and recorded as NaN. 
""" function benchmark_algorithms(matrix_sizes, algorithms, alg_names, eltypes; samples = 5, seconds = 0.5, sizes = [:tiny, :small, :medium, :large], - check_correctness = true, correctness_tol = 1e0) + check_correctness = true, correctness_tol = 1e0, maxtime = 100.0) # Set benchmark parameters old_params = BenchmarkTools.DEFAULT_PARAMETERS @@ -136,10 +141,11 @@ function benchmark_algorithms(matrix_sizes, algorithms, alg_names, eltypes; ProgressMeter.update!(progress, desc="Benchmarking $name on $(n)Ɨ$(n) $eltype matrix: ") - gflops = 0.0 + gflops = NaN # Use NaN for timed out runs success = true error_msg = "" passed_correctness = true + timed_out = false try # Create the linear problem for this test @@ -147,41 +153,78 @@ function benchmark_algorithms(matrix_sizes, algorithms, alg_names, eltypes; u0 = copy(u0), alias = LinearAliasSpecifier(alias_A = true, alias_b = true)) - # Warmup run and correctness check - warmup_sol = solve(prob, alg) + # Time the warmup run and correctness check + start_time = time() - # Check correctness if reference solution is available - if check_correctness && reference_solution !== nothing - # Compute relative error - rel_error = norm(warmup_sol.u - reference_solution.u) / norm(reference_solution.u) - - if rel_error > correctness_tol - passed_correctness = false - @warn "Algorithm $name failed correctness check for size $n, eltype $eltype. " * - "Relative error: $(round(rel_error, sigdigits=3)) > tolerance: $correctness_tol. " * - "Algorithm will be excluded from results." 
- success = false - error_msg = "Failed correctness check (rel_error = $(round(rel_error, sigdigits=3)))" - end + # Warmup run and correctness check with timeout + warmup_task = @async begin + solve(prob, alg) + end + + # Wait for warmup to complete or timeout + warmup_sol = nothing + timeout_wait = maxtime + while !istaskdone(warmup_task) && (time() - start_time) < timeout_wait + sleep(0.1) end - # Only benchmark if correctness check passed - if passed_correctness - # Actual benchmark - bench = @benchmark solve($prob, $alg) setup=(prob = LinearProblem( - copy($A), copy($b); - u0 = copy($u0), - alias = LinearAliasSpecifier(alias_A = true, alias_b = true))) - - # Calculate GFLOPs - min_time_sec = minimum(bench.times) / 1e9 - flops = luflop(n, n) - gflops = flops / min_time_sec / 1e9 + if !istaskdone(warmup_task) + # Task timed out + timed_out = true + @warn "Algorithm $name timed out (exceeded $(maxtime)s) for size $n, eltype $eltype. Recording as NaN." + success = false + error_msg = "Timed out (exceeded $(maxtime)s)" + gflops = NaN + else + # Get the result + warmup_sol = fetch(warmup_task) + elapsed_time = time() - start_time + + # Check correctness if reference solution is available + if check_correctness && reference_solution !== nothing + # Compute relative error + rel_error = norm(warmup_sol.u - reference_solution.u) / norm(reference_solution.u) + + if rel_error > correctness_tol + passed_correctness = false + @warn "Algorithm $name failed correctness check for size $n, eltype $eltype. " * + "Relative error: $(round(rel_error, sigdigits=3)) > tolerance: $correctness_tol. " * + "Algorithm will be excluded from results." 
+ success = false + error_msg = "Failed correctness check (rel_error = $(round(rel_error, sigdigits=3)))" + gflops = 0.0 + end + end + + # Only benchmark if correctness check passed and we have time remaining + if passed_correctness && !timed_out + # Check if we have enough time remaining for benchmarking + # Allow at least 2x the warmup time for benchmarking + remaining_time = maxtime - elapsed_time + if remaining_time < 2 * elapsed_time + @warn "Algorithm $name: insufficient time remaining for benchmarking (warmup took $(round(elapsed_time, digits=2))s). Recording as NaN." + gflops = NaN + success = false + error_msg = "Insufficient time for benchmarking" + else + # Actual benchmark + bench = @benchmark solve($prob, $alg) setup=(prob = LinearProblem( + copy($A), copy($b); + u0 = copy($u0), + alias = LinearAliasSpecifier(alias_A = true, alias_b = true))) + + # Calculate GFLOPs + min_time_sec = minimum(bench.times) / 1e9 + flops = luflop(n, n) + gflops = flops / min_time_sec / 1e9 + end + end end catch e success = false error_msg = string(e) + gflops = 0.0 # Don't warn for each failure, just record it end From 30fd92d4eb0f54fca650fb3c4bc2eb21db5d04bf Mon Sep 17 00:00:00 2001 From: ChrisRackauckas Date: Mon, 11 Aug 2025 11:30:25 -0400 Subject: [PATCH 2/4] Improve timeout handling: properly kill timed-out tasks - Use Channel-based communication between warmup and timer tasks - Properly interrupt timed-out tasks with Base.throwto() - Clean up timer task when warmup completes successfully - Handle exceptions from warmup task properly - Prevents resource leaks from hanging tasks --- lib/LinearSolveAutotune/src/benchmarking.jl | 46 +++++++++++++++++---- 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/lib/LinearSolveAutotune/src/benchmarking.jl b/lib/LinearSolveAutotune/src/benchmarking.jl index 1dad9d0b6..6e3cdd9cb 100644 --- a/lib/LinearSolveAutotune/src/benchmarking.jl +++ b/lib/LinearSolveAutotune/src/benchmarking.jl @@ -156,28 +156,58 @@ function 
benchmark_algorithms(matrix_sizes, algorithms, alg_names, eltypes; # Time the warmup run and correctness check start_time = time() + # Create a channel for communication between tasks + result_channel = Channel(1) + # Warmup run and correctness check with timeout warmup_task = @async begin - solve(prob, alg) + try + result = solve(prob, alg) + put!(result_channel, result) + catch e + put!(result_channel, e) + end + end + + # Timer task to enforce timeout + timer_task = @async begin + sleep(maxtime) + if !istaskdone(warmup_task) + try + Base.throwto(warmup_task, InterruptException()) + catch + # Task might be in non-interruptible state + end + put!(result_channel, :timeout) + end end - # Wait for warmup to complete or timeout + # Wait for result or timeout warmup_sol = nothing - timeout_wait = maxtime - while !istaskdone(warmup_task) && (time() - start_time) < timeout_wait - sleep(0.1) + result = take!(result_channel) + + # Clean up timer task if still running + if !istaskdone(timer_task) + try + Base.throwto(timer_task, InterruptException()) + catch + # Timer task might have already finished + end end - if !istaskdone(warmup_task) + if result === :timeout # Task timed out timed_out = true @warn "Algorithm $name timed out (exceeded $(maxtime)s) for size $n, eltype $eltype. Recording as NaN." 
success = false error_msg = "Timed out (exceeded $(maxtime)s)" gflops = NaN + elseif result isa Exception + # Task threw an error + throw(result) else - # Get the result - warmup_sol = fetch(warmup_task) + # Successful completion + warmup_sol = result elapsed_time = time() - start_time # Check correctness if reference solution is available From ef0db19b3667f7c9c92f1995d96aaeba452599fc Mon Sep 17 00:00:00 2001 From: Christopher Rackauckas Date: Mon, 11 Aug 2025 11:33:31 -0400 Subject: [PATCH 3/4] Update lib/LinearSolveAutotune/src/benchmarking.jl --- lib/LinearSolveAutotune/src/benchmarking.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/LinearSolveAutotune/src/benchmarking.jl b/lib/LinearSolveAutotune/src/benchmarking.jl index 6e3cdd9cb..0a178cb8c 100644 --- a/lib/LinearSolveAutotune/src/benchmarking.jl +++ b/lib/LinearSolveAutotune/src/benchmarking.jl @@ -254,7 +254,7 @@ function benchmark_algorithms(matrix_sizes, algorithms, alg_names, eltypes; catch e success = false error_msg = string(e) - gflops = 0.0 + gflops = NaN # Don't warn for each failure, just record it end From b74eed8d35d11c03c05553978f96ab821a28f094 Mon Sep 17 00:00:00 2001 From: ChrisRackauckas Date: Mon, 11 Aug 2025 11:36:38 -0400 Subject: [PATCH 4/4] Make analysis tools robust to NaN values from timeouts - Filter out NaN values when computing mean, max, and std statistics - Exclude NaN values from plots to avoid visualization errors - Report number of timed-out tests in summary output - Ensure categorize_results excludes NaN values when selecting best algorithms - All aggregation functions now properly handle NaN values that indicate timeouts This ensures the autotuning system works correctly even when some tests timeout, which is expected behavior for large matrix sizes or slow algorithms. 
--- .../src/LinearSolveAutotune.jl | 30 +++++++++++++------ lib/LinearSolveAutotune/src/benchmarking.jl | 9 +++--- lib/LinearSolveAutotune/src/plotting.jl | 2 +- lib/LinearSolveAutotune/src/telemetry.jl | 5 ++-- 4 files changed, 30 insertions(+), 16 deletions(-) diff --git a/lib/LinearSolveAutotune/src/LinearSolveAutotune.jl b/lib/LinearSolveAutotune/src/LinearSolveAutotune.jl index 42d89cbeb..39276726c 100644 --- a/lib/LinearSolveAutotune/src/LinearSolveAutotune.jl +++ b/lib/LinearSolveAutotune/src/LinearSolveAutotune.jl @@ -78,13 +78,13 @@ function Base.show(io::IO, results::AutotuneResults) println(io, " • Julia: ", get(results.sysinfo, "julia_version", "Unknown")) println(io, " • Threads: ", get(results.sysinfo, "num_threads", "Unknown"), " (BLAS: ", get(results.sysinfo, "blas_num_threads", "Unknown"), ")") - # Results summary - successful_results = filter(row -> row.success, results.results_df) + # Results summary - filter out NaN values + successful_results = filter(row -> row.success && !isnan(row.gflops), results.results_df) if nrow(successful_results) > 0 println(io, "\n🏆 Top Performing Algorithms:") summary = combine(groupby(successful_results, :algorithm), - :gflops => mean => :avg_gflops, - :gflops => maximum => :max_gflops, + :gflops => (x -> mean(filter(!isnan, x))) => :avg_gflops, + :gflops => (x -> maximum(filter(!isnan, x))) => :max_gflops, nrow => :num_tests) sort!(summary, :avg_gflops, rev = true) @@ -104,6 +104,12 @@ println(io, "📏 Matrix Sizes: ", minimum(sizes), "×", minimum(sizes), " to ", maximum(sizes), "×", maximum(sizes)) + # Report timeouts if any + timeout_results = filter(row -> isnan(row.gflops), results.results_df) + if nrow(timeout_results) > 0 + println(io, "⏱️ Timed Out: ", nrow(timeout_results), " tests exceeded time limit") + end + # Call to action - reordered println(io, "\n" * "="^60) println(io, "🚀 For comprehensive results, consider running:") @@ -257,15 
+263,21 @@ function autotune_setup(; results_df = benchmark_algorithms(matrix_sizes, all_algs, all_names, eltypes; samples = samples, seconds = seconds, sizes = sizes, maxtime = maxtime) - # Display results table - successful_results = filter(row -> row.success, results_df) + # Display results table - filter out NaN values + successful_results = filter(row -> row.success && !isnan(row.gflops), results_df) + timeout_results = filter(row -> isnan(row.gflops), results_df) + + if nrow(timeout_results) > 0 + @info "$(nrow(timeout_results)) tests timed out (exceeded $(maxtime)s limit)" + end + if nrow(successful_results) > 0 @info "Benchmark completed successfully!" - # Create summary table for display + # Create summary table for display - handle NaN values summary = combine(groupby(successful_results, :algorithm), - :gflops => mean => :avg_gflops, - :gflops => maximum => :max_gflops, + :gflops => (x -> mean(filter(!isnan, x))) => :avg_gflops, + :gflops => (x -> maximum(filter(!isnan, x))) => :max_gflops, nrow => :num_tests) sort!(summary, :avg_gflops, rev = true) diff --git a/lib/LinearSolveAutotune/src/benchmarking.jl b/lib/LinearSolveAutotune/src/benchmarking.jl index 0a178cb8c..21d868647 100644 --- a/lib/LinearSolveAutotune/src/benchmarking.jl +++ b/lib/LinearSolveAutotune/src/benchmarking.jl @@ -325,8 +325,8 @@ Categorize the benchmark results into size ranges and find the best algorithm fo For complex types, avoids RFLUFactorization if possible due to known issues. """ function categorize_results(df::DataFrame) - # Filter successful results - successful_df = filter(row -> row.success, df) + # Filter successful results and exclude NaN values + successful_df = filter(row -> row.success && !isnan(row.gflops), df) if nrow(successful_df) == 0 @warn "No successful benchmark results found!" 
@@ -366,8 +366,9 @@ function categorize_results(df::DataFrame) continue end - # Calculate average GFLOPs for each algorithm in this range - avg_results = combine(groupby(range_df, :algorithm), :gflops => mean => :avg_gflops) + # Calculate average GFLOPs for each algorithm in this range, excluding NaN values + avg_results = combine(groupby(range_df, :algorithm), + :gflops => (x -> mean(filter(!isnan, x))) => :avg_gflops) # Sort by performance sort!(avg_results, :avg_gflops, rev=true) diff --git a/lib/LinearSolveAutotune/src/plotting.jl b/lib/LinearSolveAutotune/src/plotting.jl index 02e1c7a8c..ec841cd79 100644 --- a/lib/LinearSolveAutotune/src/plotting.jl +++ b/lib/LinearSolveAutotune/src/plotting.jl @@ -44,7 +44,7 @@ function create_benchmark_plots(df::DataFrame; title_base = "LinearSolve.jl LU F # Plot each algorithm for this element type for alg in algorithms - alg_df = filter(row -> row.algorithm == alg, eltype_df) + alg_df = filter(row -> row.algorithm == alg && !isnan(row.gflops), eltype_df) if nrow(alg_df) > 0 # Sort by size for proper line plotting sort!(alg_df, :size) diff --git a/lib/LinearSolveAutotune/src/telemetry.jl b/lib/LinearSolveAutotune/src/telemetry.jl index f41634d67..ab6e66513 100644 --- a/lib/LinearSolveAutotune/src/telemetry.jl +++ b/lib/LinearSolveAutotune/src/telemetry.jl @@ -365,9 +365,10 @@ function format_detailed_results_markdown(df::DataFrame) end # Create a summary table with average performance per algorithm for this element type + # Filter out NaN values when computing statistics summary = combine(groupby(eltype_df, :algorithm), - :gflops => mean => :avg_gflops, - :gflops => std => :std_gflops, + :gflops => (x -> mean(filter(!isnan, x))) => :avg_gflops, + :gflops => (x -> std(filter(!isnan, x))) => :std_gflops, nrow => :num_tests) sort!(summary, :avg_gflops, rev = true)