31 changes: 31 additions & 0 deletions docs/src/tutorials/autotune.md
@@ -132,6 +132,37 @@ results = autotune_setup(
)
```

### Time Limits for Algorithm Tests

Control the maximum time allowed for each algorithm test (including the accuracy check):

```julia
# Default: 100 seconds maximum per algorithm test
results = autotune_setup() # maxtime = 100.0

# Quick timeout for fast exploration
results = autotune_setup(maxtime = 10.0)

# Extended timeout for slow algorithms or large matrices
results = autotune_setup(
maxtime = 300.0, # 5 minutes per test
sizes = [:large, :big]
)

# Conservative timeout for production benchmarking
results = autotune_setup(
maxtime = 200.0,
samples = 10,
seconds = 2.0
)
```

When an algorithm exceeds the `maxtime` limit:
- The test is skipped to prevent hanging
- The result is recorded as `NaN` in the benchmark data (see the sketch after this list)
- A warning is displayed indicating the timeout
- The benchmark continues with the next algorithm
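
The `NaN` entries can be pulled back out of the returned results table afterwards. A minimal sketch: the `results_df` field and the `algorithm`/`size` column names follow the ones used elsewhere in this PR, so treat them as assumptions rather than documented API:

```julia
using DataFrames

results = autotune_setup(maxtime = 10.0)

# NaN rows mark tests that hit the time limit
timed_out = filter(row -> isnan(row.gflops), results.results_df)

# List the algorithm/size combinations that were skipped
for row in eachrow(timed_out)
    println(row.algorithm, " timed out at size ", row.size, "×", row.size)
end
```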

### Missing Algorithm Handling

By default, autotune expects all algorithms to be available to ensure complete benchmarking. You can relax this requirement:
41 changes: 29 additions & 12 deletions lib/LinearSolveAutotune/src/LinearSolveAutotune.jl
@@ -78,13 +78,13 @@ function Base.show(io::IO, results::AutotuneResults)
println(io, " • Julia: ", get(results.sysinfo, "julia_version", "Unknown"))
println(io, " • Threads: ", get(results.sysinfo, "num_threads", "Unknown"), " (BLAS: ", get(results.sysinfo, "blas_num_threads", "Unknown"), ")")

# Results summary
successful_results = filter(row -> row.success, results.results_df)
# Results summary - filter out NaN values
successful_results = filter(row -> row.success && !isnan(row.gflops), results.results_df)
if nrow(successful_results) > 0
println(io, "\n🏆 Top Performing Algorithms:")
summary = combine(groupby(successful_results, :algorithm),
:gflops => mean => :avg_gflops,
:gflops => maximum => :max_gflops,
:gflops => (x -> mean(filter(!isnan, x))) => :avg_gflops,
:gflops => (x -> maximum(filter(!isnan, x))) => :max_gflops,
nrow => :num_tests)
sort!(summary, :avg_gflops, rev = true)
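
For context on why the reducers are wrapped here: `mean` and `maximum` over a vector that contains `NaN` return `NaN`, so a single timed-out sample would poison an algorithm's entire summary row. A standalone toy illustration of the wrapped-reducer pattern (made-up data, not the package's own tables):

```julia
using DataFrames, Statistics

df = DataFrame(algorithm = ["A", "A", "B"], gflops = [10.0, NaN, 7.5])

# Plain `mean` is poisoned by the timed-out (NaN) sample for algorithm A
naive = combine(groupby(df, :algorithm), :gflops => mean => :avg_gflops)
# naive.avg_gflops is [NaN, 7.5]

# Filtering NaN first keeps the remaining measurements usable
robust = combine(groupby(df, :algorithm),
    :gflops => (x -> mean(filter(!isnan, x))) => :avg_gflops)
# robust.avg_gflops is [10.0, 7.5]
```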

@@ -104,6 +104,12 @@ function Base.show(io::IO, results::AutotuneResults)
println(io, "📏 Matrix Sizes: ", minimum(sizes), "×", minimum(sizes),
" to ", maximum(sizes), "×", maximum(sizes))

# Report timeouts if any
timeout_results = filter(row -> isnan(row.gflops), results.results_df)
if nrow(timeout_results) > 0
println(io, "⏱️ Timed Out: ", nrow(timeout_results), " tests exceeded time limit")
end

# Call to action - reordered
println(io, "\n" * "="^60)
println(io, "🚀 For comprehensive results, consider running:")
@@ -158,7 +164,8 @@ end
seconds::Float64 = 0.5,
eltypes = (Float32, Float64, ComplexF32, ComplexF64),
skip_missing_algs::Bool = false,
include_fastlapack::Bool = false)
include_fastlapack::Bool = false,
maxtime::Float64 = 100.0)

Run a comprehensive benchmark of all available LU factorization methods and optionally:

@@ -182,6 +189,8 @@ Run a comprehensive benchmark of all available LU factorization methods and opti
- `eltypes = (Float32, Float64, ComplexF32, ComplexF64)`: Element types to benchmark
- `skip_missing_algs::Bool = false`: If false, error when expected algorithms are missing; if true, warn instead
- `include_fastlapack::Bool = false`: If true, includes FastLUFactorization in benchmarks
- `maxtime::Float64 = 100.0`: Maximum time in seconds for each algorithm test (including the accuracy check).
   If exceeded, the run is skipped and recorded as `NaN`.

# Returns

@@ -216,7 +225,8 @@ function autotune_setup(;
seconds::Float64 = 0.5,
eltypes = (Float64,),
skip_missing_algs::Bool = false,
include_fastlapack::Bool = false)
include_fastlapack::Bool = false,
maxtime::Float64 = 100.0)
@info "Starting LinearSolve.jl autotune setup..."
@info "Configuration: sizes=$sizes, set_preferences=$set_preferences"
@info "Element types to benchmark: $(join(eltypes, ", "))"
@@ -249,18 +259,25 @@ function autotune_setup(;

# Run benchmarks
@info "Running benchmarks (this may take several minutes)..."
@info "Maximum time per algorithm test: $(maxtime)s"
results_df = benchmark_algorithms(matrix_sizes, all_algs, all_names, eltypes;
samples = samples, seconds = seconds, sizes = sizes)
samples = samples, seconds = seconds, sizes = sizes, maxtime = maxtime)

# Display results table
successful_results = filter(row -> row.success, results_df)
# Display results table - filter out NaN values
successful_results = filter(row -> row.success && !isnan(row.gflops), results_df)
timeout_results = filter(row -> isnan(row.gflops), results_df)

if nrow(timeout_results) > 0
@info "$(nrow(timeout_results)) tests timed out (exceeded $(maxtime)s limit)"
end

if nrow(successful_results) > 0
@info "Benchmark completed successfully!"

# Create summary table for display
# Create summary table for display - handle NaN values
summary = combine(groupby(successful_results, :algorithm),
:gflops => mean => :avg_gflops,
:gflops => maximum => :max_gflops,
:gflops => (x -> mean(filter(!isnan, x))) => :avg_gflops,
:gflops => (x -> maximum(filter(!isnan, x))) => :max_gflops,
nrow => :num_tests)
sort!(summary, :avg_gflops, rev = true)

140 changes: 107 additions & 33 deletions lib/LinearSolveAutotune/src/benchmarking.jl
@@ -73,14 +73,19 @@ end

"""
benchmark_algorithms(matrix_sizes, algorithms, alg_names, eltypes;
samples=5, seconds=0.5, sizes=[:small, :medium])
samples=5, seconds=0.5, sizes=[:small, :medium],
maxtime=100.0)

Benchmark the given algorithms across different matrix sizes and element types.
Returns a DataFrame with results including element type information.

# Arguments
- `maxtime::Float64 = 100.0`: Maximum time in seconds for each algorithm test (including accuracy check).
If the accuracy check exceeds this time, the run is skipped and recorded as NaN.
"""
function benchmark_algorithms(matrix_sizes, algorithms, alg_names, eltypes;
samples = 5, seconds = 0.5, sizes = [:tiny, :small, :medium, :large],
check_correctness = true, correctness_tol = 1e0)
check_correctness = true, correctness_tol = 1e0, maxtime = 100.0)

# Set benchmark parameters
old_params = BenchmarkTools.DEFAULT_PARAMETERS
@@ -136,52 +141,120 @@ function benchmark_algorithms(matrix_sizes, algorithms, alg_names, eltypes;
ProgressMeter.update!(progress,
desc="Benchmarking $name on $(n)×$(n) $eltype matrix: ")

gflops = 0.0
gflops = NaN # Use NaN for timed out runs
success = true
error_msg = ""
passed_correctness = true
timed_out = false

try
# Create the linear problem for this test
prob = LinearProblem(copy(A), copy(b);
u0 = copy(u0),
alias = LinearAliasSpecifier(alias_A = true, alias_b = true))

# Warmup run and correctness check
warmup_sol = solve(prob, alg)
# Time the warmup run and correctness check
start_time = time()

# Check correctness if reference solution is available
if check_correctness && reference_solution !== nothing
# Compute relative error
rel_error = norm(warmup_sol.u - reference_solution.u) / norm(reference_solution.u)

if rel_error > correctness_tol
passed_correctness = false
@warn "Algorithm $name failed correctness check for size $n, eltype $eltype. " *
"Relative error: $(round(rel_error, sigdigits=3)) > tolerance: $correctness_tol. " *
"Algorithm will be excluded from results."
success = false
error_msg = "Failed correctness check (rel_error = $(round(rel_error, sigdigits=3)))"
# Create a channel for communication between tasks
result_channel = Channel(1)

# Warmup run and correctness check with timeout
warmup_task = @async begin
try
result = solve(prob, alg)
put!(result_channel, result)
catch e
put!(result_channel, e)
end
end

# Timer task to enforce timeout
timer_task = @async begin
sleep(maxtime)
if !istaskdone(warmup_task)
try
Base.throwto(warmup_task, InterruptException())
catch
# Task might be in non-interruptible state
end
put!(result_channel, :timeout)
end
end

# Wait for result or timeout
warmup_sol = nothing
result = take!(result_channel)

# Clean up timer task if still running
if !istaskdone(timer_task)
try
Base.throwto(timer_task, InterruptException())
catch
# Timer task might have already finished
end
end

# Only benchmark if correctness check passed
if passed_correctness
# Actual benchmark
bench = @benchmark solve($prob, $alg) setup=(prob = LinearProblem(
copy($A), copy($b);
u0 = copy($u0),
alias = LinearAliasSpecifier(alias_A = true, alias_b = true)))

# Calculate GFLOPs
min_time_sec = minimum(bench.times) / 1e9
flops = luflop(n, n)
gflops = flops / min_time_sec / 1e9
if result === :timeout
# Task timed out
timed_out = true
@warn "Algorithm $name timed out (exceeded $(maxtime)s) for size $n, eltype $eltype. Recording as NaN."
success = false
error_msg = "Timed out (exceeded $(maxtime)s)"
gflops = NaN
elseif result isa Exception
# Task threw an error
throw(result)
else
# Successful completion
warmup_sol = result
elapsed_time = time() - start_time

# Check correctness if reference solution is available
if check_correctness && reference_solution !== nothing
# Compute relative error
rel_error = norm(warmup_sol.u - reference_solution.u) / norm(reference_solution.u)

if rel_error > correctness_tol
passed_correctness = false
@warn "Algorithm $name failed correctness check for size $n, eltype $eltype. " *
"Relative error: $(round(rel_error, sigdigits=3)) > tolerance: $correctness_tol. " *
"Algorithm will be excluded from results."
success = false
error_msg = "Failed correctness check (rel_error = $(round(rel_error, sigdigits=3)))"
gflops = 0.0
end
end

# Only benchmark if correctness check passed and we have time remaining
if passed_correctness && !timed_out
# Check if we have enough time remaining for benchmarking
# Allow at least 2x the warmup time for benchmarking
remaining_time = maxtime - elapsed_time
if remaining_time < 2 * elapsed_time
@warn "Algorithm $name: insufficient time remaining for benchmarking (warmup took $(round(elapsed_time, digits=2))s). Recording as NaN."
gflops = NaN
success = false
error_msg = "Insufficient time for benchmarking"
else
# Actual benchmark
bench = @benchmark solve($prob, $alg) setup=(prob = LinearProblem(
copy($A), copy($b);
u0 = copy($u0),
alias = LinearAliasSpecifier(alias_A = true, alias_b = true)))

# Calculate GFLOPs
min_time_sec = minimum(bench.times) / 1e9
flops = luflop(n, n)
gflops = flops / min_time_sec / 1e9
end
end
end

catch e
success = false
error_msg = string(e)
gflops = NaN
# Don't warn for each failure, just record it
end
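
The timeout machinery above boils down to a worker task and a timer task racing to fill a single-slot `Channel`. A condensed sketch of that pattern as a standalone helper: `run_with_timeout` is an illustrative name, not a function in LinearSolveAutotune, and `Base.throwto` is best-effort (a task stuck in a non-yielding BLAS call may not be interruptible, which is why the `catch` blocks above swallow failures):

```julia
# Sketch of the timeout pattern used above; `run_with_timeout` is an
# illustrative helper name, not part of LinearSolveAutotune.
function run_with_timeout(f, maxtime::Real)
    result_channel = Channel(1)

    # Worker: run the workload and report either its result or its exception
    worker = @async begin
        try
            put!(result_channel, f())
        catch e
            put!(result_channel, e)
        end
    end

    # Timer: after `maxtime` seconds, try to interrupt a still-running worker
    timer = @async begin
        sleep(maxtime)
        if !istaskdone(worker)
            try
                Base.throwto(worker, InterruptException())  # best-effort interrupt
            catch
            end
            put!(result_channel, :timeout)
        end
    end

    result = take!(result_channel)

    # Stop the timer if the worker finished first
    if !istaskdone(timer)
        try
            Base.throwto(timer, InterruptException())
        catch
        end
    end

    result === :timeout && return nothing   # caller records NaN
    result isa Exception && throw(result)
    return result
end

# Usage: sol = run_with_timeout(() -> solve(prob, alg), maxtime)
```

One caveat mirrored from the code above: if the interrupt lands inside the worker's `try`, the timeout can surface as the thrown `InterruptException` rather than `:timeout`; the benchmark loop treats both outcomes as a failed run recorded as `NaN`.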

@@ -252,8 +325,8 @@ Categorize the benchmark results into size ranges and find the best algorithm fo
For complex types, avoids RFLUFactorization if possible due to known issues.
"""
function categorize_results(df::DataFrame)
# Filter successful results
successful_df = filter(row -> row.success, df)
# Filter successful results and exclude NaN values
successful_df = filter(row -> row.success && !isnan(row.gflops), df)

if nrow(successful_df) == 0
@warn "No successful benchmark results found!"
@@ -293,8 +366,9 @@ function categorize_results(df::DataFrame)
continue
end

# Calculate average GFLOPs for each algorithm in this range
avg_results = combine(groupby(range_df, :algorithm), :gflops => mean => :avg_gflops)
# Calculate average GFLOPs for each algorithm in this range, excluding NaN values
avg_results = combine(groupby(range_df, :algorithm),
:gflops => (x -> mean(filter(!isnan, x))) => :avg_gflops)

# Sort by performance
sort!(avg_results, :avg_gflops, rev=true)
2 changes: 1 addition & 1 deletion lib/LinearSolveAutotune/src/plotting.jl
@@ -44,7 +44,7 @@ function create_benchmark_plots(df::DataFrame; title_base = "LinearSolve.jl LU F

# Plot each algorithm for this element type
for alg in algorithms
alg_df = filter(row -> row.algorithm == alg, eltype_df)
alg_df = filter(row -> row.algorithm == alg && !isnan(row.gflops), eltype_df)
if nrow(alg_df) > 0
# Sort by size for proper line plotting
sort!(alg_df, :size)
5 changes: 3 additions & 2 deletions lib/LinearSolveAutotune/src/telemetry.jl
@@ -365,9 +365,10 @@ function format_detailed_results_markdown(df::DataFrame)
end

# Create a summary table with average performance per algorithm for this element type
# Filter out NaN values when computing statistics
summary = combine(groupby(eltype_df, :algorithm),
:gflops => mean => :avg_gflops,
:gflops => std => :std_gflops,
:gflops => (x -> mean(filter(!isnan, x))) => :avg_gflops,
:gflops => (x -> std(filter(!isnan, x))) => :std_gflops,
nrow => :num_tests)
sort!(summary, :avg_gflops, rev = true)
