Skip to content

Commit dbe9b15

Browse files
Add maxtime parameter to LinearSolveAutotune
- Add `maxtime` parameter (default 100 s) to `autotune_setup()` and `benchmark_algorithms()`
- Implement timeout handling during accuracy checks and benchmarking
- Record timed-out runs as `NaN` in results
- Update docstrings and documentation to explain the new parameter
- Prevent hanging on slow algorithms or large matrices
1 parent 06ce5b1 commit dbe9b15

File tree

3 files changed

+112
-33
lines changed

3 files changed

+112
-33
lines changed

docs/src/tutorials/autotune.md

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,37 @@ results = autotune_setup(
132132
)
133133
```
134134

135+
### Time Limits for Algorithm Tests
136+
137+
Use the `maxtime` keyword to control the maximum time, in seconds, allowed for each algorithm test (including the accuracy check):
138+
139+
```julia
140+
# Default: 100 seconds maximum per algorithm test
141+
results = autotune_setup() # maxtime = 100.0
142+
143+
# Quick timeout for fast exploration
144+
results = autotune_setup(maxtime = 10.0)
145+
146+
# Extended timeout for slow algorithms or large matrices
147+
results = autotune_setup(
148+
maxtime = 300.0, # 5 minutes per test
149+
sizes = [:large, :big]
150+
)
151+
152+
# Conservative timeout for production benchmarking
153+
results = autotune_setup(
154+
maxtime = 200.0,
155+
samples = 10,
156+
seconds = 2.0
157+
)
158+
```
159+
160+
When an algorithm exceeds the `maxtime` limit:
161+
- The test is skipped to prevent hanging
162+
- The result is recorded as `NaN` in the benchmark data
163+
- A warning is displayed indicating the timeout
164+
- The benchmark continues with the next algorithm
165+
135166
### Missing Algorithm Handling
136167

137168
By default, autotune expects all algorithms to be available to ensure complete benchmarking. You can relax this requirement:

lib/LinearSolveAutotune/src/LinearSolveAutotune.jl

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,8 @@ end
158158
seconds::Float64 = 0.5,
159159
eltypes = (Float32, Float64, ComplexF32, ComplexF64),
160160
skip_missing_algs::Bool = false,
161-
include_fastlapack::Bool = false)
161+
include_fastlapack::Bool = false,
162+
maxtime::Float64 = 100.0)
162163
163164
Run a comprehensive benchmark of all available LU factorization methods and optionally:
164165
@@ -182,6 +183,8 @@ Run a comprehensive benchmark of all available LU factorization methods and opti
182183
- `eltypes = (Float32, Float64, ComplexF32, ComplexF64)`: Element types to benchmark
183184
- `skip_missing_algs::Bool = false`: If false, error when expected algorithms are missing; if true, warn instead
184185
- `include_fastlapack::Bool = false`: If true, includes FastLUFactorization in benchmarks
186+
- `maxtime::Float64 = 100.0`: Maximum time in seconds for each algorithm test (including accuracy check).
187+
If exceeded, the run is skipped and recorded as NaN
185188
186189
# Returns
187190
@@ -216,7 +219,8 @@ function autotune_setup(;
216219
seconds::Float64 = 0.5,
217220
eltypes = (Float64,),
218221
skip_missing_algs::Bool = false,
219-
include_fastlapack::Bool = false)
222+
include_fastlapack::Bool = false,
223+
maxtime::Float64 = 100.0)
220224
@info "Starting LinearSolve.jl autotune setup..."
221225
@info "Configuration: sizes=$sizes, set_preferences=$set_preferences"
222226
@info "Element types to benchmark: $(join(eltypes, ", "))"
@@ -249,8 +253,9 @@ function autotune_setup(;
249253

250254
# Run benchmarks
251255
@info "Running benchmarks (this may take several minutes)..."
256+
@info "Maximum time per algorithm test: $(maxtime)s"
252257
results_df = benchmark_algorithms(matrix_sizes, all_algs, all_names, eltypes;
253-
samples = samples, seconds = seconds, sizes = sizes)
258+
samples = samples, seconds = seconds, sizes = sizes, maxtime = maxtime)
254259

255260
# Display results table
256261
successful_results = filter(row -> row.success, results_df)

lib/LinearSolveAutotune/src/benchmarking.jl

Lines changed: 73 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -73,14 +73,19 @@ end
7373

7474
"""
7575
benchmark_algorithms(matrix_sizes, algorithms, alg_names, eltypes;
76-
samples=5, seconds=0.5, sizes=[:small, :medium])
76+
samples=5, seconds=0.5, sizes=[:tiny, :small, :medium, :large],
77+
check_correctness=true, correctness_tol=1e0, maxtime=100.0)
7778
7879
Benchmark the given algorithms across different matrix sizes and element types.
7980
Returns a DataFrame with results including element type information.
81+
82+
# Arguments
83+
- `maxtime::Float64 = 100.0`: Maximum time in seconds for each algorithm test (including accuracy check).
84+
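If the accuracy check exceeds this time, the run is skipped and recorded as NaN. A run is also recorded as NaN when the time remaining after the warmup is less than twice the warmup duration, since that leaves too little time for benchmarking.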
8085
"""
8186
function benchmark_algorithms(matrix_sizes, algorithms, alg_names, eltypes;
8287
samples = 5, seconds = 0.5, sizes = [:tiny, :small, :medium, :large],
83-
check_correctness = true, correctness_tol = 1e0)
88+
check_correctness = true, correctness_tol = 1e0, maxtime = 100.0)
8489

8590
# Set benchmark parameters
8691
old_params = BenchmarkTools.DEFAULT_PARAMETERS
@@ -136,52 +141,90 @@ function benchmark_algorithms(matrix_sizes, algorithms, alg_names, eltypes;
136141
ProgressMeter.update!(progress,
137142
desc="Benchmarking $name on $(n)×$(n) $eltype matrix: ")
138143

139-
gflops = 0.0
144+
gflops = NaN # Use NaN for timed out runs
140145
success = true
141146
error_msg = ""
142147
passed_correctness = true
148+
timed_out = false
143149

144150
try
145151
# Create the linear problem for this test
146152
prob = LinearProblem(copy(A), copy(b);
147153
u0 = copy(u0),
148154
alias = LinearAliasSpecifier(alias_A = true, alias_b = true))
149155

150-
# Warmup run and correctness check
151-
warmup_sol = solve(prob, alg)
156+
# Time the warmup run and correctness check
157+
start_time = time()
152158

153-
# Check correctness if reference solution is available
154-
if check_correctness && reference_solution !== nothing
155-
# Compute relative error
156-
rel_error = norm(warmup_sol.u - reference_solution.u) / norm(reference_solution.u)
157-
158-
if rel_error > correctness_tol
159-
passed_correctness = false
160-
@warn "Algorithm $name failed correctness check for size $n, eltype $eltype. " *
161-
"Relative error: $(round(rel_error, sigdigits=3)) > tolerance: $correctness_tol. " *
162-
"Algorithm will be excluded from results."
163-
success = false
164-
error_msg = "Failed correctness check (rel_error = $(round(rel_error, sigdigits=3)))"
165-
end
159+
# Warmup run and correctness check with timeout
160+
warmup_task = @async begin
161+
solve(prob, alg)
162+
end
163+
164+
# Wait for warmup to complete or timeout
165+
warmup_sol = nothing
166+
timeout_wait = maxtime
167+
while !istaskdone(warmup_task) && (time() - start_time) < timeout_wait
168+
sleep(0.1)
166169
end
167170

168-
# Only benchmark if correctness check passed
169-
if passed_correctness
170-
# Actual benchmark
171-
bench = @benchmark solve($prob, $alg) setup=(prob = LinearProblem(
172-
copy($A), copy($b);
173-
u0 = copy($u0),
174-
alias = LinearAliasSpecifier(alias_A = true, alias_b = true)))
175-
176-
# Calculate GFLOPs
177-
min_time_sec = minimum(bench.times) / 1e9
178-
flops = luflop(n, n)
179-
gflops = flops / min_time_sec / 1e9
171+
if !istaskdone(warmup_task)
172+
# Task timed out
173+
timed_out = true
174+
@warn "Algorithm $name timed out (exceeded $(maxtime)s) for size $n, eltype $eltype. Recording as NaN."
175+
success = false
176+
error_msg = "Timed out (exceeded $(maxtime)s)"
177+
gflops = NaN
178+
else
179+
# Get the result
180+
warmup_sol = fetch(warmup_task)
181+
elapsed_time = time() - start_time
182+
183+
# Check correctness if reference solution is available
184+
if check_correctness && reference_solution !== nothing
185+
# Compute relative error
186+
rel_error = norm(warmup_sol.u - reference_solution.u) / norm(reference_solution.u)
187+
188+
if rel_error > correctness_tol
189+
passed_correctness = false
190+
@warn "Algorithm $name failed correctness check for size $n, eltype $eltype. " *
191+
"Relative error: $(round(rel_error, sigdigits=3)) > tolerance: $correctness_tol. " *
192+
"Algorithm will be excluded from results."
193+
success = false
194+
error_msg = "Failed correctness check (rel_error = $(round(rel_error, sigdigits=3)))"
195+
gflops = 0.0
196+
end
197+
end
198+
199+
# Only benchmark if correctness check passed and we have time remaining
200+
if passed_correctness && !timed_out
201+
# Check if we have enough time remaining for benchmarking
202+
# Allow at least 2x the warmup time for benchmarking
203+
remaining_time = maxtime - elapsed_time
204+
if remaining_time < 2 * elapsed_time
205+
@warn "Algorithm $name: insufficient time remaining for benchmarking (warmup took $(round(elapsed_time, digits=2))s). Recording as NaN."
206+
gflops = NaN
207+
success = false
208+
error_msg = "Insufficient time for benchmarking"
209+
else
210+
# Actual benchmark
211+
bench = @benchmark solve($prob, $alg) setup=(prob = LinearProblem(
212+
copy($A), copy($b);
213+
u0 = copy($u0),
214+
alias = LinearAliasSpecifier(alias_A = true, alias_b = true)))
215+
216+
# Calculate GFLOPs
217+
min_time_sec = minimum(bench.times) / 1e9
218+
flops = luflop(n, n)
219+
gflops = flops / min_time_sec / 1e9
220+
end
221+
end
180222
end
181223

182224
catch e
183225
success = false
184226
error_msg = string(e)
227+
gflops = 0.0
185228
# Don't warn for each failure, just record it
186229
end
187230

0 commit comments

Comments
 (0)