Skip to content

Commit 30fd92d

Browse files
Improve timeout handling: properly kill timed-out tasks
- Use Channel-based communication between warmup and timer tasks
- Properly interrupt timed-out tasks with Base.throwto()
- Clean up timer task when warmup completes successfully
- Handle exceptions from warmup task properly
- Prevent resource leaks from hanging tasks
1 parent dbe9b15 commit 30fd92d

File tree

1 file changed

+38
-8
lines changed

1 file changed

+38
-8
lines changed

lib/LinearSolveAutotune/src/benchmarking.jl

Lines changed: 38 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -156,28 +156,58 @@ function benchmark_algorithms(matrix_sizes, algorithms, alg_names, eltypes;
156156
# Time the warmup run and correctness check
157157
start_time = time()
158158

159+
# Create a channel for communication between tasks
160+
result_channel = Channel(1)
161+
159162
# Warmup run and correctness check with timeout
160163
warmup_task = @async begin
161-
solve(prob, alg)
164+
try
165+
result = solve(prob, alg)
166+
put!(result_channel, result)
167+
catch e
168+
put!(result_channel, e)
169+
end
170+
end
171+
172+
# Timer task to enforce timeout
173+
timer_task = @async begin
174+
sleep(maxtime)
175+
if !istaskdone(warmup_task)
176+
try
177+
Base.throwto(warmup_task, InterruptException())
178+
catch
179+
# Task might be in non-interruptible state
180+
end
181+
put!(result_channel, :timeout)
182+
end
162183
end
163184

164-
# Wait for warmup to complete or timeout
185+
# Wait for result or timeout
165186
warmup_sol = nothing
166-
timeout_wait = maxtime
167-
while !istaskdone(warmup_task) && (time() - start_time) < timeout_wait
168-
sleep(0.1)
187+
result = take!(result_channel)
188+
189+
# Clean up timer task if still running
190+
if !istaskdone(timer_task)
191+
try
192+
Base.throwto(timer_task, InterruptException())
193+
catch
194+
# Timer task might have already finished
195+
end
169196
end
170197

171-
if !istaskdone(warmup_task)
198+
if result === :timeout
172199
# Task timed out
173200
timed_out = true
174201
@warn "Algorithm $name timed out (exceeded $(maxtime)s) for size $n, eltype $eltype. Recording as NaN."
175202
success = false
176203
error_msg = "Timed out (exceeded $(maxtime)s)"
177204
gflops = NaN
205+
elseif result isa Exception
206+
# Task threw an error
207+
throw(result)
178208
else
179-
# Get the result
180-
warmup_sol = fetch(warmup_task)
209+
# Successful completion
210+
warmup_sol = result
181211
elapsed_time = time() - start_time
182212

183213
# Check correctness if reference solution is available

0 commit comments

Comments
 (0)