31 changes: 31 additions & 0 deletions docs/src/tutorials/autotune.md
@@ -132,6 +132,37 @@ results = autotune_setup(
)
```

### Time Limits for Algorithm Tests

Control the maximum time allowed for each algorithm test (including the accuracy check):

```julia
# Default: 100 seconds maximum per algorithm test
results = autotune_setup() # maxtime = 100.0

# Quick timeout for fast exploration
results = autotune_setup(maxtime = 10.0)

# Extended timeout for slow algorithms or large matrices
results = autotune_setup(
maxtime = 300.0, # 5 minutes per test
sizes = [:large, :big]
)

# Conservative timeout for production benchmarking
results = autotune_setup(
maxtime = 200.0,
samples = 10,
seconds = 2.0
)
```

When an algorithm exceeds the `maxtime` limit:
- The test is skipped to prevent hanging
- The result is recorded as `NaN` in the benchmark data (see the sketch after this list)
- A warning is displayed indicating the timeout
- The benchmark continues with the next algorithm
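
The `NaN` entries can be pulled back out of the returned results table afterwards. A minimal sketch: the `results_df` field and the `algorithm`/`size` column names follow the ones used elsewhere in this PR, so treat them as assumptions rather than documented API:

```julia
using DataFrames

results = autotune_setup(maxtime = 10.0)

# NaN rows mark tests that hit the time limit
timed_out = filter(row -> isnan(row.gflops), results.results_df)

# List the algorithm/size combinations that were skipped
for row in eachrow(timed_out)
    println(row.algorithm, " timed out at size ", row.size, "×", row.size)
end
```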

### Missing Algorithm Handling

By default, autotune expects all algorithms to be available to ensure complete benchmarking. You can relax this requirement:
41 changes: 29 additions & 12 deletions lib/LinearSolveAutotune/src/LinearSolveAutotune.jl
@@ -78,13 +78,13 @@ function Base.show(io::IO, results::AutotuneResults)
println(io, " • Julia: ", get(results.sysinfo, "julia_version", "Unknown"))
println(io, " • Threads: ", get(results.sysinfo, "num_threads", "Unknown"), " (BLAS: ", get(results.sysinfo, "blas_num_threads", "Unknown"), ")")

# Results summary
successful_results = filter(row -> row.success, results.results_df)
# Results summary - filter out NaN values
successful_results = filter(row -> row.success && !isnan(row.gflops), results.results_df)
if nrow(successful_results) > 0
println(io, "\n🏆 Top Performing Algorithms:")
summary = combine(groupby(successful_results, :algorithm),
:gflops => mean => :avg_gflops,
:gflops => maximum => :max_gflops,
:gflops => (x -> mean(filter(!isnan, x))) => :avg_gflops,
:gflops => (x -> maximum(filter(!isnan, x))) => :max_gflops,
nrow => :num_tests)
sort!(summary, :avg_gflops, rev = true)
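
For context on why the reducers are wrapped here: `mean` and `maximum` over a vector that contains `NaN` return `NaN`, so a single timed-out sample would poison an algorithm's entire summary row. A standalone toy illustration of the wrapped-reducer pattern (made-up data, not the package's own tables):

```julia
using DataFrames, Statistics

df = DataFrame(algorithm = ["A", "A", "B"], gflops = [10.0, NaN, 7.5])

# Plain `mean` is poisoned by the timed-out (NaN) sample for algorithm A
naive = combine(groupby(df, :algorithm), :gflops => mean => :avg_gflops)
# naive.avg_gflops is [NaN, 7.5]

# Filtering NaN first keeps the remaining measurements usable
robust = combine(groupby(df, :algorithm),
    :gflops => (x -> mean(filter(!isnan, x))) => :avg_gflops)
# robust.avg_gflops is [10.0, 7.5]
```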

@@ -104,6 +104,12 @@ function Base.show(io::IO, results::AutotuneResults)
println(io, "📏 Matrix Sizes: ", minimum(sizes), "×", minimum(sizes),
" to ", maximum(sizes), "×", maximum(sizes))

# Report timeouts if any
timeout_results = filter(row -> isnan(row.gflops), results.results_df)
if nrow(timeout_results) > 0
println(io, "⏱️ Timed Out: ", nrow(timeout_results), " tests exceeded time limit")
end

# Call to action - reordered
println(io, "\n" * "="^60)
println(io, "🚀 For comprehensive results, consider running:")
@@ -158,7 +164,8 @@ end
seconds::Float64 = 0.5,
eltypes = (Float32, Float64, ComplexF32, ComplexF64),
skip_missing_algs::Bool = false,
include_fastlapack::Bool = false)
include_fastlapack::Bool = false,
maxtime::Float64 = 100.0)

Run a comprehensive benchmark of all available LU factorization methods and optionally:

@@ -182,6 +189,8 @@ Run a comprehensive benchmark of all available LU factorization methods and opti
- `eltypes = (Float32, Float64, ComplexF32, ComplexF64)`: Element types to benchmark
- `skip_missing_algs::Bool = false`: If false, error when expected algorithms are missing; if true, warn instead
- `include_fastlapack::Bool = false`: If true, includes FastLUFactorization in benchmarks
- `maxtime::Float64 = 100.0`: Maximum time in seconds for each algorithm test (including the accuracy check).
   If exceeded, the run is skipped and recorded as `NaN`.

# Returns

@@ -216,7 +225,8 @@ function autotune_setup(;
seconds::Float64 = 0.5,
eltypes = (Float64,),
skip_missing_algs::Bool = false,
include_fastlapack::Bool = false)
include_fastlapack::Bool = false,
maxtime::Float64 = 100.0)
@info "Starting LinearSolve.jl autotune setup..."
@info "Configuration: sizes=$sizes, set_preferences=$set_preferences"
@info "Element types to benchmark: $(join(eltypes, ", "))"
@@ -249,18 +259,25 @@ function autotune_setup(;

# Run benchmarks
@info "Running benchmarks (this may take several minutes)..."
@info "Maximum time per algorithm test: $(maxtime)s"
results_df = benchmark_algorithms(matrix_sizes, all_algs, all_names, eltypes;
samples = samples, seconds = seconds, sizes = sizes)
samples = samples, seconds = seconds, sizes = sizes, maxtime = maxtime)

# Display results table
successful_results = filter(row -> row.success, results_df)
# Display results table - filter out NaN values
successful_results = filter(row -> row.success && !isnan(row.gflops), results_df)
timeout_results = filter(row -> isnan(row.gflops), results_df)

if nrow(timeout_results) > 0
@info "$(nrow(timeout_results)) tests timed out (exceeded $(maxtime)s limit)"
end

if nrow(successful_results) > 0
@info "Benchmark completed successfully!"

# Create summary table for display
# Create summary table for display - handle NaN values
summary = combine(groupby(successful_results, :algorithm),
:gflops => mean => :avg_gflops,
:gflops => maximum => :max_gflops,
:gflops => (x -> mean(filter(!isnan, x))) => :avg_gflops,
:gflops => (x -> maximum(filter(!isnan, x))) => :max_gflops,
nrow => :num_tests)
sort!(summary, :avg_gflops, rev = true)

140 changes: 107 additions & 33 deletions lib/LinearSolveAutotune/src/benchmarking.jl
@@ -73,14 +73,19 @@ end

"""
benchmark_algorithms(matrix_sizes, algorithms, alg_names, eltypes;
samples=5, seconds=0.5, sizes=[:small, :medium])
samples=5, seconds=0.5, sizes=[:small, :medium],
maxtime=100.0)

Benchmark the given algorithms across different matrix sizes and element types.
Returns a DataFrame with results including element type information.

# Arguments
- `maxtime::Float64 = 100.0`: Maximum time in seconds for each algorithm test (including accuracy check).
If the accuracy check exceeds this time, the run is skipped and recorded as NaN.
"""
function benchmark_algorithms(matrix_sizes, algorithms, alg_names, eltypes;
samples = 5, seconds = 0.5, sizes = [:tiny, :small, :medium, :large],
check_correctness = true, correctness_tol = 1e0)
check_correctness = true, correctness_tol = 1e0, maxtime = 100.0)

# Set benchmark parameters
old_params = BenchmarkTools.DEFAULT_PARAMETERS
@@ -136,52 +141,120 @@ function benchmark_algorithms(matrix_sizes, algorithms, alg_names, eltypes;
ProgressMeter.update!(progress,
desc="Benchmarking $name on $(n)×$(n) $eltype matrix: ")

gflops = 0.0
gflops = NaN # Use NaN for timed out runs
success = true
error_msg = ""
passed_correctness = true
timed_out = false

try
# Create the linear problem for this test
prob = LinearProblem(copy(A), copy(b);
u0 = copy(u0),
alias = LinearAliasSpecifier(alias_A = true, alias_b = true))

# Warmup run and correctness check
warmup_sol = solve(prob, alg)
# Time the warmup run and correctness check
start_time = time()

# Check correctness if reference solution is available
if check_correctness && reference_solution !== nothing
# Compute relative error
rel_error = norm(warmup_sol.u - reference_solution.u) / norm(reference_solution.u)

if rel_error > correctness_tol
passed_correctness = false
@warn "Algorithm $name failed correctness check for size $n, eltype $eltype. " *
"Relative error: $(round(rel_error, sigdigits=3)) > tolerance: $correctness_tol. " *
"Algorithm will be excluded from results."
success = false
error_msg = "Failed correctness check (rel_error = $(round(rel_error, sigdigits=3)))"
# Create a channel for communication between tasks
result_channel = Channel(1)

# Warmup run and correctness check with timeout
warmup_task = @async begin
try
result = solve(prob, alg)
put!(result_channel, result)
catch e
put!(result_channel, e)
end
end

# Timer task to enforce timeout
timer_task = @async begin
sleep(maxtime)
if !istaskdone(warmup_task)
try
Base.throwto(warmup_task, InterruptException())
catch
# Task might be in non-interruptible state
end
put!(result_channel, :timeout)
end
end

# Wait for result or timeout
warmup_sol = nothing
result = take!(result_channel)

# Clean up timer task if still running
if !istaskdone(timer_task)
try
Base.throwto(timer_task, InterruptException())
catch
# Timer task might have already finished
end
end

# Only benchmark if correctness check passed
if passed_correctness
# Actual benchmark
bench = @benchmark solve($prob, $alg) setup=(prob = LinearProblem(
copy($A), copy($b);
u0 = copy($u0),
alias = LinearAliasSpecifier(alias_A = true, alias_b = true)))

# Calculate GFLOPs
min_time_sec = minimum(bench.times) / 1e9
flops = luflop(n, n)
gflops = flops / min_time_sec / 1e9
if result === :timeout
# Task timed out
timed_out = true
@warn "Algorithm $name timed out (exceeded $(maxtime)s) for size $n, eltype $eltype. Recording as NaN."
success = false
error_msg = "Timed out (exceeded $(maxtime)s)"
gflops = NaN
elseif result isa Exception
# Task threw an error
throw(result)
else
# Successful completion
warmup_sol = result
elapsed_time = time() - start_time

# Check correctness if reference solution is available
if check_correctness && reference_solution !== nothing
# Compute relative error
rel_error = norm(warmup_sol.u - reference_solution.u) / norm(reference_solution.u)

if rel_error > correctness_tol
passed_correctness = false
@warn "Algorithm $name failed correctness check for size $n, eltype $eltype. " *
"Relative error: $(round(rel_error, sigdigits=3)) > tolerance: $correctness_tol. " *
"Algorithm will be excluded from results."
success = false
error_msg = "Failed correctness check (rel_error = $(round(rel_error, sigdigits=3)))"
gflops = 0.0
end
end

# Only benchmark if correctness check passed and we have time remaining
if passed_correctness && !timed_out
# Check if we have enough time remaining for benchmarking
# Allow at least 2x the warmup time for benchmarking
remaining_time = maxtime - elapsed_time
if remaining_time < 2 * elapsed_time
@warn "Algorithm $name: insufficient time remaining for benchmarking (warmup took $(round(elapsed_time, digits=2))s). Recording as NaN."
gflops = NaN
success = false
error_msg = "Insufficient time for benchmarking"
else
# Actual benchmark
bench = @benchmark solve($prob, $alg) setup=(prob = LinearProblem(
copy($A), copy($b);
u0 = copy($u0),
alias = LinearAliasSpecifier(alias_A = true, alias_b = true)))

# Calculate GFLOPs
min_time_sec = minimum(bench.times) / 1e9
flops = luflop(n, n)
gflops = flops / min_time_sec / 1e9
end
end
end

catch e
success = false
error_msg = string(e)
gflops = NaN
# Don't warn for each failure, just record it
end
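
The timeout machinery above boils down to a worker task and a timer task racing to fill a single-slot `Channel`. A condensed sketch of that pattern as a standalone helper: `run_with_timeout` is an illustrative name, not a function in LinearSolveAutotune, and `Base.throwto` is best-effort (a task stuck in a non-yielding BLAS call may not be interruptible, which is why the `catch` blocks above swallow failures):

```julia
# Sketch of the timeout pattern used above; `run_with_timeout` is an
# illustrative helper name, not part of LinearSolveAutotune.
function run_with_timeout(f, maxtime::Real)
    result_channel = Channel(1)

    # Worker: run the workload and report either its result or its exception
    worker = @async begin
        try
            put!(result_channel, f())
        catch e
            put!(result_channel, e)
        end
    end

    # Timer: after `maxtime` seconds, try to interrupt a still-running worker
    timer = @async begin
        sleep(maxtime)
        if !istaskdone(worker)
            try
                Base.throwto(worker, InterruptException())  # best-effort interrupt
            catch
            end
            put!(result_channel, :timeout)
        end
    end

    result = take!(result_channel)

    # Stop the timer if the worker finished first
    if !istaskdone(timer)
        try
            Base.throwto(timer, InterruptException())
        catch
        end
    end

    result === :timeout && return nothing   # caller records NaN
    result isa Exception && throw(result)
    return result
end

# Usage: sol = run_with_timeout(() -> solve(prob, alg), maxtime)
```

One caveat mirrored from the code above: if the interrupt lands inside the worker's `try`, the timeout can surface as the thrown `InterruptException` rather than `:timeout`; the benchmark loop treats both outcomes as a failed run recorded as `NaN`.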

@@ -252,8 +325,8 @@ Categorize the benchmark results into size ranges and find the best algorithm fo
For complex types, avoids RFLUFactorization if possible due to known issues.
"""
function categorize_results(df::DataFrame)
# Filter successful results
successful_df = filter(row -> row.success, df)
# Filter successful results and exclude NaN values
successful_df = filter(row -> row.success && !isnan(row.gflops), df)

if nrow(successful_df) == 0
@warn "No successful benchmark results found!"
@@ -293,8 +366,9 @@ function categorize_results(df::DataFrame)
continue
end

# Calculate average GFLOPs for each algorithm in this range
avg_results = combine(groupby(range_df, :algorithm), :gflops => mean => :avg_gflops)
# Calculate average GFLOPs for each algorithm in this range, excluding NaN values
avg_results = combine(groupby(range_df, :algorithm),
:gflops => (x -> mean(filter(!isnan, x))) => :avg_gflops)

# Sort by performance
sort!(avg_results, :avg_gflops, rev=true)
2 changes: 1 addition & 1 deletion lib/LinearSolveAutotune/src/plotting.jl
@@ -44,7 +44,7 @@ function create_benchmark_plots(df::DataFrame; title_base = "LinearSolve.jl LU F

# Plot each algorithm for this element type
for alg in algorithms
alg_df = filter(row -> row.algorithm == alg, eltype_df)
alg_df = filter(row -> row.algorithm == alg && !isnan(row.gflops), eltype_df)
if nrow(alg_df) > 0
# Sort by size for proper line plotting
sort!(alg_df, :size)
5 changes: 3 additions & 2 deletions lib/LinearSolveAutotune/src/telemetry.jl
@@ -365,9 +365,10 @@ function format_detailed_results_markdown(df::DataFrame)
end

# Create a summary table with average performance per algorithm for this element type
# Filter out NaN values when computing statistics
summary = combine(groupby(eltype_df, :algorithm),
:gflops => mean => :avg_gflops,
:gflops => std => :std_gflops,
:gflops => (x -> mean(filter(!isnan, x))) => :avg_gflops,
:gflops => (x -> std(filter(!isnan, x))) => :std_gflops,
nrow => :num_tests)
sort!(summary, :avg_gflops, rev = true)
