Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 52 additions & 12 deletions lib/LinearSolveAutotune/src/LinearSolveAutotune.jl
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@ function Base.show(io::IO, results::AutotuneResults)
println(io, " • Julia: ", get(results.sysinfo, "julia_version", "Unknown"))
println(io, " • Threads: ", get(results.sysinfo, "num_threads", "Unknown"), " (BLAS: ", get(results.sysinfo, "blas_num_threads", "Unknown"), ")")

# Results summary - filter out NaN values
# Results summary - include all results to show what was attempted
all_results = results.results_df
successful_results = filter(row -> row.success && !isnan(row.gflops), results.results_df)
if nrow(successful_results) > 0
println(io, "\n🏆 Top Performing Algorithms:")
Expand All @@ -95,6 +96,13 @@ function Base.show(io::IO, results::AutotuneResults)
end
end

# Show algorithms that had failures/timeouts to make it clear what was attempted
failed_results = filter(row -> !row.success, all_results)
if nrow(failed_results) > 0
failed_algs = unique(failed_results.algorithm)
println(io, "\n⚠️ Algorithms with failures/timeouts: ", join(failed_algs, ", "))
end

# Element types tested
eltypes = unique(results.results_df.eltype)
println(io, "\n🔬 Element Types Tested: ", join(eltypes, ", "))
Expand Down Expand Up @@ -263,7 +271,8 @@ function autotune_setup(;
results_df = benchmark_algorithms(matrix_sizes, all_algs, all_names, eltypes;
samples = samples, seconds = seconds, sizes = sizes, maxtime = maxtime)

# Display results table - filter out NaN values
# Display results table - show all results including NaN values to indicate what was tested
all_results = results_df
successful_results = filter(row -> row.success && !isnan(row.gflops), results_df)
exceeded_maxtime_results = filter(row -> isnan(row.gflops) && contains(get(row, :error, ""), "Exceeded maxtime"), results_df)
skipped_results = filter(row -> contains(get(row, :error, ""), "Skipped"), results_df)
Expand All @@ -281,22 +290,53 @@ function autotune_setup(;
if nrow(successful_results) > 0
@info "Benchmark completed successfully!"

# Create summary table for display - handle NaN values
summary = combine(groupby(successful_results, :algorithm),
:gflops => (x -> mean(filter(!isnan, x))) => :avg_gflops,
:gflops => (x -> maximum(filter(!isnan, x))) => :max_gflops,
nrow => :num_tests)
sort!(summary, :avg_gflops, rev = true)
# Create summary table for display - include algorithms with NaN values to show what was tested
# Create summary for all algorithms tested (not just successful ones)
full_summary = combine(groupby(all_results, :algorithm),
:gflops => (x -> begin
valid_vals = filter(!isnan, x)
length(valid_vals) > 0 ? mean(valid_vals) : NaN
end) => :avg_gflops,
:gflops => (x -> begin
valid_vals = filter(!isnan, x)
length(valid_vals) > 0 ? maximum(valid_vals) : NaN
end) => :max_gflops,
:success => (x -> count(x)) => :successful_tests,
nrow => :total_tests)

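# Sort by average GFLOPs (descending); the intent is to put NaN values at the end. NOTE(review): the `lt` below ranks NaN as the largest value, so combined with `rev = true` NaN rows sort first, not last — verify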
sort!(full_summary, [:avg_gflops], rev = true, lt = (a, b) -> begin
if isnan(a) && isnan(b)
return false
elseif isnan(a)
return false
elseif isnan(b)
return true
else
return a < b
end
end)

println("\n" * "="^60)
println("BENCHMARK RESULTS SUMMARY")
println("BENCHMARK RESULTS SUMMARY (including failed attempts)")
println("="^60)
pretty_table(summary,
header = ["Algorithm", "Avg GFLOPs", "Max GFLOPs", "Tests"],
formatters = ft_printf("%.2f", [2, 3]),
pretty_table(full_summary,
header = ["Algorithm", "Avg GFLOPs", "Max GFLOPs", "Success", "Total"],
formatters = (v, i, j) -> begin
if j in [2, 3] && isa(v, Float64)
return isnan(v) ? "NaN" : @sprintf("%.2f", v)
else
return v
end
end,
crop = :none)
else
@warn "No successful benchmark results!"
# Still show what was attempted
if nrow(all_results) > 0
failed_algs = unique(all_results.algorithm)
@info "Algorithms tested (all failed): $(join(failed_algs, ", "))"
end
return results_df, nothing
end

Expand Down
39 changes: 27 additions & 12 deletions lib/LinearSolveAutotune/src/telemetry.jl
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,9 @@ Format benchmark results as a markdown table suitable for GitHub issues.
"""
function format_results_for_github(df::DataFrame, system_info::Dict, categories::Dict{
String, String})
# Filter successful results
# Include all results, both successful and failed (with NaN values)
# This shows what algorithms were attempted, making it clear what was tested
all_results_df = df
successful_df = filter(row -> row.success, df)

if nrow(successful_df) == 0
Expand All @@ -180,7 +182,7 @@ function format_results_for_github(df::DataFrame, system_info::Dict, categories:
$(format_categories_markdown(categories))

### Detailed Results
$(format_detailed_results_markdown(successful_df))
$(format_detailed_results_markdown(all_results_df))

### System Information
$(format_system_info_markdown(system_info))
Expand Down Expand Up @@ -365,22 +367,29 @@ function format_detailed_results_markdown(df::DataFrame)
end

# Create a summary table with average performance per algorithm for this element type
# Filter out NaN values when computing statistics
# Include statistics that account for NaN values
summary = combine(groupby(eltype_df, :algorithm),
:gflops => (x -> mean(filter(!isnan, x))) => :avg_gflops,
:gflops => (x -> std(filter(!isnan, x))) => :std_gflops,
nrow => :num_tests)
:gflops => (x -> begin
valid_vals = filter(!isnan, x)
length(valid_vals) > 0 ? mean(valid_vals) : NaN
end) => :avg_gflops,
:gflops => (x -> begin
valid_vals = filter(!isnan, x)
length(valid_vals) > 1 ? std(valid_vals) : NaN
end) => :std_gflops,
:gflops => (x -> count(!isnan, x)) => :successful_tests,
nrow => :total_tests)
sort!(summary, :avg_gflops, rev = true)

push!(lines, "##### Summary Statistics")
push!(lines, "")
push!(lines, "| Algorithm | Avg GFLOPs | Std Dev | Tests |")
push!(lines, "|-----------|------------|---------|-------|")
push!(lines, "| Algorithm | Avg GFLOPs | Std Dev | Success/Total |")
push!(lines, "|-----------|------------|---------|---------------|")

for row in eachrow(summary)
avg_str = @sprintf("%.2f", row.avg_gflops)
std_str = @sprintf("%.2f", row.std_gflops)
push!(lines, "| $(row.algorithm) | $avg_str | $std_str | $(row.num_tests) |")
avg_str = isnan(row.avg_gflops) ? "NaN" : @sprintf("%.2f", row.avg_gflops)
std_str = isnan(row.std_gflops) ? "NaN" : @sprintf("%.2f", row.std_gflops)
push!(lines, "| $(row.algorithm) | $avg_str | $std_str | $(row.successful_tests)/$(row.total_tests) |")
end

push!(lines, "")
Expand All @@ -407,7 +416,13 @@ function format_detailed_results_markdown(df::DataFrame)
push!(lines, "|-------------|--------|--------|")

for row in eachrow(algo_df)
gflops_str = row.success ? @sprintf("%.3f", row.gflops) : "N/A"
gflops_str = if row.success
@sprintf("%.3f", row.gflops)
elseif isnan(row.gflops)
"NaN"
else
string(row.gflops)
end
status = row.success ? "✅ Success" : "❌ Failed"
push!(lines, "| $(row.size) | $gflops_str | $status |")
end
Expand Down
Loading