From 9986b14abd1131ae5ea57dd41580382ab82de317 Mon Sep 17 00:00:00 2001 From: ChrisRackauckas Date: Thu, 14 Aug 2025 14:14:19 -0400 Subject: [PATCH] Display NaN values in LinearSolveAutotune telemetry output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, the telemetry system filtered out NaN values from failed benchmark results, making it unclear what algorithms were actually tested but unsuccessful. This change improves transparency by: - Including all tested algorithms in telemetry output, not just successful ones - Displaying "NaN" explicitly for failed results instead of hiding them - Showing "Success/Total" counts (e.g., "2/3") in summary statistics - Adding ❌ Failed status indicators alongside ✅ Success markers - Reporting algorithms that had failures/timeouts in console output This allows users to see exactly what was attempted and understand the complete benchmark coverage, including which algorithms failed due to timeouts, convergence issues, or other errors. 
šŸ¤– Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- .../src/LinearSolveAutotune.jl | 64 +++++++++++++++---- lib/LinearSolveAutotune/src/telemetry.jl | 39 +++++++---- 2 files changed, 79 insertions(+), 24 deletions(-) diff --git a/lib/LinearSolveAutotune/src/LinearSolveAutotune.jl b/lib/LinearSolveAutotune/src/LinearSolveAutotune.jl index ba5800b4b..1e4e25507 100644 --- a/lib/LinearSolveAutotune/src/LinearSolveAutotune.jl +++ b/lib/LinearSolveAutotune/src/LinearSolveAutotune.jl @@ -78,7 +78,8 @@ function Base.show(io::IO, results::AutotuneResults) println(io, " • Julia: ", get(results.sysinfo, "julia_version", "Unknown")) println(io, " • Threads: ", get(results.sysinfo, "num_threads", "Unknown"), " (BLAS: ", get(results.sysinfo, "blas_num_threads", "Unknown"), ")") - # Results summary - filter out NaN values + # Results summary - include all results to show what was attempted + all_results = results.results_df successful_results = filter(row -> row.success && !isnan(row.gflops), results.results_df) if nrow(successful_results) > 0 println(io, "\nšŸ† Top Performing Algorithms:") @@ -95,6 +96,13 @@ function Base.show(io::IO, results::AutotuneResults) end end + # Show algorithms that had failures/timeouts to make it clear what was attempted + failed_results = filter(row -> !row.success, all_results) + if nrow(failed_results) > 0 + failed_algs = unique(failed_results.algorithm) + println(io, "\nāš ļø Algorithms with failures/timeouts: ", join(failed_algs, ", ")) + end + # Element types tested eltypes = unique(results.results_df.eltype) println(io, "\nšŸ”¬ Element Types Tested: ", join(eltypes, ", ")) @@ -263,7 +271,8 @@ function autotune_setup(; results_df = benchmark_algorithms(matrix_sizes, all_algs, all_names, eltypes; samples = samples, seconds = seconds, sizes = sizes, maxtime = maxtime) - # Display results table - filter out NaN values + # Display results table - show all results including NaN values to indicate what was tested + 
all_results = results_df successful_results = filter(row -> row.success && !isnan(row.gflops), results_df) exceeded_maxtime_results = filter(row -> isnan(row.gflops) && contains(get(row, :error, ""), "Exceeded maxtime"), results_df) skipped_results = filter(row -> contains(get(row, :error, ""), "Skipped"), results_df) @@ -281,22 +290,53 @@ function autotune_setup(; if nrow(successful_results) > 0 @info "Benchmark completed successfully!" - # Create summary table for display - handle NaN values - summary = combine(groupby(successful_results, :algorithm), - :gflops => (x -> mean(filter(!isnan, x))) => :avg_gflops, - :gflops => (x -> maximum(filter(!isnan, x))) => :max_gflops, - nrow => :num_tests) - sort!(summary, :avg_gflops, rev = true) + # Create summary table for display - include algorithms with NaN values to show what was tested + # Create summary for all algorithms tested (not just successful ones) + full_summary = combine(groupby(all_results, :algorithm), + :gflops => (x -> begin + valid_vals = filter(!isnan, x) + length(valid_vals) > 0 ? mean(valid_vals) : NaN + end) => :avg_gflops, + :gflops => (x -> begin + valid_vals = filter(!isnan, x) + length(valid_vals) > 0 ? 
maximum(valid_vals) : NaN + end) => :max_gflops, + :success => (x -> count(x)) => :successful_tests, + nrow => :total_tests) + + # Sort by average GFLOPs, putting NaN values at the end + sort!(full_summary, [:avg_gflops], rev = true, lt = (a, b) -> begin + if isnan(a) && isnan(b) + return false + elseif isnan(a) + return false + elseif isnan(b) + return true + else + return a < b + end + end) println("\n" * "="^60) - println("BENCHMARK RESULTS SUMMARY") + println("BENCHMARK RESULTS SUMMARY (including failed attempts)") println("="^60) - pretty_table(summary, - header = ["Algorithm", "Avg GFLOPs", "Max GFLOPs", "Tests"], - formatters = ft_printf("%.2f", [2, 3]), + pretty_table(full_summary, + header = ["Algorithm", "Avg GFLOPs", "Max GFLOPs", "Success", "Total"], + formatters = (v, i, j) -> begin + if j in [2, 3] && isa(v, Float64) + return isnan(v) ? "NaN" : @sprintf("%.2f", v) + else + return v + end + end, crop = :none) else @warn "No successful benchmark results!" + # Still show what was attempted + if nrow(all_results) > 0 + failed_algs = unique(all_results.algorithm) + @info "Algorithms tested (all failed): $(join(failed_algs, ", "))" + end return results_df, nothing end diff --git a/lib/LinearSolveAutotune/src/telemetry.jl b/lib/LinearSolveAutotune/src/telemetry.jl index e674924af..c16154d82 100644 --- a/lib/LinearSolveAutotune/src/telemetry.jl +++ b/lib/LinearSolveAutotune/src/telemetry.jl @@ -166,7 +166,9 @@ Format benchmark results as a markdown table suitable for GitHub issues. 
""" function format_results_for_github(df::DataFrame, system_info::Dict, categories::Dict{ String, String}) - # Filter successful results + # Include all results, both successful and failed (with NaN values) + # This shows what algorithms were attempted, making it clear what was tested + all_results_df = df successful_df = filter(row -> row.success, df) if nrow(successful_df) == 0 @@ -180,7 +182,7 @@ function format_results_for_github(df::DataFrame, system_info::Dict, categories: $(format_categories_markdown(categories)) ### Detailed Results -$(format_detailed_results_markdown(successful_df)) +$(format_detailed_results_markdown(all_results_df)) ### System Information $(format_system_info_markdown(system_info)) @@ -365,22 +367,29 @@ function format_detailed_results_markdown(df::DataFrame) end # Create a summary table with average performance per algorithm for this element type - # Filter out NaN values when computing statistics + # Include statistics that account for NaN values summary = combine(groupby(eltype_df, :algorithm), - :gflops => (x -> mean(filter(!isnan, x))) => :avg_gflops, - :gflops => (x -> std(filter(!isnan, x))) => :std_gflops, - nrow => :num_tests) + :gflops => (x -> begin + valid_vals = filter(!isnan, x) + length(valid_vals) > 0 ? mean(valid_vals) : NaN + end) => :avg_gflops, + :gflops => (x -> begin + valid_vals = filter(!isnan, x) + length(valid_vals) > 1 ? 
std(valid_vals) : NaN + end) => :std_gflops, + :gflops => (x -> count(!isnan, x)) => :successful_tests, + nrow => :total_tests) sort!(summary, :avg_gflops, rev = true) push!(lines, "##### Summary Statistics") push!(lines, "") - push!(lines, "| Algorithm | Avg GFLOPs | Std Dev | Tests |") - push!(lines, "|-----------|------------|---------|-------|") + push!(lines, "| Algorithm | Avg GFLOPs | Std Dev | Success/Total |") + push!(lines, "|-----------|------------|---------|---------------|") for row in eachrow(summary) - avg_str = @sprintf("%.2f", row.avg_gflops) - std_str = @sprintf("%.2f", row.std_gflops) - push!(lines, "| $(row.algorithm) | $avg_str | $std_str | $(row.num_tests) |") + avg_str = isnan(row.avg_gflops) ? "NaN" : @sprintf("%.2f", row.avg_gflops) + std_str = isnan(row.std_gflops) ? "NaN" : @sprintf("%.2f", row.std_gflops) + push!(lines, "| $(row.algorithm) | $avg_str | $std_str | $(row.successful_tests)/$(row.total_tests) |") end push!(lines, "") @@ -407,7 +416,13 @@ function format_detailed_results_markdown(df::DataFrame) push!(lines, "|-------------|--------|--------|") for row in eachrow(algo_df) - gflops_str = row.success ? @sprintf("%.3f", row.gflops) : "N/A" + gflops_str = if row.success + @sprintf("%.3f", row.gflops) + elseif isnan(row.gflops) + "NaN" + else + string(row.gflops) + end status = row.success ? "āœ… Success" : "āŒ Failed" push!(lines, "| $(row.size) | $gflops_str | $status |") end