Display NaN values in LinearSolveAutotune telemetry output

ChrisRackauckas · claude · ChrisRackauckas · commit 9986b14abd11 · 2025-08-14T14:14:19.000-04:00
Previously, the telemetry system filtered out NaN values from failed benchmark results, making it unclear which algorithms had actually been tested but were unsuccessful. This change improves transparency by:

- Including all tested algorithms in telemetry output, not just successful ones
- Displaying "NaN" explicitly for failed results instead of hiding them
- Showing "Success/Total" counts (e.g., "2/3") in summary statistics
- Adding ❌ Failed status indicators alongside ✅ Success markers
- Reporting algorithms that had failures/timeouts in console output

This allows users to see exactly what was attempted and understand the complete benchmark coverage, including which algorithms failed due to timeouts, convergence issues, or other errors.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
diff --git a/lib/LinearSolveAutotune/src/LinearSolveAutotune.jl b/lib/LinearSolveAutotune/src/LinearSolveAutotune.jl
@@ -78,7 +78,8 @@ function Base.show(io::IO, results::AutotuneResults)
     println(io, "  • Julia: ", get(results.sysinfo, "julia_version", "Unknown"))
     println(io, "  • Threads: ", get(results.sysinfo, "num_threads", "Unknown"), " (BLAS: ", get(results.sysinfo, "blas_num_threads", "Unknown"), ")")
     
-    # Results summary - filter out NaN values
+    # Results summary - include all results to show what was attempted
+    all_results = results.results_df
     successful_results = filter(row -> row.success && !isnan(row.gflops), results.results_df)
     if nrow(successful_results) > 0
         println(io, "\n🏆 Top Performing Algorithms:")
@@ -95,6 +96,13 @@ function Base.show(io::IO, results::AutotuneResults)
         end
     end
     
+    # Show algorithms that had failures/timeouts to make it clear what was attempted
+    failed_results = filter(row -> !row.success, all_results)
+    if nrow(failed_results) > 0
+        failed_algs = unique(failed_results.algorithm)
+        println(io, "\n⚠️  Algorithms with failures/timeouts: ", join(failed_algs, ", "))
+    end
+    
     # Element types tested
     eltypes = unique(results.results_df.eltype)
     println(io, "\n🔬 Element Types Tested: ", join(eltypes, ", "))
@@ -263,7 +271,8 @@ function autotune_setup(;
     results_df = benchmark_algorithms(matrix_sizes, all_algs, all_names, eltypes;
         samples = samples, seconds = seconds, sizes = sizes, maxtime = maxtime)
 
-    # Display results table - filter out NaN values
+    # Display results table - show all results including NaN values to indicate what was tested
+    all_results = results_df
     successful_results = filter(row -> row.success && !isnan(row.gflops), results_df)
     exceeded_maxtime_results = filter(row -> isnan(row.gflops) && contains(get(row, :error, ""), "Exceeded maxtime"), results_df)
     skipped_results = filter(row -> contains(get(row, :error, ""), "Skipped"), results_df)
@@ -281,22 +290,53 @@ function autotune_setup(;
     if nrow(successful_results) > 0
         @info "Benchmark completed successfully!"
 
-        # Create summary table for display - handle NaN values
-        summary = combine(groupby(successful_results, :algorithm),
-            :gflops => (x -> mean(filter(!isnan, x))) => :avg_gflops,
-            :gflops => (x -> maximum(filter(!isnan, x))) => :max_gflops,
-            nrow => :num_tests)
-        sort!(summary, :avg_gflops, rev = true)
+        # Create summary table for display - include algorithms with NaN values to show what was tested
+        # Create summary for all algorithms tested (not just successful ones)
+        full_summary = combine(groupby(all_results, :algorithm),
+            :gflops => (x -> begin
+                valid_vals = filter(!isnan, x)
+                length(valid_vals) > 0 ? mean(valid_vals) : NaN
+            end) => :avg_gflops,
+            :gflops => (x -> begin
+                valid_vals = filter(!isnan, x)
+                length(valid_vals) > 0 ? maximum(valid_vals) : NaN
+            end) => :max_gflops,
+            :success => (x -> count(x)) => :successful_tests,
+            nrow => :total_tests)
+        
+        # Sort by average GFLOPs, putting NaN values at the end
+        sort!(full_summary, [:avg_gflops], rev = true, lt = (a, b) -> begin
+            if isnan(a) && isnan(b)
+                return false
+            elseif isnan(a)
+                return false
+            elseif isnan(b)
+                return true
+            else
+                return a < b
+            end
+        end)
 
         println("\n" * "="^60)
-        println("BENCHMARK RESULTS SUMMARY")
+        println("BENCHMARK RESULTS SUMMARY (including failed attempts)")
         println("="^60)
-        pretty_table(summary,
-            header = ["Algorithm", "Avg GFLOPs", "Max GFLOPs", "Tests"],
-            formatters = ft_printf("%.2f", [2, 3]),
+        pretty_table(full_summary,
+            header = ["Algorithm", "Avg GFLOPs", "Max GFLOPs", "Success", "Total"],
+            formatters = (v, i, j) -> begin
+                if j in [2, 3] && isa(v, Float64)
+                    return isnan(v) ? "NaN" : @sprintf("%.2f", v)
+                else
+                    return v
+                end
+            end,
             crop = :none)
     else
         @warn "No successful benchmark results!"
+        # Still show what was attempted
+        if nrow(all_results) > 0
+            failed_algs = unique(all_results.algorithm)
+            @info "Algorithms tested (all failed): $(join(failed_algs, ", "))"
+        end
         return results_df, nothing
     end
 
diff --git a/lib/LinearSolveAutotune/src/telemetry.jl b/lib/LinearSolveAutotune/src/telemetry.jl
@@ -166,7 +166,9 @@ Format benchmark results as a markdown table suitable for GitHub issues.
 """
 function format_results_for_github(df::DataFrame, system_info::Dict, categories::Dict{
         String, String})
-    # Filter successful results
+    # Include all results, both successful and failed (with NaN values)
+    # This shows what algorithms were attempted, making it clear what was tested
+    all_results_df = df
     successful_df = filter(row -> row.success, df)
 
     if nrow(successful_df) == 0
@@ -180,7 +182,7 @@ function format_results_for_github(df::DataFrame, system_info::Dict, categories:
 $(format_categories_markdown(categories))
 
 ### Detailed Results
-$(format_detailed_results_markdown(successful_df))
+$(format_detailed_results_markdown(all_results_df))
 
 ### System Information
 $(format_system_info_markdown(system_info))
@@ -365,22 +367,29 @@ function format_detailed_results_markdown(df::DataFrame)
         end
         
         # Create a summary table with average performance per algorithm for this element type
-        # Filter out NaN values when computing statistics
+        # Include statistics that account for NaN values
         summary = combine(groupby(eltype_df, :algorithm), 
-                         :gflops => (x -> mean(filter(!isnan, x))) => :avg_gflops, 
-                         :gflops => (x -> std(filter(!isnan, x))) => :std_gflops,
-                         nrow => :num_tests)
+                         :gflops => (x -> begin
+                             valid_vals = filter(!isnan, x)
+                             length(valid_vals) > 0 ? mean(valid_vals) : NaN
+                         end) => :avg_gflops, 
+                         :gflops => (x -> begin
+                             valid_vals = filter(!isnan, x)
+                             length(valid_vals) > 1 ? std(valid_vals) : NaN
+                         end) => :std_gflops,
+                         :gflops => (x -> count(!isnan, x)) => :successful_tests,
+                         nrow => :total_tests)
         sort!(summary, :avg_gflops, rev = true)
 
         push!(lines, "##### Summary Statistics")
         push!(lines, "")
-        push!(lines, "| Algorithm | Avg GFLOPs | Std Dev | Tests |")
-        push!(lines, "|-----------|------------|---------|-------|")
+        push!(lines, "| Algorithm | Avg GFLOPs | Std Dev | Success/Total |")
+        push!(lines, "|-----------|------------|---------|---------------|")
 
         for row in eachrow(summary)
-            avg_str = @sprintf("%.2f", row.avg_gflops)
-            std_str = @sprintf("%.2f", row.std_gflops)
-            push!(lines, "| $(row.algorithm) | $avg_str | $std_str | $(row.num_tests) |")
+            avg_str = isnan(row.avg_gflops) ? "NaN" : @sprintf("%.2f", row.avg_gflops)
+            std_str = isnan(row.std_gflops) ? "NaN" : @sprintf("%.2f", row.std_gflops)
+            push!(lines, "| $(row.algorithm) | $avg_str | $std_str | $(row.successful_tests)/$(row.total_tests) |")
         end
         
         push!(lines, "")
@@ -407,7 +416,13 @@ function format_detailed_results_markdown(df::DataFrame)
             push!(lines, "|-------------|--------|--------|")
             
             for row in eachrow(algo_df)
-                gflops_str = row.success ? @sprintf("%.3f", row.gflops) : "N/A"
+                gflops_str = if row.success
+                    @sprintf("%.3f", row.gflops)
+                elseif isnan(row.gflops)
+                    "NaN"
+                else
+                    string(row.gflops)
+                end
                 status = row.success ? "✅ Success" : "❌ Failed"
                 push!(lines, "| $(row.size) | $gflops_str | $status |")
             end