Skip to content

Commit 9986b14

Browse files
Display NaN values in LinearSolveAutotune telemetry output
Previously, the telemetry system filtered out NaN values from failed benchmark results, making it unclear which algorithms had been tested but did not succeed. This change improves transparency by:

- Including all tested algorithms in telemetry output, not just successful ones
- Displaying "NaN" explicitly for failed results instead of hiding them
- Showing "Success/Total" counts (e.g., "2/3") in summary statistics
- Adding ❌ Failed status indicators alongside ✅ Success markers
- Reporting algorithms that had failures/timeouts in console output

This allows users to see exactly what was attempted and to understand the complete benchmark coverage, including which algorithms failed due to timeouts, convergence issues, or other errors.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <[email protected]>
1 parent 5b51b11 commit 9986b14

File tree

2 files changed

+79
-24
lines changed

2 files changed

+79
-24
lines changed

lib/LinearSolveAutotune/src/LinearSolveAutotune.jl

Lines changed: 52 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,8 @@ function Base.show(io::IO, results::AutotuneResults)
7878
println(io, " • Julia: ", get(results.sysinfo, "julia_version", "Unknown"))
7979
println(io, " • Threads: ", get(results.sysinfo, "num_threads", "Unknown"), " (BLAS: ", get(results.sysinfo, "blas_num_threads", "Unknown"), ")")
8080

81-
# Results summary - filter out NaN values
81+
# Results summary - include all results to show what was attempted
82+
all_results = results.results_df
8283
successful_results = filter(row -> row.success && !isnan(row.gflops), results.results_df)
8384
if nrow(successful_results) > 0
8485
println(io, "\n🏆 Top Performing Algorithms:")
@@ -95,6 +96,13 @@ function Base.show(io::IO, results::AutotuneResults)
9596
end
9697
end
9798

99+
# Show algorithms that had failures/timeouts to make it clear what was attempted
100+
failed_results = filter(row -> !row.success, all_results)
101+
if nrow(failed_results) > 0
102+
failed_algs = unique(failed_results.algorithm)
103+
println(io, "\n⚠️ Algorithms with failures/timeouts: ", join(failed_algs, ", "))
104+
end
105+
98106
# Element types tested
99107
eltypes = unique(results.results_df.eltype)
100108
println(io, "\n🔬 Element Types Tested: ", join(eltypes, ", "))
@@ -263,7 +271,8 @@ function autotune_setup(;
263271
results_df = benchmark_algorithms(matrix_sizes, all_algs, all_names, eltypes;
264272
samples = samples, seconds = seconds, sizes = sizes, maxtime = maxtime)
265273

266-
# Display results table - filter out NaN values
274+
# Display results table - show all results including NaN values to indicate what was tested
275+
all_results = results_df
267276
successful_results = filter(row -> row.success && !isnan(row.gflops), results_df)
268277
exceeded_maxtime_results = filter(row -> isnan(row.gflops) && contains(get(row, :error, ""), "Exceeded maxtime"), results_df)
269278
skipped_results = filter(row -> contains(get(row, :error, ""), "Skipped"), results_df)
@@ -281,22 +290,53 @@ function autotune_setup(;
281290
if nrow(successful_results) > 0
282291
@info "Benchmark completed successfully!"
283292

284-
# Create summary table for display - handle NaN values
285-
summary = combine(groupby(successful_results, :algorithm),
286-
:gflops => (x -> mean(filter(!isnan, x))) => :avg_gflops,
287-
:gflops => (x -> maximum(filter(!isnan, x))) => :max_gflops,
288-
nrow => :num_tests)
289-
sort!(summary, :avg_gflops, rev = true)
293+
# Create summary table for display - include algorithms with NaN values to show what was tested
294+
# Create summary for all algorithms tested (not just successful ones)
295+
full_summary = combine(groupby(all_results, :algorithm),
296+
:gflops => (x -> begin
297+
valid_vals = filter(!isnan, x)
298+
length(valid_vals) > 0 ? mean(valid_vals) : NaN
299+
end) => :avg_gflops,
300+
:gflops => (x -> begin
301+
valid_vals = filter(!isnan, x)
302+
length(valid_vals) > 0 ? maximum(valid_vals) : NaN
303+
end) => :max_gflops,
304+
:success => (x -> count(x)) => :successful_tests,
305+
nrow => :total_tests)
306+
307+
# Sort by average GFLOPs, putting NaN values at the end
308+
sort!(full_summary, [:avg_gflops], rev = true, lt = (a, b) -> begin
309+
if isnan(a) && isnan(b)
310+
return false
311+
elseif isnan(a)
312+
return false
313+
elseif isnan(b)
314+
return true
315+
else
316+
return a < b
317+
end
318+
end)
290319

291320
println("\n" * "="^60)
292-
println("BENCHMARK RESULTS SUMMARY")
321+
println("BENCHMARK RESULTS SUMMARY (including failed attempts)")
293322
println("="^60)
294-
pretty_table(summary,
295-
header = ["Algorithm", "Avg GFLOPs", "Max GFLOPs", "Tests"],
296-
formatters = ft_printf("%.2f", [2, 3]),
323+
pretty_table(full_summary,
324+
header = ["Algorithm", "Avg GFLOPs", "Max GFLOPs", "Success", "Total"],
325+
formatters = (v, i, j) -> begin
326+
if j in [2, 3] && isa(v, Float64)
327+
return isnan(v) ? "NaN" : @sprintf("%.2f", v)
328+
else
329+
return v
330+
end
331+
end,
297332
crop = :none)
298333
else
299334
@warn "No successful benchmark results!"
335+
# Still show what was attempted
336+
if nrow(all_results) > 0
337+
failed_algs = unique(all_results.algorithm)
338+
@info "Algorithms tested (all failed): $(join(failed_algs, ", "))"
339+
end
300340
return results_df, nothing
301341
end
302342

lib/LinearSolveAutotune/src/telemetry.jl

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,9 @@ Format benchmark results as a markdown table suitable for GitHub issues.
166166
"""
167167
function format_results_for_github(df::DataFrame, system_info::Dict, categories::Dict{
168168
String, String})
169-
# Filter successful results
169+
# Include all results, both successful and failed (with NaN values)
170+
# This shows what algorithms were attempted, making it clear what was tested
171+
all_results_df = df
170172
successful_df = filter(row -> row.success, df)
171173

172174
if nrow(successful_df) == 0
@@ -180,7 +182,7 @@ function format_results_for_github(df::DataFrame, system_info::Dict, categories:
180182
$(format_categories_markdown(categories))
181183
182184
### Detailed Results
183-
$(format_detailed_results_markdown(successful_df))
185+
$(format_detailed_results_markdown(all_results_df))
184186
185187
### System Information
186188
$(format_system_info_markdown(system_info))
@@ -365,22 +367,29 @@ function format_detailed_results_markdown(df::DataFrame)
365367
end
366368

367369
# Create a summary table with average performance per algorithm for this element type
368-
# Filter out NaN values when computing statistics
370+
# Include statistics that account for NaN values
369371
summary = combine(groupby(eltype_df, :algorithm),
370-
:gflops => (x -> mean(filter(!isnan, x))) => :avg_gflops,
371-
:gflops => (x -> std(filter(!isnan, x))) => :std_gflops,
372-
nrow => :num_tests)
372+
:gflops => (x -> begin
373+
valid_vals = filter(!isnan, x)
374+
length(valid_vals) > 0 ? mean(valid_vals) : NaN
375+
end) => :avg_gflops,
376+
:gflops => (x -> begin
377+
valid_vals = filter(!isnan, x)
378+
length(valid_vals) > 1 ? std(valid_vals) : NaN
379+
end) => :std_gflops,
380+
:gflops => (x -> count(!isnan, x)) => :successful_tests,
381+
nrow => :total_tests)
373382
sort!(summary, :avg_gflops, rev = true)
374383

375384
push!(lines, "##### Summary Statistics")
376385
push!(lines, "")
377-
push!(lines, "| Algorithm | Avg GFLOPs | Std Dev | Tests |")
378-
push!(lines, "|-----------|------------|---------|-------|")
386+
push!(lines, "| Algorithm | Avg GFLOPs | Std Dev | Success/Total |")
387+
push!(lines, "|-----------|------------|---------|---------------|")
379388

380389
for row in eachrow(summary)
381-
avg_str = @sprintf("%.2f", row.avg_gflops)
382-
std_str = @sprintf("%.2f", row.std_gflops)
383-
push!(lines, "| $(row.algorithm) | $avg_str | $std_str | $(row.num_tests) |")
390+
avg_str = isnan(row.avg_gflops) ? "NaN" : @sprintf("%.2f", row.avg_gflops)
391+
std_str = isnan(row.std_gflops) ? "NaN" : @sprintf("%.2f", row.std_gflops)
392+
push!(lines, "| $(row.algorithm) | $avg_str | $std_str | $(row.successful_tests)/$(row.total_tests) |")
384393
end
385394

386395
push!(lines, "")
@@ -407,7 +416,13 @@ function format_detailed_results_markdown(df::DataFrame)
407416
push!(lines, "|-------------|--------|--------|")
408417

409418
for row in eachrow(algo_df)
410-
gflops_str = row.success ? @sprintf("%.3f", row.gflops) : "N/A"
419+
gflops_str = if row.success
420+
@sprintf("%.3f", row.gflops)
421+
elseif isnan(row.gflops)
422+
"NaN"
423+
else
424+
string(row.gflops)
425+
end
411426
status = row.success ? "✅ Success" : "❌ Failed"
412427
push!(lines, "| $(row.size) | $gflops_str | $status |")
413428
end

0 commit comments

Comments
 (0)