Skip to content

Commit 176c4a4

Browse files
committed
Improvements in benchmark of agentic cuda example
1 parent a5ac42e commit 176c4a4

File tree

3 files changed

+127
-48
lines changed

3 files changed

+127
-48
lines changed

examples/agentic/generate-cuda-linear-solver/Project.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
[deps]
22
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
33
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
4+
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
5+
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
46
SmartSolve = "4fbb3a3c-2fa1-4c19-8d57-bae8bc1e16ac"
57
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
68

examples/agentic/generate-cuda-linear-solver/benchmark.jl

Lines changed: 125 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -2,56 +2,133 @@ using LinearAlgebra
22
using SparseArrays
33
using CUDA
44
using BenchmarkTools
5+
using OrderedCollections
6+
using Plots
57

6-
println("Simple GPU benchmark:\n")
8+
println("GPU benchmark with error-vs-time plot:\n")
79

810
include("solver.jl")
911

10-
# Problem setup on CPU (sparse)
12+
# Configuration
1113
N = 10_000
12-
A = sprand(N, N, 0.3)
13-
b = rand(N)
14-
Ad = CuArray(Matrix(A))
15-
bd = CuArray(b)
16-
x2_d = CuArray(zeros(N))
17-
18-
# --- warm-up (compile kernels, allocate, etc.) ---
19-
x1_d = Ad \ bd
20-
CUDA.CUSOLVER.gesv!(x2_d, Ad, bd)
21-
CUDA.synchronize()
22-
bd = CuArray(b) # reset rhs
23-
x3_d = proposed_fn(Ad, bd)
24-
CUDA.synchronize()
25-
26-
println("GPU benchmark (default solver):")
27-
display(@benchmark begin
28-
x = $Ad \ $bd
29-
CUDA.synchronize()
30-
end seconds = 10)
31-
32-
println("\nGPU benchmark (gesv! solver):")
33-
display(@benchmark begin
34-
CUDA.CUSOLVER.gesv!(x2_d, Ad, bd)
35-
CUDA.synchronize()
36-
end seconds = 10)
37-
38-
bd = CuArray(b)
39-
println("\nGPU benchmark (generated solver):")
40-
display(@benchmark begin
41-
x = proposed_fn($Ad, $bd)
42-
CUDA.synchronize()
43-
end seconds = 10)
44-
45-
# --- error computation on GPU ---
46-
x1_d = Ad \ bd
47-
CUDA.CUSOLVER.gesv!(x2_d, Ad, bd)
48-
x3_d = proposed_fn(Ad, bd)
49-
CUDA.synchronize()
50-
51-
e1 = norm(Ad * x1_d - bd)
52-
e2 = norm(Ad * x2_d - bd)
53-
e3 = norm(Ad * x3_d - bd)
54-
55-
println("\nError of default GPU solver: $e1")
56-
println("Error of gesv! GPU solver: $e2")
57-
println("Error of generated GPU solver: $e3")
14+
sparsity_levels = [0.1, 0.5, 0.9]
15+
solvers = OrderedDict(
16+
"Default" => (Ad, bd) -> (Ad \ bd),
17+
"gesv!" => (Ad, bd) -> begin
18+
x = CuArray(zeros(size(Ad, 1)))
19+
CUDA.CUSOLVER.gesv!(x, Ad, bd)
20+
x
21+
end,
22+
"Generated" => (Ad, bd) -> proposed_fn(Ad, bd)
23+
)
24+
25+
# Store results for plotting
26+
results = Dict()
27+
28+
for sparsity in sparsity_levels
29+
println("\n=== Sparsity: $sparsity ===")
30+
31+
# Generate problem
32+
A = sprand(N, N, sparsity)
33+
b = rand(N)
34+
Ad = CuArray(Matrix(A))
35+
bd = CuArray(b)
36+
37+
results[sparsity] = Dict()
38+
39+
for (solver_name, solver_fn) in solvers
40+
println(" $solver_name...")
41+
42+
# Warm-up
43+
bd_warm = CuArray(copy(b))
44+
try
45+
x_warm = solver_fn(Ad, bd_warm)
46+
CUDA.synchronize()
47+
catch e
48+
println(" Warning: solver failed during warm-up: $e")
49+
continue
50+
end
51+
52+
# Benchmark
53+
bd_bench = CuArray(copy(b))
54+
try
55+
bench = @benchmark begin
56+
x = $(solver_fn)($Ad, $bd_bench)
57+
CUDA.synchronize()
58+
end seconds = 5 samples = 10
59+
60+
time_ms = median(bench.times) / 1e6 # Convert to ms
61+
62+
# Compute error
63+
bd_err = CuArray(copy(b))
64+
x_sol = solver_fn(Ad, bd_err)
65+
CUDA.synchronize()
66+
error = norm(Ad*x_sol - bd_err) / norm(bd_err)
67+
68+
results[sparsity][solver_name] = (time=time_ms, error=error)
69+
println(" Time: $(round(time_ms, digits=3)) ms, Error: $(round(error, sigdigits=3))")
70+
catch e
71+
println(" Error during benchmark: $e")
72+
end
73+
end
74+
end
75+
76+
# Create error-vs-time plot
77+
p = plot(
78+
size=(800, 800),
79+
#legend=:topright,
80+
legend=:bottomright,
81+
xlabel="Time (ms)",
82+
ylabel="Relative residual: ||Ax - b||₂ / ||b||₂",
83+
# xscale=:log10,
84+
yscale=:log10,
85+
guidefontsize=22,#18,
86+
tickfontsize=20, #16,
87+
legendfontsize=18, #14,
88+
margin=5*Plots.mm,
89+
framestyle=:box
90+
)
91+
92+
# Symbols encode sparsity levels; colors encode solvers.
93+
# Define marker for each sparsity and a color for each solver.
94+
## Sparsity shapes
95+
marker_map_sparsity = OrderedDict(0.1=>:circle, 0.5=>:square, 0.9=>:utriangle)
96+
## Solver color shades
97+
color_map_solver = OrderedDict("Default"=>:red, "gesv!"=>:blue, "Generated"=>:green)
98+
99+
# Plot each point individually so marker shape shows sparsity and color shows solver.
100+
for solver_name in keys(solvers)
101+
for sparsity in sparsity_levels
102+
if sparsity in keys(results) && solver_name in keys(results[sparsity])
103+
t = results[sparsity][solver_name].time
104+
e = results[sparsity][solver_name].error
105+
scatter!(p, [t], [e];
106+
label="",
107+
marker=marker_map_sparsity[sparsity],
108+
markersize=15,
109+
color=color_map_solver[solver_name],
110+
markerstrokecolor=:black,
111+
markerstrokewidth=0.0,#0.8,
112+
alpha=0.45)
113+
end
114+
end
115+
end
116+
117+
# Create a combined legend
118+
for solver_name in keys(solvers)
119+
for s in sparsity_levels
120+
lbl = "$(solver_name), ρ:$(s)"
121+
scatter!(p, [NaN], [NaN]; label=lbl,
122+
marker=marker_map_sparsity[s],
123+
markersize=15,
124+
color=color_map_solver[solver_name],
125+
markerstrokecolor=:black,
126+
markerstrokewidth=0.0,
127+
alpha=0.45)
128+
end
129+
end
130+
131+
savefig(p, "error_vs_time.pdf")
132+
println("\n✓ Plot saved as error_vs_time.pdf")
133+
134+
display(p)
24.7 KB
Binary file not shown.

0 commit comments

Comments
 (0)