Skip to content

Commit 0f1b4ed

Browse files
committed
Starting new dagger example
1 parent 3e5398d commit 0f1b4ed

File tree

10 files changed

+299
-40
lines changed

10 files changed

+299
-40
lines changed

Project.toml

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,11 @@ BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
99
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
1010
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
1111
CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
12+
Dagger = "d58978e5-989f-55fb-8d15-ea34adc7bf54"
1213
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
1314
DecisionTree = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb"
1415
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
16+
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
1517
MKL = "33e6dc65-8f57-5167-99aa-e5a354878fb2"
1618
MatrixDepot = "b51810bb-c9f3-55da-ae3c-350fc1fbce05"
1719
OpenAI = "e9f21f70-7185-4079-aca2-91159181367c"
@@ -21,15 +23,20 @@ ScikitLearn = "3646fa90-6ef7-5e7e-9f22-8aca16db6324"
2123
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
2224
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
2325

26+
[sources]
27+
Dagger = {rev = "master", url = "https://github.com/JuliaParallel/Dagger.jl"}
28+
2429
[compat]
30+
BSON = "0.3"
2531
BenchmarkTools = "1"
2632
CSV = "0.10"
27-
BSON = "0.3"
2833
CUDA = "5.9.2"
2934
CairoMakie = "0.15"
30-
DecisionTree = "0.12"
35+
Dagger = "0.19.2"
3136
DataFrames = "1"
37+
DecisionTree = "0.12"
3238
LinearAlgebra = "1.12.0"
39+
Logging = "1.11.0"
3340
MKL = "0.9"
3441
MatrixDepot = "1.0.13"
3542
OpenAI = "0.12.0"
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
[deps]
2+
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
3+
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
4+
Dagger = "d58978e5-989f-55fb-8d15-ea34adc7bf54"
5+
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
6+
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
7+
SmartSolve = "4fbb3a3c-2fa1-4c19-8d57-bae8bc1e16ac"
8+
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
9+
10+
[sources]
11+
Dagger = {rev = "master", url = "https://github.com/JuliaParallel/Dagger.jl"}
12+
SmartSolve = {path = "../../.."}
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
# GPU solver benchmark: measures runtime and relative residual for several
# GPU linear solvers at different sparsity levels, then renders an
# error-vs-time scatter plot (marker shape = sparsity, color = solver).
using LinearAlgebra
using SparseArrays
using CUDA
using BenchmarkTools
using OrderedCollections
using Plots

println("GPU benchmark with error-vs-time plot:\n")

# Defines `proposed_fn`, the generated solver under test.
include("solver.jl")

# Configuration
N = 15_000
sparsity_levels = [0.1, 0.5, 0.9]

# Solvers under comparison. Keys are display names; values map (Ad, bd) on the
# GPU to a solution vector x.
solvers = OrderedDict(
    "Default" => (Ad, bd) -> (Ad \ bd),
    "gesv!" => (Ad, bd) -> begin
        x = CuArray(zeros(size(Ad, 1)))
        CUDA.CUSOLVER.gesv!(x, Ad, bd)
        x
    end,
    "Generated" => (Ad, bd) -> proposed_fn(Ad, bd)
)

# results[sparsity][solver_name] = (time = seconds, error = relative residual)
results = Dict()

for sparsity in sparsity_levels
    println("\n=== Sparsity: $sparsity ===")

    # Generate problem: random sparse A, densified onto the GPU, random rhs b.
    A = sprand(N, N, sparsity)
    b = rand(N)
    Ad = CuArray(Matrix(A))
    bd = CuArray(b)

    results[sparsity] = Dict()

    for (solver_name, solver_fn) in solvers
        println(" $solver_name...")

        # Warm-up compile/run; also skips solvers that fail on this problem.
        bd_warm = CuArray(copy(b))
        try
            solver_fn(Ad, bd_warm)
            CUDA.synchronize()
        catch e
            println(" Warning: solver failed during warm-up: $e")
            continue
        end

        # Benchmark. NOTE(review): some solvers (e.g. gesv!) may overwrite
        # their right-hand-side argument, so bd_bench could be mutated between
        # samples — confirm this does not bias the timings.
        bd_bench = CuArray(copy(b))
        try
            bench = @benchmark begin
                x = $(solver_fn)($Ad, $bd_bench)
                CUDA.synchronize()
            end seconds = 5 samples = 10

            # BenchmarkTools reports times in nanoseconds; convert to seconds.
            # (Was previously mis-named `time_ms` while holding seconds.)
            time_s = median(bench.times) / 1e9

            # Compute the relative residual on a fresh copy of b.
            # (Renamed from `error`, which shadowed Base.error.)
            bd_err = CuArray(copy(b))
            x_sol = solver_fn(Ad, bd_err)
            CUDA.synchronize()
            rel_err = norm(Ad*x_sol - bd_err) / norm(bd_err)

            results[sparsity][solver_name] = (time=time_s, error=rel_err)
            println(" Time: $(round(time_s, digits=3)) s, Error: $(round(rel_err, sigdigits=3))")
        catch e
            println(" Error during benchmark: $e")
        end
    end
end

# Create error-vs-time plot
p = plot(
    size=(800, 800),
    legend=:bottomright,
    xlabel="Time (s)",
    ylabel="Relative residual: ||Ax - b||₂ / ||b||₂",
    yscale=:log10,
    guidefontsize=22,
    tickfontsize=20,
    legendfontsize=18,
    margin=5*Plots.mm,
    framestyle=:box,
    title="Random Matrices of Size $(N)x$(N),\n Varying Sparsity Levels (ρ) and\n GPU Solvers",
    titlefontsize=22,
)

# Symbols encode sparsity levels; colors encode solvers.
marker_map_sparsity = OrderedDict(0.1=>:circle, 0.5=>:square, 0.9=>:utriangle)
color_map_solver = OrderedDict("Default"=>:red, "gesv!"=>:blue, "Generated"=>:green)

# Plot each point individually so marker shape shows sparsity and color shows solver.
for solver_name in keys(solvers)
    for sparsity in sparsity_levels
        if haskey(results, sparsity) && haskey(results[sparsity], solver_name)
            t = results[sparsity][solver_name].time
            e = results[sparsity][solver_name].error
            scatter!(p, [t], [e];
                label="",
                marker=marker_map_sparsity[sparsity],
                markersize=15,
                color=color_map_solver[solver_name],
                markerstrokecolor=:black,
                markerstrokewidth=0.0,
                alpha=0.45)
        end
    end
end

# Build a combined legend from off-plot NaN points, one entry per
# solver/sparsity combination.
for solver_name in keys(solvers)
    for s in sparsity_levels
        lbl = "$(solver_name), ρ:$(s)"
        scatter!(p, [NaN], [NaN]; label=lbl,
            marker=marker_map_sparsity[s],
            markersize=15,
            color=color_map_solver[solver_name],
            markerstrokecolor=:black,
            markerstrokewidth=0.0,
            alpha=0.45)
    end
end

savefig(p, "error_vs_time.pdf")
println("\n✓ Plot saved as error_vs_time.pdf")

display(p)
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Example driver: ask the LLM to generate a Dagger.jl-based sparse LU solver
# with iterative refinement, then save the resulting code to solver.jl.
using SmartSolve
using LinearAlgebra
using SparseArrays
using CUDA
using BenchmarkTools
using Dagger

prompt = """
Generate a high-performance Dagger.jl (https://juliaparallel.org/Dagger.jl/dev/) implementation in Julia of a linear solver for sparse matrices
based on LU with iterative refinement (at least 5 refinement iterations), using the following
reference: https://nhigham.com/2023/03/13/what-is-iterative-refinement
"""

# Fail early with a clear message instead of a bare KeyError when the
# API key is not configured.
haskey(ENV, "OPENAI_API_KEY") ||
    error("OPENAI_API_KEY environment variable is not set")
secret_key = ENV["OPENAI_API_KEY"]

# Returns the generated solver source, the chat history, and a convergence
# flag/record (presumably — confirm against generate_default_code).
solver, hist, conv = gen_linear_solver_dagger(prompt, secret_key; max_iters = 5)

println("Generated Code:\n")
println(solver)
write("solver.jl", solver)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
This example generates a high-performance Dagger.jl implementation in Julia for solving sparse linear systems using an LU-based method with iterative refinement.

src/Agentic.jl

Lines changed: 53 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@ function error_prompt_maker(err_message)
2020
end
2121

2222
proposed_fn(x) = x
23-
function generate_default_code(prompt, secret_key, checker_filename, model = "gpt-5-mini", dev_prompt_fn = dev_prompt_maker; max_iters = 3)
23+
evaluator(x) = (true, "")
24+
function generate_default_code(prompt, secret_key, checker_filename;
25+
model = "gpt-5-mini", dev_prompt_fn = dev_prompt_maker, max_iters = 3)
2426
"""
2527
- checker_fn: proposed_fn -> check : Bool, performance_description : String
2628
"""
@@ -81,18 +83,64 @@ function ls_cuda_dev_prompt_maker(fn_str)
8183
" Assume that LinearAlgebra and SparseArrays is already imported."
8284
end
8385

86+
"""
    ls_dagger_dev_prompt_maker(fn_str)

Build the developer/system prompt used when asking the model to generate a
Dagger.jl-based linear solver. `fn_str` is the source of the checker script
that the model's `proposed_fn` must pass.
"""
function ls_dagger_dev_prompt_maker(fn_str)
    # NOTE(review): the persona previously claimed CUDA/GPU expertise — a
    # copy-paste from ls_cuda_dev_prompt_maker — and read "code the check";
    # both fixed below.
    return "You are a numerical linear algebra expert, and an expert Julia programmer. You are very experienced in parallel programming using Dagger.jl." *
        " The user will ask you to generate a function and use the following code to check if your solution is accurate and fast." *
        " Make sure the code you produce uses Dagger." *
        " Here is the code: \n" * fn_str * "\nOnly return the function. Make sure the function name is proposed_fn. Do not return extra text." *
        " Assume that LinearAlgebra and SparseArrays is already imported." *
        " Assume that Dagger is already imported." *
        " Use the following Dagger.jl documentation: https://juliaparallel.org/Dagger.jl/dev/" *
        " Use the following Dagger.jl implementation of Cholesky as an example: https://github.com/JuliaParallel/Dagger.jl/blob/67211816781d59109d74940550ca2d80af96b13d/src/array/cholesky.jl"
end
96+
8497
src_dir = @__DIR__
8598

86-
"""
    gen_linear_solver(prompt, secret_key;
                      checker_filename = joinpath(src_dir, "test_performance.jl"),
                      model = "gpt-5-mini",
                      max_iters = 10)

Generate an LLM-produced CPU linear solver, iterating up to `max_iters` times
against the checker script at `checker_filename`. Forwards to
`generate_default_code` with the CPU developer-prompt maker.
"""
function gen_linear_solver(prompt, secret_key;
                           checker_filename = joinpath(src_dir, "test_performance.jl"),
                           model = "gpt-5-mini",
                           max_iters = 10)
    # joinpath is the portable way to build the default checker path
    # (was src_dir * "/test_performance.jl").
    return generate_default_code(prompt, secret_key, checker_filename;
                                 model = model,
                                 dev_prompt_fn = ls_dev_prompt_maker,
                                 max_iters = max_iters)
end
89108

90-
"""
    gen_linear_solver_cuda(prompt, secret_key;
                           checker_filename = joinpath(src_dir, "test_performance_cuda.jl"),
                           model = "gpt-5-mini",
                           max_iters = 10)

Generate an LLM-produced CUDA/GPU linear solver, iterating up to `max_iters`
times against the CUDA checker script. Forwards to `generate_default_code`
with the CUDA developer-prompt maker.
"""
function gen_linear_solver_cuda(prompt, secret_key;
                                checker_filename = joinpath(src_dir, "test_performance_cuda.jl"),
                                model = "gpt-5-mini",
                                max_iters = 10)
    # joinpath is the portable way to build the default checker path
    # (was src_dir * "/test_performance_cuda.jl").
    return generate_default_code(prompt, secret_key, checker_filename;
                                 model = model,
                                 dev_prompt_fn = ls_cuda_dev_prompt_maker,
                                 max_iters = max_iters)
end
118+
119+
"""
    gen_linear_solver_dagger(prompt, secret_key;
                             checker_filename = joinpath(src_dir, "test_performance_dagger.jl"),
                             model = "gpt-5-mini",
                             max_iters = 10)

Generate an LLM-produced Dagger.jl linear solver, iterating up to `max_iters`
times against the Dagger checker script. Forwards to `generate_default_code`
with the Dagger developer-prompt maker.
"""
function gen_linear_solver_dagger(prompt, secret_key;
                                  checker_filename = joinpath(src_dir, "test_performance_dagger.jl"),
                                  model = "gpt-5-mini",
                                  max_iters = 10)
    # joinpath is the portable way to build the default checker path
    # (was src_dir * "/test_performance_dagger.jl").
    return generate_default_code(prompt, secret_key, checker_filename;
                                 model = model,
                                 dev_prompt_fn = ls_dagger_dev_prompt_maker,
                                 max_iters = max_iters)
end
93128

94129
"""
    printhist(hist)

Print each entry of a chat history `hist` to stdout. Each entry is a
(role, message) pair whose second element carries the value, e.g.
`role[2]` is the role name and `message[2]` the message text.
"""
function printhist(hist)
    n = 0
    for (role, message) in hist
        n += 1
        println("Message $n $(role[2]):\n$(message[2])\n")
    end
    return nothing
end
134+
135+
"""
    get_report(m_err, m_runtime, m_alloc,
               err_threshold, runtime_threshold, alloc_threshold)

Render the measured median ratios (default solver vs. generated solver) and
their target thresholds as a human-readable multi-line report string.
"""
function get_report(med_err_ratio, med_runtime_ratio, med_alloc_ratio,
                    err_thresh, runtime_thresh, alloc_thresh)
    # Each "Desired" line states the threshold the ratio above it must meet.
    return """
    Median error ratio (error_default / error_gen): $(med_err_ratio)
    Desired median error ratio: >= $(err_thresh)
    Median runtime ratio or speedup (runtime_default / runtime_gen): $(med_runtime_ratio)
    Desired median runtime ratio: >= $(runtime_thresh)
    Allocation median ratio (alloc_default / alloc_gen): $(med_alloc_ratio)
    Desired median allocation ratio: >= $(alloc_thresh)
    """
end

src/SmartSolve.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
module SmartSolve
22

3+
34
using MatrixDepot
45
using LinearAlgebra
56
using DataFrames
@@ -13,15 +14,14 @@ using BSON
1314
using SparseArrays
1415
using OpenAI
1516
using CUDA
17+
using Dagger
1618

1719
include("SmartDiscovery.jl")
1820
include("SmartDB.jl")
1921
include("SmartModel.jl")
2022
include("Utils.jl")
2123
include("Agentic.jl")
22-
include("test_performance.jl")
23-
# include("test_performance_cuda.jl")
2424

25-
export generate_default_code, gen_linear_solver, gen_linear_solver_cuda, printhist
25+
export generate_default_code, gen_linear_solver, gen_linear_solver_cuda, gen_linear_solver_dagger, printhist
2626

2727
end # module SmartSolve

src/test_performance.jl

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,6 @@ push!(test_matrices, sprand(N, N, 0.1))
44
push!(test_matrices, sprand(N, N, 0.2))
55
push!(test_matrices, sprand(N, N, 0.3))
66

7-
function get_report(m_err, m_runtime, m_alloc,
8-
err_threshold, runtime_threshold, alloc_threshold)
9-
report = """
10-
Median error ratio (error_default / error_gen): $(m_err)
11-
Desired median error ratio: >= $err_threshold
12-
Median Runtime ratio or speedup (runtime_default / runtime_gen): $(m_runtime)
13-
Desired median runtime ratio: >= $runtime_threshold
14-
Allocation median ratio (alloc_default / alloc_gen): $(m_alloc)
15-
Desired median allocation ratio: >= $alloc_threshold
16-
"""
17-
return report
18-
end
19-
207
function evaluator(proposed_fn, err_threshold=1.0,
218
runtime_threshold=1.1,
229
alloc_threshold=0.0)

src/test_performance_cuda.jl

Lines changed: 4 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,23 +4,10 @@ push!(test_matrices, sprand(N, N, 0.1))
44
push!(test_matrices, sprand(N, N, 0.2))
55
push!(test_matrices, sprand(N, N, 0.3))
66

7-
function get_report(m_err, m_runtime, m_alloc,
8-
err_threshold, runtime_threshold, alloc_threshold)
9-
report = """
10-
Median error ratio (error_default / error_gen): $(m_err)
11-
Desired median error ratio: >= $err_threshold
12-
Median runtime ratio or speedup (runtime_default / runtime_gen): $(m_runtime)
13-
Desired median runtime ratio: >= $runtime_threshold
14-
Allocation median ratio (alloc_default / alloc_gen): $(m_alloc)
15-
Desired median allocation ratio: >= $alloc_threshold
16-
"""
17-
return report
18-
end
19-
20-
function evaluator_cuda(proposed_fn;
21-
err_threshold::Float64 = 1.0,
22-
runtime_threshold::Float64 = 1.1,
23-
alloc_threshold::Float64 = 0.0)
7+
function evaluator( proposed_fn;
8+
err_threshold::Float64 = 1.0,
9+
runtime_threshold::Float64 = 1.1,
10+
alloc_threshold::Float64 = 0.0)
2411

2512
error_ratios = Float64[]
2613
runtime_ratios = Float64[]

0 commit comments

Comments
 (0)