Skip to content

SparseArray in-place send/recv #624

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: yg/faster-mpi
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
506 changes: 506 additions & 0 deletions Demo/mpi_dagger_bench.jl

Large diffs are not rendered by default.

72 changes: 72 additions & 0 deletions benchmarks/DaggerMPI_Strong_scale.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
using Dagger, MPI, LinearAlgebra
using CSV, DataFrames

# Strong-scaling benchmark script for Dagger's blocked Cholesky factorization
# with the MPI acceleration backend enabled.
#
# The MPI ranks must be provided by the launcher, e.g.:
#   mpirun -n 4 julia --project benchmarks/DaggerMPI_Strong_scale.jl

Dagger.accelerate!(:mpi)
comm = MPI.COMM_WORLD
rank = MPI.Comm_rank(comm)
sz = MPI.Comm_size(comm)

# One NamedTuple per (element type, matrix size) measurement.
mpidagger_all_results = []

# Benchmark parameters
datatype = [Float32, Float64]   # element types to factorize
datasize = [128]                # matrix dimension N (matrices are N x N)
blocksize = 4                   # edge length of each square block

# Destination of the CSV report. This script has its own file so that it does
# not overwrite the report produced by the weak-scaling benchmark.
results_file = "benchmarks/results/DaggerMPI_Strong_scale_results.csv"

for T in datatype
    println(" Testing data type: $T")

    for N in datasize
        # Build a Hermitian test matrix (A * A') and add N to its diagonal.
        A = rand(T, N, N)
        A = A * A'
        A[diagind(A)] .+= size(A, 1)
        B = copy(A)
        @assert ishermitian(B)
        DA = distribute(A, Blocks(blocksize, blocksize))
        DB = distribute(B, Blocks(blocksize, blocksize))

        # The first factorization (on DA) is not timed; only the second one,
        # on the identical copy DB, is measured.
        LinearAlgebra._chol!(DA, UpperTriangular)
        elapsed_time = @elapsed LinearAlgebra._chol!(DB, UpperTriangular)

        # Store results; the rate is computed from an operation count of N^3 / 3.
        result = (
            procs = sz,
            dtype = T,
            size = N,
            blocksize = "$(blocksize) x $(blocksize)",
            time = elapsed_time,
            gflops = (N^3 / 3) / (elapsed_time * 1e9)
        )
        push!(mpidagger_all_results, result)
    end
    println()
end

# Write results to CSV
if !isempty(mpidagger_all_results)
    # Create the output directory first so the write also works on a fresh checkout.
    mkpath(dirname(results_file))
    df = DataFrame(mpidagger_all_results)
    CSV.write(results_file, df)
    println("Results written to $(results_file)")
end

# Summary statistics
for result in mpidagger_all_results
    println(result.procs, " ", result.dtype, " ", result.size, " ", result.blocksize, " ", result.time, " ", result.gflops)
end
println("\nAll Cholesky tests completed!")

64 changes: 64 additions & 0 deletions benchmarks/DaggerMPI_Weak_scale.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
using Dagger, MPI, LinearAlgebra
using CSV, DataFrames, Logging

# Suppress every log record at or below level 2999.
disable_logging(LogLevel(2999))

# Weak-scaling benchmark script for Dagger's blocked Cholesky factorization
# with the MPI acceleration backend enabled.
#
# The MPI ranks must be provided by the launcher, e.g.:
#   mpirun -n 4 julia --project benchmarks/DaggerMPI_Weak_scale.jl

Dagger.accelerate!(:mpi)
comm = MPI.COMM_WORLD
rank = MPI.Comm_rank(comm)
sz = MPI.Comm_size(comm)

# Collected measurements, one NamedTuple per (element type, matrix size) pair.
mpidagger_all_results = []

# Benchmark parameters
datatype = [Float32, Float64]
datasize = [8, 16]
blocksize = 4

# Element types vary slowest, matrix sizes fastest.
for T in datatype, N in datasize
    # Hermitian test matrix with N added to every diagonal entry.
    A = rand(T, N, N)
    A = A * A'
    A[diagind(A)] .+= N
    B = copy(A)
    @assert ishermitian(B)

    block_layout = Blocks(blocksize, blocksize)
    DA = distribute(A, block_layout)
    DB = distribute(B, block_layout)

    # Untimed factorization of DA, followed by the timed one on its copy DB.
    LinearAlgebra._chol!(DA, UpperTriangular)
    elapsed_time = @elapsed LinearAlgebra._chol!(DB, UpperTriangular)

    # Rate derived from an operation count of N^3 / 3.
    op_count = N^3 / 3
    push!(
        mpidagger_all_results,
        (
            procs = sz,
            dtype = T,
            size = N,
            blocksize = blocksize,
            time = elapsed_time,
            gflops = op_count / (elapsed_time * 1e9),
        ),
    )
end

# Persist the measurements; the output directory is created unconditionally.
mkpath("benchmarks/results")
if !isempty(mpidagger_all_results)
    df = DataFrame(mpidagger_all_results)
    CSV.write("benchmarks/results/DaggerMPI_Weak_scale_results.csv", df)
    println("Results written to benchmarks/results/DaggerMPI_Weak_scale_results.csv")
end

# Echo every measurement as one comma-separated row.
for row in mpidagger_all_results
    println(join((row.procs, row.dtype, row.size, row.blocksize, row.time, row.gflops), ","))
end

93 changes: 93 additions & 0 deletions benchmarks/DaggerTCP_Strong_scale.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
using Distributed
using Dates

using Logging
# Suppress every log record at or below level 2999 on this process.
disable_logging(LogLevel(2999))

# Strong-scaling benchmark script for Dagger's blocked Cholesky factorization
# over Distributed worker processes: a fixed problem size is factorized with
# an increasing number of processes.

println("Standalone Dagger.jl Cholesky Test with Multiple Configurations")
base_system_info = Dict(
    "julia_version" => string(VERSION),
    "num_threads" => Threads.nthreads(),
    "hostname" => gethostname(),
    "cpu_info" => Sys.cpu_info()[1].model,
    "total_memory" => Sys.total_memory(),
    "timestamp" => Dates.now()
)
println("Base System Info:")
for (key, value) in base_system_info
    println(" $key: $value")
end

# Define constants
DaggerTCP_results = []               # one NamedTuple per measurement
number_of_processes = [16, 32, 64]   # total process counts (master + workers)
data_size = [8192]                   # matrix dimension N (matrices are N x N)
blocksize = 64                       # edge length of each square block

addprocs(1)
# Load the benchmark dependencies on the master and the initial worker up
# front, so every name used inside the loop below is already defined when the
# loop begins executing.
@everywhere using Dagger, LinearAlgebra, Random, Test

for target_procs in number_of_processes
    println("TESTING WITH $target_procs PROCESSES")
    # Add only missing workers (the master counts as one of the processes).
    needed_workers = target_procs - 1
    current_workers = nworkers()
    if current_workers < needed_workers
        addprocs(needed_workers - current_workers)
    end
    # Newly added workers start without the packages loaded.
    @everywhere using Dagger, LinearAlgebra, Random, Test

    println()
    println("Active workers: $(nworkers()) (Total processes: $(nprocs()))")

    for T in (Float32, Float64)

        for N in data_size
            println(" Testing data type: $T, size: $N")

            try
                # Build a Hermitian test matrix (A * A') and add N to its diagonal.
                A = rand(T, N, N)
                A = A * A'
                A[diagind(A)] .+= size(A, 1)
                B = copy(A)
                @assert ishermitian(B)
                DA = distribute(A, Blocks(blocksize, blocksize))
                DB = distribute(B, Blocks(blocksize, blocksize))

                # The first factorization (on DA) is not timed; only the
                # second one, on the identical copy DB, is measured.
                LinearAlgebra._chol!(DA, UpperTriangular)
                elapsed_time = @elapsed LinearAlgebra._chol!(DB, UpperTriangular)

                # Store results; the rate uses an operation count of N^3 / 3.
                result = (
                    procs = nprocs(),
                    dtype = T,
                    size = N,
                    blocksize = "$(blocksize) x $(blocksize)",
                    time = elapsed_time,
                    gflops = (N^3 / 3) / (elapsed_time * 1e9)
                )
                push!(DaggerTCP_results, result)
            catch e
                # Report the failure and continue with the next configuration.
                println("ERROR: $e")
            end
        end
        println()
    end
    println()
end

# Clean up workers at the end. `nworkers()` is 1 even when only the master
# exists, so test the total process count instead.
if nprocs() > 1
    rmprocs(workers())
end

# Summary statistics (including the computed rate, as in the other benchmarks)
for result in DaggerTCP_results
    println(result.procs, " ", result.dtype, " ", result.size, " ", result.blocksize, " ", result.time, " ", result.gflops)
end
println("\nAll Cholesky tests completed!")
76 changes: 76 additions & 0 deletions benchmarks/DaggerTCP_Weak_scale.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
using Distributed
using Dates

# Weak-scaling benchmark script for Dagger's blocked Cholesky factorization
# over Distributed worker processes.
#
# The only text this script itself writes to stdout is the comma-separated
# summary at the end (procs,dtype,size,blocksize,time,gflops); failures are
# reported on stderr so they do not mix with those rows.

# Collected measurements, one NamedTuple per (element type, matrix size) pair.
all_results = []

# Define constants
addprocs(3)
datasize = [8, 16]   # matrix dimension N (matrices are N x N)
blocksize = 4        # edge length of each square block
@everywhere using Dagger, LinearAlgebra, Random, Test, Logging
# Suppress every log record at or below level 2999 on all processes.
@everywhere disable_logging(LogLevel(2999))

for T in (Float32, Float64)
    for N in datasize
        try
            # Build a Hermitian test matrix (A * A') and add N to its diagonal.
            A = rand(T, N, N)
            A = A * A'
            A[diagind(A)] .+= size(A, 1)
            B = copy(A)
            @assert ishermitian(A)
            DA = distribute(A, Blocks(blocksize, blocksize))
            DB = distribute(B, Blocks(blocksize, blocksize))

            # The first factorization (on DA) is not timed; only the second
            # one, on the identical copy DB, is measured.
            LinearAlgebra._chol!(DA, UpperTriangular)
            elapsed_time = @elapsed LinearAlgebra._chol!(DB, UpperTriangular)

            # Store results. The rate uses an operation count of N^3 / 3, the
            # same formula as the other Cholesky benchmarks in this directory,
            # so the reported numbers are directly comparable.
            result = (
                procs = nprocs(),
                dtype = T,
                size = N,
                blocksize = blocksize,
                time = elapsed_time,
                gflops = (N^3 / 3) / (elapsed_time * 1e9)
            )
            push!(all_results, result)
        catch e
            # Do not drop failures silently: a missing row in the summary
            # would otherwise be indistinguishable from a skipped case.
            println(stderr, "ERROR (dtype=$T, size=$N): ", sprint(showerror, e))
        end
    end
end

# Summary statistics
for result in all_results
    println(result.procs, ",", result.dtype, ",", result.size, ",", result.blocksize, ",", result.time, ",", result.gflops)
end

5 changes: 5 additions & 0 deletions benchmarks/results/DaggerMPI_Weak_scale_results.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
procs,dtype,size,blocksize,time,gflops
4,Float32,8,4,0.011709834,1.457464441141238e-5
4,Float32,16,4,0.039333791,3.471146051834498e-5
4,Float64,8,4,0.005815791,2.934539199683528e-5
4,Float64,16,4,0.022347208,6.109637200912675e-5
Loading
Loading