506 changes: 506 additions & 0 deletions Demo/mpi_dagger_bench.jl

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions Project.toml
@@ -5,6 +5,7 @@ version = "0.18.14"
[deps]
AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
DistributedNext = "fab6aee4-877b-4bac-a744-3eca44acbb6f"
@@ -15,6 +16,7 @@ MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
MemPool = "f9f48841-c794-520a-933b-121f7ba6ed94"
MetricsTracker = "9a9c6fec-044d-4a27-aa18-2b01ca4026eb"
OnlineStats = "a15396b6-48d5-5d58-9928-6d29437db91e"
+Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
Preferences = "21216c6a-2e73-6563-6e65-726566657250"
Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
@@ -32,12 +34,10 @@ UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"

[weakdeps]
Colors = "5ae59095-9a9b-59fe-a467-6f913c188581"
-DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
GraphViz = "f526b714-d49f-11e8-06ff-31ed36ee7ee0"
JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
-Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d"

[extensions]
7 changes: 7 additions & 0 deletions benchmarks/MPI_benchmarks/bench_scaling.sh
@@ -0,0 +1,7 @@
#!/bin/sh
# Run both scaling suites from the repository root so the relative paths resolve.

set -eux

./benchmarks/MPI_benchmarks/weak_scaling/weak_scale.sh

./benchmarks/MPI_benchmarks/strong_scaling/strong_scale.sh


19 changes: 19 additions & 0 deletions benchmarks/MPI_benchmarks/collect_environment.sh
@@ -0,0 +1,19 @@
#! /bin/sh

# Linux data-gathering commands; adjust as necessary for your platform.
#
# Be sure to remove any information from the output that would violate
# SC's double-blind review policies.

env | sed "s/$USER/USER/g"
cat /etc/os-release
uname -a
lscpu || cat /proc/cpuinfo
free -h
cat /proc/meminfo
lsblk -a
lspci
lsmod | head -20
julia --project -e 'using InteractiveUtils; versioninfo()'
julia --project -e 'using Pkg; Pkg.status()'
julia --project -e 'using MPI; MPI.versioninfo()'
8 changes: 8 additions & 0 deletions benchmarks/MPI_benchmarks/run-mpi-bench.sh
@@ -0,0 +1,8 @@
#!/bin/sh
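
# Launch a Julia script under MPI via MPI.jl's mpiexec wrapper.
# Usage: ./benchmarks/MPI_benchmarks/run-mpi-bench.sh <path/to/script.jl> <num-ranks>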

set -eux

CMD=$1
NP=$2

julia --project -e "using MPI; run(\`\$(mpiexec()) -np $NP julia --project $CMD\`)"
29 changes: 29 additions & 0 deletions benchmarks/MPI_benchmarks/scaling_results/strong_scale_results.csv
@@ -0,0 +1,29 @@
benchmark,procs,dtype,size,time,gflops
MPI,2,Float32,18000,3.192444732,608.9377148847694
MPI,2,Float64,18000,4.694455439,414.10553902586526
MPI,4,Float32,18000,10.958503871,177.3964788336205
MPI,4,Float64,18000,14.021654097,138.64270124991373
MPI,8,Float32,18000,14.974417746,129.82140828275513
MPI,8,Float64,18000,16.985438362,114.45097609898235
MPI,16,Float32,18000,17.121160798,113.54370319488429
MPI,16,Float64,18000,20.63605886,94.20403446164623
MPI,32,Float32,18000,20.309791354,95.71737917519918
MPI,32,Float64,18000,25.849663573,75.2040735273054
MPI,64,Float32,18000,25.609332064,75.9098283056259
MPI,64,Float64,18000,33.751518665,57.597408261688365
MPI,81,Float32,18000,32.39996995,60.00005564819976
MPI,81,Float64,18000,69.292101133,28.05514579892253
TCP,2,Float32,18000,11.020147432,176.40417353719482
TCP,2,Float64,18000,19.848274148,97.94302444154249
TCP,4,Float32,18000,10.30064722,188.72600512184127
TCP,4,Float64,18000,19.605200572,99.15736351998389
TCP,8,Float32,18000,10.247436443,189.70598264387792
TCP,8,Float64,18000,17.90332039,108.58321013379351
TCP,16,Float32,18000,10.620012628,183.05062979629443
TCP,16,Float64,18000,18.595432778,104.54179922609381
TCP,32,Float32,18000,10.096993021,192.53256845447115
TCP,32,Float64,18000,18.282867609,106.32905305527883
TCP,64,Float32,18000,10.513992556,184.8964596128253
TCP,64,Float64,18000,19.610685054,99.12963237372891
TCP,81,Float32,18000,11.069212168,175.6222548177288
TCP,81,Float64,18000,18.878335941,102.97517779509462
38 changes: 38 additions & 0 deletions benchmarks/MPI_benchmarks/scaling_results/weak_scale_results.csv
@@ -0,0 +1,38 @@
benchmark,procs,dtype,size,time,gflops
MPI,1,Float32,256,0.002007487,2.7857741212437905
MPI,4,Float32,512,0.145154067,0.30821900888706527
MPI,9,Float32,768,0.403228874,0.37446461237297207
MPI,16,Float32,1024,0.472109778,0.7581159256001119
MPI,25,Float32,1280,0.951790282,0.7344587141589103
MPI,36,Float32,1536,1.752771286,0.6891712350883411
MPI,49,Float32,1792,2.917590174,0.6574586953394823
MPI,64,Float32,2048,4.934124422,0.5803079301972792
MPI,81,Float32,2304,9.376669704,0.43478800221157926
TCP,1,Float32,256,0.01264368,2.6538501448945246
TCP,4,Float32,512,0.01676833,16.00847884076709
TCP,9,Float32,768,0.034800746,26.03305296961163
TCP,16,Float32,1024,0.062671975,34.26545354602276
TCP,25,Float32,1280,0.102954032,40.739579776729876
TCP,36,Float32,1536,0.204707002,35.40551735499502
TCP,49,Float32,1792,0.269937825,42.63637441696064
TCP,64,Float32,2048,0.3658052,46.96452971144205
TCP,81,Float32,2304,0.475091924,51.48725897517046
MPI,1,Float64,256,0.002090059,2.675716490937975
MPI,4,Float64,512,0.167346827,0.2673444335258694
MPI,9,Float64,768,0.320509358,0.471109314692771
MPI,16,Float64,1024,0.586005239,0.610769183470275
MPI,25,Float64,1280,0.928163809,0.7531544107713282
MPI,36,Float64,1536,1.611667861,0.7495089907981978
MPI,49,Float64,1792,2.82393033,0.6792642895454625
MPI,64,Float64,2048,5.173215466,0.553487777473266
MPI,81,Float64,2304,8.260910867,0.49351258640084295
TCP,1,Float64,256,0.009581393,3.502041091519782
TCP,4,Float64,512,0.023808695,11.27468162366732
TCP,9,Float64,768,0.041865372,21.640071990761246
TCP,16,Float64,1024,0.074942948,28.654912907882945
TCP,25,Float64,1280,0.351894368,11.919213211164552
TCP,36,Float64,1536,0.517019701,14.018338755721805
TCP,49,Float64,1792,0.491128477,23.434133256337322
TCP,64,Float64,2048,0.678788217,25.309616097829227
TCP,81,Float64,2304,0.879469254,27.81357144294211
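
A minimal sketch (not part of this PR) for inspecting these results with the newly promoted DataFrames and Plots dependencies; it assumes CSV.jl is available, as in the benchmark scripts below, and the output file name is an assumption:

using CSV, DataFrames, Plots

# Load the results emitted by the scaling scripts (path as committed above).
df = CSV.read("benchmarks/MPI_benchmarks/scaling_results/weak_scale_results.csv", DataFrame)

# One curve per (benchmark, dtype) pair: GFLOP/s versus process count.
plt = plot(xlabel = "processes", ylabel = "GFLOP/s", title = "Weak scaling")
for grp in groupby(df, [:benchmark, :dtype])
    plot!(plt, grp.procs, grp.gflops;
          label = "$(grp.benchmark[1]) $(grp.dtype[1])", marker = :circle)
end
savefig(plt, "weak_scaling.png")  # output name is an assumption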
65 changes: 65 additions & 0 deletions benchmarks/MPI_benchmarks/strong_scaling/DaggerMPI_Strong_scale.jl
@@ -0,0 +1,65 @@
using Dagger, MPI, LinearAlgebra
using CSV, DataFrames, Logging
disable_logging(LogLevel(2999))

a = Dagger.accelerate!(:mpi)
comm = a.comm
rank = MPI.Comm_rank(comm)
sz = MPI.Comm_size(comm)

mpidagger_all_results = []

# Define constants
# The number of MPI ranks must be set at launch, e.g.:
#   mpirun -n 4 julia --project benchmarks/MPI_benchmarks/strong_scaling/DaggerMPI_Strong_scale.jl
datatype = [Float32, Float64]
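# Strong scaling: the global problem size stays fixed as the rank count grows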
datasize = 18000

for T in datatype
#println(" Testing data type: $T")
if rank == 0
#blocksize = div(datasize, 4)
A = rand(T, datasize, datasize)
A = A * A'
A[diagind(A)] .+= size(A, 1)
B = copy(A)
@assert ishermitian(B)
DA = distribute(A, Blocks(2000,2000))
DB = distribute(B, Blocks(2000,2000))
else
DA = distribute(nothing, Blocks(2000,2000))
DB = distribute(nothing, Blocks(2000,2000))
end


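# Warm-up run to trigger compilation; only the second, timed run is reported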
LinearAlgebra._chol!(DA, UpperTriangular)
elapsed_time = @elapsed chol_DB = LinearAlgebra._chol!(DB, UpperTriangular)

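# Cholesky of an n-by-n matrix costs ~n^3/3 flops, so gflops = (n^3/3) / (time * 1e9)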
# Store results
result = (
procs = sz,
dtype = T,
size = datasize,
time = elapsed_time,
gflops = (datasize^3 / 3) / (elapsed_time * 1e9)
)
push!(mpidagger_all_results, result)


end

if rank == 0
#= Write results to CSV
mkpath("benchmarks/results")
if !isempty(mpidagger_all_results)
df = DataFrame(mpidagger_all_results)
CSV.write("benchmarks/results/DaggerMPI_Weak_scale_results.csv", df)

end
=#
# Summary statistics
for result in mpidagger_all_results
println(result.procs, ",", result.dtype, ",", result.size, ",", result.time, ",", result.gflops)
end
#println("\nAll Cholesky tests completed!")
end
58 changes: 58 additions & 0 deletions benchmarks/MPI_benchmarks/strong_scaling/DaggerTCP_Strong_scale.jl
@@ -0,0 +1,58 @@
using Distributed
using Dates

all_results = []

# Start with one worker; the loop below grows or shrinks the pool to each target count
addprocs(1)
number_of_processes = [2, 4, 8, 16, 32, 64, 81]
for target_workers in number_of_processes
current_workers = nworkers()
if current_workers < target_workers
addprocs(target_workers - current_workers)
elseif current_workers > target_workers
rmprocs(workers()[1:(current_workers - target_workers)])
end
@everywhere using Dagger, LinearAlgebra, Random, Test, Logging
@everywhere disable_logging(LogLevel(2999))

# Define constants
datatype = [Float32, Float64]
datasize = 18000
#blocksize = 4

for T in datatype
#println(" Testing data type: $T")

#blocksize = div(datasize, 4)
A = rand(T, datasize, datasize)
A = A * A'
A[diagind(A)] .+= size(A, 1)
B = copy(A)
@assert ishermitian(B)
DA = distribute(A, Blocks(2000,2000))
DB = distribute(B, Blocks(2000,2000))


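# Warm-up run to trigger compilation; only the second, timed run is reported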
LinearAlgebra._chol!(DA, UpperTriangular)
elapsed_time = @elapsed chol_DB = LinearAlgebra._chol!(DB, UpperTriangular)

# Store results
result = (
procs = nworkers(),
dtype = T,
size = datasize,
time = elapsed_time,
gflops = (datasize^3 / 3) / (elapsed_time * 1e9)
)
push!(all_results, result)

end
end

# Summary statistics
for result in all_results
println(result.procs, ",", result.dtype, ",", result.size, ",", result.time, ",", result.gflops)
end
#println("\nAll Cholesky tests completed!")

24 changes: 24 additions & 0 deletions benchmarks/MPI_benchmarks/strong_scaling/strong_scale.sh
@@ -0,0 +1,24 @@
#!/bin/bash

set -eux

CMD="benchmarks/MPI_benchmarks/strong_scaling/DaggerMPI_Strong_scale.jl"
BENCHMARK_NAME="DaggerMPI_Strong_scale"
OUTPUT_FILE="benchmarks/MPI_benchmarks/scaling_results/strong_scale_results.csv"

# Create the CSV header if the file doesn't exist.
if [ ! -f "$OUTPUT_FILE" ]; then
echo "benchmark,procs,dtype,size,time,gflops" > "$OUTPUT_FILE"
fi

for procs in 2 4 8 16 32 64 81; do
echo "Running $BENCHMARK_NAME with $procs processes..."

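# Rank 0 prints one "procs,dtype,size,time,gflops" row per run; prefix the benchmark name to match the CSV header.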
julia --project -e "using MPI; run(\`\$(mpiexec()) -np $procs julia --project $CMD\`)" | sed "s/^/$BENCHMARK_NAME,/" >> "$OUTPUT_FILE"
done

# Run the TCP benchmark
DAGGERTCP_NAME="DaggerTCP_Strong_scale"
julia --project benchmarks/MPI_benchmarks/strong_scaling/DaggerTCP_Strong_scale.jl | sed "s/^/$DAGGERTCP_NAME,/" >> "$OUTPUT_FILE"

echo "All benchmarks are complete. Results are in $OUTPUT_FILE"
66 changes: 66 additions & 0 deletions benchmarks/MPI_benchmarks/weak_scaling/DaggerMPI_Weak_scale.jl
@@ -0,0 +1,66 @@
using Dagger, MPI, LinearAlgebra
using CSV, DataFrames, Logging
disable_logging(LogLevel(2999))

a = Dagger.accelerate!(:mpi)
comm = a.comm
rank = MPI.Comm_rank(comm)
sz = MPI.Comm_size(comm)

mpidagger_all_results = []

# Define constants
# The number of MPI ranks must be set at launch, e.g.:
#   mpirun -n 4 julia --project benchmarks/MPI_benchmarks/weak_scaling/DaggerMPI_Weak_scale.jl
datatype = [Float32, Float64]
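# Weak scaling: n = 256*sqrt(P), so each rank keeps a fixed 256x256 share of the matrix (n^2/P = 256^2)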
datasize = 256 * floor(Int, sqrt(sz))

for T in datatype
if rank == 0
#blocksize = div(datasize, 4)
A = rand(T, datasize, datasize)
A = A * A'
A[diagind(A)] .+= size(A, 1)
B = copy(A)
@assert ishermitian(B)
DA = distribute(A, Blocks(256,256))
DB = distribute(B, Blocks(256,256))
else
DA = distribute(nothing, Blocks(256,256))
DB = distribute(nothing, Blocks(256,256))
end


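# Warm-up run to trigger compilation; only the second, timed run is reported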
LinearAlgebra._chol!(DA, UpperTriangular)
elapsed_time = @elapsed chol_DB = LinearAlgebra._chol!(DB, UpperTriangular)

# Store results
result = (
procs = sz,
dtype = T,
size = datasize,
time = elapsed_time,
gflops = (datasize^3 / 3) / (elapsed_time * 1e9)
)
push!(mpidagger_all_results, result)


end

if rank == 0
#= Write results to CSV
mkpath("benchmarks/results")
if !isempty(mpidagger_all_results)
df = DataFrame(mpidagger_all_results)
CSV.write("benchmarks/results/DaggerMPI_Weak_scale_results.csv", df)

end
=#
# Summary statistics
for result in mpidagger_all_results
println(result.procs, ",", result.dtype, ",", result.size, ",", result.time, ",", result.gflops)
end
#println("\nAll Cholesky tests completed!")
end
MPI.Finalize()
