Skip to content

Commit b7f8da7

Browse files
authored
Merge pull request #202 from barche/update-mpi
Upgrade linux mpich and open-mpi versions
2 parents 35ece71 + 6613fc3 commit b7f8da7

File tree

5 files changed

+42
-22
lines changed

5 files changed

+42
-22
lines changed

.travis.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ env:
1414
- MPI_IMPL=openmpi
1515
before_install:
1616
- sh ./conf/travis-install-mpi.sh $MPI_IMPL
17-
- export PATH=$HOME/OpenMPI/bin:$PATH
17+
- export PATH=$HOME/OpenMPI/bin:$HOME/MPICH/bin:$PATH
1818
# Work around OpenMPI attempting to create overly long temporary
1919
# file names - and erroring as a result
2020
- export TMPDIR=/tmp

conf/travis-install-mpi.sh

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,14 @@ set -x
77

88
MPI_IMPL="$1"
99
os=`uname`
10+
OMPIVER=openmpi-3.0.0
11+
MPICHVER=mpich-3.2.1
1012

1113
case "$os" in
1214
Darwin)
1315
brew update
1416
brew upgrade cmake
17+
brew cask uninstall oclint # Prevent conflict with gcc
1518
case "$MPI_IMPL" in
1619
mpich|mpich3)
1720
brew install mpich
@@ -36,16 +39,19 @@ case "$os" in
3639
sudo apt-get install -y gfortran mpich2 libmpich2-3 libmpich2-dev
3740
;;
3841
mpich|mpich3)
39-
sudo apt-get install -y gfortran libcr0 default-jdk hwloc libmpich10 libmpich-dev
40-
wget -q http://de.archive.ubuntu.com/ubuntu/pool/universe/m/mpich/mpich_3.0.4-6ubuntu1_amd64.deb
41-
sudo dpkg -i ./mpich_3.0.4-6ubuntu1_amd64.deb
42-
# rm -f ./mpich_3.1-1ubuntu_amd64.deb
42+
sudo apt-get install -y gfortran hwloc
43+
wget http://www.mpich.org/static/downloads/3.2.1/$MPICHVER.tar.gz
44+
tar -zxf $MPICHVER.tar.gz
45+
cd $MPICHVER
46+
sh ./configure --prefix=$HOME/MPICH --enable-shared > /dev/null
47+
make -j > /dev/null
48+
sudo make install > /dev/null
4349
;;
4450
openmpi)
4551
sudo apt-get install -y gfortran
46-
wget --no-check-certificate https://www.open-mpi.org/software/ompi/v1.10/downloads/openmpi-1.10.2.tar.gz
47-
tar -zxf openmpi-1.10.2.tar.gz
48-
cd openmpi-1.10.2
52+
wget --no-check-certificate https://www.open-mpi.org/software/ompi/v3.0/downloads/$OMPIVER.tar.gz
53+
tar -zxf $OMPIVER.tar.gz
54+
cd $OMPIVER
4955
sh ./configure --prefix=$HOME/OpenMPI > /dev/null
5056
make -j > /dev/null
5157
sudo make install > /dev/null

src/cman.jl

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -245,8 +245,12 @@ end
245245

246246
# Kill a worker
247247
function kill(mgr::MPIManager, pid::Int, config::WorkerConfig)
248-
# Do nothing, as the worker will self-terminate after calling MPI.Finalize
249-
Base.set_worker_state(Base.Worker(pid), Base.W_TERMINATED)
248+
# Exit the worker to avoid EOF errors on the workers
249+
@spawnat pid begin
250+
MPI.Finalize()
251+
exit()
252+
end
253+
Distributed.set_worker_state(Distributed.Worker(pid), Distributed.W_TERMINATED)
250254
end
251255

252256
# Set up a connection to a worker
@@ -418,14 +422,7 @@ end
418422
function stop_main_loop(mgr::MPIManager)
419423
if mgr.mode == TCP_TRANSPORT_ALL
420424
# Shut down all workers
421-
for i in workers()
422-
if i != myid()
423-
@spawnat i begin
424-
MPI.Finalize()
425-
exit()
426-
end
427-
end
428-
end
425+
rmprocs(workers())
429426
# Poor man's flush of the send queue
430427
sleep(1)
431428
put!(mgr.initiate_shutdown, nothing)

test/runtests.jl

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,21 +14,34 @@ const coverage_opts =
1414
JL_LOG_USER => "user",
1515
JL_LOG_ALL => "all")
1616

17+
# Files to run without mpiexec
18+
juliafiles = ["test_cman_julia.jl"]
19+
# Files to run with mpiexec -n 1
20+
singlefiles = []
21+
1722
function runtests()
1823
nprocs = clamp(Sys.CPU_CORES, 2, 4)
1924
exename = joinpath(JULIA_HOME, Base.julia_exename())
2025
testdir = dirname(@__FILE__)
21-
istest(f) = endswith(f, ".jl") && f != "runtests.jl"
26+
istest(f) = endswith(f, ".jl") && startswith(f, "test_")
2227
testfiles = sort(filter(istest, readdir(testdir)))
28+
29+
extra_args = []
30+
if contains(readlines(open(`mpiexec --version`)[1])[1], "OpenRTE")
31+
push!(extra_args,"--oversubscribe")
32+
end
33+
2334
nfail = 0
2435
print_with_color(:white, "Running MPI.jl tests\n")
2536
for f in testfiles
2637
try
2738
coverage_opt = coverage_opts[Base.JLOptions().code_coverage]
28-
if f == "test_cman_julia.jl"
39+
if f ∈ singlefiles
40+
run(`mpiexec $extra_args -n 1 $exename --code-coverage=$coverage_opt $(joinpath(testdir, f))`)
41+
elseif f ∈ juliafiles
2942
run(`$exename --code-coverage=$coverage_opt $(joinpath(testdir, f))`)
3043
else
31-
run(`mpiexec -n $nprocs $exename --code-coverage=$coverage_opt $(joinpath(testdir, f))`)
44+
run(`mpiexec $extra_args -n $nprocs $exename --code-coverage=$coverage_opt $(joinpath(testdir, f))`)
3245
end
3346
Base.with_output_color(:green,STDOUT) do io
3447
println(io,"\tSUCCESS: $f")

test/test_cman_julia.jl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,11 @@ using MPI
33

44
# Start workers via `mpiexec` that communicate among themselves via MPI;
55
# communicate with the workers via TCP
6-
mgr = MPI.MPIManager(np=4)
6+
if contains(readlines(open(`mpiexec --version`)[1])[1], "OpenRTE")
7+
mgr = MPI.MPIManager(np=4, mpirun_cmd=`mpiexec --oversubscribe -n 4`)
8+
else
9+
mgr = MPI.MPIManager(np=4)
10+
end
711
addprocs(mgr)
812

913
refs = []

0 commit comments

Comments
 (0)