Skip to content

Commit 278b4e9

Browse files
committed
Merge branch 'performance'
2 parents b227da6 + 6ca52ac commit 278b4e9

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+1814
-8
lines changed

.gitignore

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@
22
/doc/latex
33
/doc/html
44

5-
*.pdf
6-
75
# Created by https://www.gitignore.io
86

97
### Python ###
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import dolfin as df
2+
import numpy as np
3+
import matplotlib.pyplot as plt
4+
5+
6+
# Discretization and signal parameters
N = 128  # Number of elements
k = 127  # Wave frequency
d = 1  # FE order

# Get mesh and function space (CG or DG)
mesh = df.UnitIntervalMesh(N)
V = df.FunctionSpace(mesh, "CG", d)
# V = df.FunctionSpace(mesh, "DG", d)

# Build mass matrix from the bilinear form u * v * dx
u, v = df.TrialFunction(V), df.TestFunction(V)
a_M = u * v * df.dx
M = df.assemble(a_M)

# Create vector with sine function, interpolated into V
u0 = df.Expression('sin(k*pi*x[0])', pi=np.pi, k=k, degree=d)
w = df.interpolate(u0, V)

# Apply mass matrix to this vector; the product is stored in Mw
Mw = df.Function(V)
M.mult(w.vector(), Mw.vector())

# Do FFT to get the frequencies, then shift so that the zero frequency
# ends up in the middle of the plot
fw2 = np.fft.fftshift(np.fft.fft(w.vector()[:]))
fMw2 = np.fft.fftshift(np.fft.fft(Mw.vector()[:]))

# Plot the magnitude of each spectrum in its own figure
for spectrum in (fw2, fMw2):
    plt.figure()
    plt.plot(abs(spectrum))

plt.show()
44+
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#!/bin/bash -x
2+
#SBATCH --nodes=1
3+
#SBATCH --ntasks-per-node=1
4+
#SBATCH --output=run.out
5+
#SBATCH --error=run.err
6+
#SBATCH --time=00:05:00
7+
#SBATCH --partition=devel
8+
9+
source /p/home/jusers/speck1/juwels/venv/dask/bin/activate
10+
11+
srun --cpu_bind=sockets python dask_test.py
12+
touch ready
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import cupy
2+
import dask.array as da
3+
import time
4+
5+
from dask.distributed import LocalCluster, Client
6+
7+
# generate chunked dask arrays of many numpy random arrays
8+
# rs = da.random.RandomState()
9+
# x = rs.normal(10, 1, size=(5000, 5000), chunks=(1000, 1000))
10+
#
11+
# print(f'{x.nbytes / 1e9} GB of data')
12+
#
13+
# t0 = time.time()
14+
# (x + 1)[::2, ::2].sum().compute(scheduler='single-threaded')
15+
# print(time.time() - t0)
16+
#
17+
# t0 = time.time()
18+
# (x + 1)[::2, ::2].sum().compute(scheduler='threads')
19+
# print(time.time() - t0)
20+
21+
if __name__ == '__main__':
    # Start a local cluster and attach the client to *that* cluster. A bare
    # Client() would start its own default cluster and ignore the
    # worker/thread configuration chosen here.
    # Both objects are context managers, so workers are shut down on exit.
    with LocalCluster(n_workers=2, processes=True, threads_per_worker=24) as cluster:
        print(cluster)

        with Client(cluster) as client:
            print(client)
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
from mpi4py import MPI
2+
import numpy as np
3+
import time
4+
5+
6+
def sleep(n):
    """Simulate computational work by drawing ``n`` uniform random numbers.

    Despite its name this does not call ``time.sleep``; it generates a
    NumPy array of length ``n`` and discards it.

    Args:
        n: Number of random samples to generate.

    Returns:
        None.
    """
    np.random.rand(n)
8+
9+
10+
comm = MPI.COMM_WORLD
rank = comm.Get_rank()

# Synchronize all ranks before starting the timer
comm.Barrier()

t0 = time.time()

if rank == 0:
    sbuf = np.empty(40000000)
    sbuf[0] = 0
    sbuf[1:4] = np.random.rand(3)
    # Start the non-blocking send, do other work, then complete the request
    req = comm.Isend(sbuf, dest=1, tag=99)
    sleep(100000000)
    # Buffer-based Isend pairs with the upper-case Wait()
    req.Wait()
    print("[%02d] Original data %s" % (rank, sbuf))
elif rank == 1:
    # Only rank 1 is ever sent a message. Letting every non-zero rank post
    # this Recv would block forever when run with more than two processes.
    rbuf = np.empty(40000000)
    sleep(10000000)
    comm.Recv(rbuf, source=0, tag=99)
    print("[%02d] Received data %s" % (rank, rbuf))

t1 = time.time()

print(f'Rank: {rank} -- Time: {t1-t0}')
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
#!/bin/bash -x
2+
#SBATCH --nodes=2
3+
#SBATCH --ntasks-per-node=24
4+
#SBATCH --cpus-per-task=1
5+
#SBATCH --output=run.out
6+
#SBATCH --error=run.err
7+
#SBATCH --time=00:05:00
8+
#SBATCH --partition=devel
9+
10+
export HWT=1
11+
export PIN=`./correct_pinning.sh`
12+
13+
14+
#export SCOREP_ENABLE_TRACING=1
15+
#export PATH=/p/project/ccstma/scorep/6.0-trunk-mrobefix_intel-parastation-papi/bin:$PATH
16+
#export PATH=/p/project/ccstma/scorep/6.0-trunk-mprobefix_intel-impi-papi/bin:$PATH
17+
18+
#srun python -m scorep --mpp=mpi rma.py
19+
#srun python -m scorep --mpp=mpi isend.py
20+
#srun python -m scorep --mpp=mpi thread.py
21+
22+
#srun python rma.py
23+
#srun python isend.py
24+
#srun --cpu_bind=sockets python thread.py -n 12
25+
#srun --cpu_bind=sockets --hint=multithread python thread.py -n 12
26+
27+
echo -e "\n\nDEFAULT PINNING\n---------------------------\n"
28+
srun --label python thread.py -n 24
29+
echo -e "\n\nSOCKET PINNING\n---------------------------\n"
30+
srun --cpu_bind=sockets python thread.py -n 24
31+
echo -e "\n\nBROEMMEL PINNING\n---------------------------\n"
32+
srun $PIN --label python thread.py -n 24
33+
#srun $PIN --label ./show_affinity_jureca.x
34+
35+
touch ready
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
import mpi4py
2+
mpi4py.rc.threaded = True
3+
mpi4py.rc.thread_level = "funneled"
4+
# mpi4py.rc.profile('vt-hyb', logfile='threads')
5+
6+
from mpi4py import MPI
7+
from threading import Thread
8+
9+
MPI.COMM_WORLD.Barrier()
10+
11+
# Understanding the Python GIL
12+
# David Beazley, http://www.dabeaz.com
13+
# PyCon 2010, Atlanta, Georgia
14+
# http://www.dabeaz.com/python/UnderstandingGIL.pdf
15+
16+
# Consider this trivial CPU-bound function
17+
def countdown(n):
    """Count ``n`` down to zero in a pure-Python loop.

    Args:
        n: Starting value; the loop runs for as long as ``n > 0``.

    Returns:
        None.
    """
    while n > 0:
        n -= 1
20+
21+
# Run it once with a lot of work
COUNT = 10000000  # 10 million
tic = MPI.Wtime()
countdown(COUNT)
toc = MPI.Wtime()
print("sequential: %f seconds" % (toc - tic))

# Now, subdivide the work across two threads, each taking half the count
workers = [Thread(target=countdown, args=(COUNT // 2,)) for _ in range(2)]
tic = MPI.Wtime()
for worker in workers:
    worker.start()
for worker in workers:
    worker.join()
toc = MPI.Wtime()
print("threaded: %f seconds" % (toc - tic))

pySDC/playgrounds/parallel/rma.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
from mpi4py import MPI
2+
import numpy as np
3+
import time
4+
5+
6+
def sleep(n):
    """Keep the CPU busy by generating ``n`` uniform random numbers.

    This is a stand-in for real computation, not a call to ``time.sleep``;
    the generated NumPy array is discarded.

    Args:
        n: Number of random samples to generate.

    Returns:
        None.
    """
    np.random.rand(n)
8+
9+
10+
comm = MPI.COMM_WORLD
rank = comm.Get_rank()

t0 = time.time()

if rank != 0:
    # Reading side: attach no local memory to the window, then fetch
    # rank 0's buffer under an exclusive lock on rank 0.
    rbuf = np.empty(40000000)
    win = MPI.Win.Create(None, comm=comm)
    sleep(1000000)
    win.Lock(0, MPI.LOCK_EXCLUSIVE)
    win.Get(rbuf, 0)
    win.Unlock(0)
    print("[%02d] Received data %s" % (rank, rbuf))
else:
    # Owning side: expose sbuf through the window and fill its first
    # entries while holding an exclusive lock on the local window.
    sbuf = np.empty(40000000)
    win = MPI.Win.Create(sbuf, comm=comm)
    win.Lock(0, MPI.LOCK_EXCLUSIVE)
    sbuf[0] = 0
    sbuf[1:4] = np.random.rand(3)
    win.Unlock(0)
    sleep(100000000)
    print("[%02d] Original data %s" % (rank, sbuf))

# Stop the clock before the window is released
t1 = time.time()

win.Free()

print(f'Rank: {rank} -- Time: {t1-t0}')
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
from mpi4py import MPI
2+
import numpy as np
3+
import time
4+
5+
def sleep(n):
    """Emulate a compute phase by generating ``n`` uniform random numbers.

    No actual sleeping takes place; a NumPy array of length ``n`` is
    created and discarded.

    Args:
        n: Number of random samples to generate.

    Returns:
        None.
    """
    np.random.rand(n)
7+
8+
comm = MPI.COMM_WORLD
rank = comm.Get_rank()

# Rank 0 exposes a 4-element buffer; all other ranks attach no memory
if rank == 0:
    sbuf = np.empty(4)
    win = MPI.Win.Create(sbuf, comm=comm)
else:
    rbuf = np.empty(4)
    win = MPI.Win.Create(None, comm=comm)

group = win.Get_group()

# Build the partner group once, naming the partner rank explicitly.
# group.Excl([1]) only equals "rank 0" when exactly two processes run;
# with more ranks the access epoch would wait on ranks that never Post.
if rank == 0:
    partners = group.Incl([1])  # origin allowed to access our window
elif rank == 1:
    partners = group.Incl([0])  # target whose window we read
else:
    partners = None  # remaining ranks do not take part in the exchange

t0 = time.time()

if rank == 0:
    sleep(10000000)
    for i in range(3):
        if i > 0:
            sleep(100000000)
            # Close the previous exposure epoch before touching sbuf again
            win.Wait()
        sbuf[0] = i
        sbuf[1:] = np.random.rand(3)
        print("[%02d] Original data %s" % (rank, sbuf))
        win.Post(partners)
    win.Wait()
elif rank == 1:
    for i in range(3):
        win.Start(partners)
        win.Get(rbuf, 0)
        win.Complete()
        sleep(70000000)
        print("[%02d] Received data %s" % (rank, rbuf))

t1 = time.time()

# Release the sub-group as well as the window group
if partners is not None:
    partners.Free()
group.Free()
win.Free()

print(f'Rank: {rank} -- Time: {t1-t0}')

0 commit comments

Comments
 (0)