Commit 59d9679

Merge pull request #25 from gjbex/development

2 parents: 5771679 + 5664258

11 files changed: +86 additions, −864 deletions

docs/README.md
Lines changed: 15 additions & 21 deletions

@@ -21,20 +21,21 @@ When you complete this training you will
 
 ## Schedule
 
-Total duration: 8 hours.
-
-| Subject | Duration |
-|---------------------------------------------|-----------|
-| introduction and motivation | 5 min. |
-| performance and profiling | 25 min. |
-| libraries | 10 min. |
-| Cython | 90 min. |
-| interfacing with C/C++/Fortran | 60 min. |
-| multi-threaded programming | 60 min. |
-| MPI | 120 min. |
-| dask | 30 min. |
-| pyspark | 20 min. |
-| wrap up | 10 min. |
+Total duration: 4 hours.
+
+| Subject | Duration |
+|---------------------------------------------|----------|
+| introduction and motivation | 5 min. |
+| performance and profiling | 25 min. |
+| libraries | 10 min. |
+| Cython | 60 min. |
+| coffee break | 10 min. |
+| interfacing with C/C++/Fortran | 30 min. |
+| multi-threaded programming | 10 min. |
+| MPI | 45 min. |
+| dask | 15 min. |
+| pyspark | 20 min. |
+| wrap up | 10 min. |
 
 
 ## Training materials

@@ -65,13 +66,6 @@ If you plan to do Python programming in a Linux or HPC environment you should
 be familiar with these as well.
 
 
-## Level
-
-* Introductory: 10 %
-* Intermeidate: 30 %
-* Advanced: 60 %
-
-
 ## Trainer(s)
 
 * Geert Jan Bex ([geertjan.bex@uhasselt.be](mailto:geertjan.bex@uhasselt.be))

python_for_hpc.pptx
23.4 KB (binary file not shown)

source-code/dask/README.md
Lines changed: 3 additions & 0 deletions

@@ -10,6 +10,7 @@ CSV or HDF5 files.
 * `create_csv_data.py`: non-Dask script to generate a large CSV data set for
   experimenting with Dask.
 * `create_csv_data.pbs`: PBS script to run `create_csv_data.py`.
+* `create_csv_data.slurm`: Slurm script to run `create_csv_data.py`.
 * `dask_avg_csv.py`: Dask computation of the average value of columns in
   a large number of CSV files.
 * `dask_avg_csv.pbs`: PBS script to run `dask_avg_csv.py`.

@@ -27,6 +28,8 @@ CSV or HDF5 files.
   futures in a distributed setting.
 * `dask_distr_test.pbs`: PBS script that will launch a scheduler, workers,
   and run the `dask_distr_test.py` script.
+* `dask_distr_test.slurm`: Slurm script that will launch a scheduler, workers,
+  and run the `dask_distr_test.py` script.
 * `dask_sum_aarays.py`: somewhat artificial example of a Dask computation
   on `numpy` arrays.
 * `dask_sum_aarays.pbs`: PBS script to execute `dask_sum_aarays.py`.
source-code/dask/create_csv_data.slurm (new file; name inferred from the README entry above)
Lines changed: 13 additions & 0 deletions

@@ -0,0 +1,13 @@
+#!/usr/bin/env -S bash -l
+#SBATCH --account=lpt2_sysadmin
+#SBATCH --nodes=1
+#SBATCH --cpus-per-task=8
+#SBATCH --time=02:00:00
+
+source .mamba_init.sh
+mamba activate python_for_hpc
+
+DATA_DIR=$VSC_SCRATCH/data/time_series
+mkdir -p $DATA_DIR
+
+./create_csv_data.py --files 800 --rows 200000 --cols 100 $DATA_DIR
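The generator script invoked on the last line is not shown in this diff; a hypothetical stand-in matching the `--files`, `--rows`, and `--cols` flags could look like this (the function name and layout are assumptions, not the actual `create_csv_data.py`):

```python
import csv
import pathlib
import random


def create_csv_data(files, rows, cols, data_dir):
    """Write `files` CSV files of `rows` x `cols` random values into data_dir.

    Hypothetical sketch of what create_csv_data.py might do; the real script
    in the repository may differ.
    """
    data_dir = pathlib.Path(data_dir)
    data_dir.mkdir(parents=True, exist_ok=True)
    for i in range(files):
        with open(data_dir / f'data_{i:04d}.csv', 'w', newline='') as csv_file:
            writer = csv.writer(csv_file)
            # header row followed by `rows` rows of random floats
            writer.writerow([f'col_{j}' for j in range(cols)])
            for _ in range(rows):
                writer.writerow([random.random() for _ in range(cols)])
```

With the flags from the Slurm script above this would produce 800 files of 200,000 rows by 100 columns, enough to make Dask's partitioned reading worthwhile.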
source-code/dask/dask_distr_test.slurm (new file; name inferred from the README entry above)
Lines changed: 46 additions & 0 deletions

@@ -0,0 +1,46 @@
+#!/usr/bin/env -S bash -l
+#SBATCH --account=lpt2_sysadmin
+#SBATCH --cluster=wice
+#SBATCH --time=00:10:00
+#SBATCH --ntasks=1 --cpus-per-task=1
+#SBATCH --mem=1G
+#SBATCH hetjob
+#SBATCH --ntasks=4 --cpus-per-task=8
+
+# file name to store scheduler information for workers and client
+scheduler_file="$(pwd)/scheduler_${SLURM_JOB_ID}.json"
+
+# activate environment that has dask installed
+mamba activate python_for_hpc
+
+# launch dask server process
+echo "launching dask-server"
+srun --exclusive \
+    --het-group=0 \
+    --ntasks=$SLURM_NTASKS_HET_GROUP_0 \
+    --cpus-per-task=$SLURM_CPUS_PER_TASK_HET_GROUP_0 \
+    --mem=$SLURM_MEM_PER_NODE_PACK_GROUP_0 \
+    dask scheduler --scheduler-file $scheduler_file &
+
+# give server time to start
+sleep 5
+
+# launch dask worker processes
+for i in $(seq $SLURM_NTASKS_HET_GROUP_1)
+do
+    echo "launching dask-worker $i"
+    srun --exclusive \
+        --het-group=1 \
+        --ntasks=1 \
+        --cpus-per-task=$SLURM_CPUS_PER_TASK_HET_GROUP_1 \
+        --mem=$SLURM_MEM_PER_NODE_PACK_GROUP_1 \
+        dask worker --scheduler-file $scheduler_file &
+done
+
+# give workers time to start
+sleep 20
+
+# start the client process
+python dask_distr_test.py \
+    --scheduler-file $scheduler_file \
+    --verbose
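`dask_distr_test.py` is not shown in this diff; a minimal client along these lines is what the job script above serves. In the real script the client would attach via `Client(scheduler_file=...)`; here a threaded `LocalCluster` stands in for the scheduler and workers so the sketch is self-contained (the `square` workload is made up):

```python
from dask.distributed import Client, LocalCluster


def square(x):
    return x * x


# In the Slurm job the client attaches to the running scheduler with
# Client(scheduler_file=...); a LocalCluster stands in for it here.
cluster = LocalCluster(n_workers=2, processes=False)
client = Client(cluster)

# Scatter work over the workers and gather a single result;
# submit() resolves the futures passed inside the argument list.
futures = client.map(square, range(10))
total = client.submit(sum, futures).result()
print(total)  # sum of squares 0..9

client.close()
cluster.close()
```

The scheduler file written by `dask scheduler` contains the connection details, which is why both the workers and the client in the job script only need that one path.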
Lines changed: 3 additions & 7 deletions

@@ -1,10 +1,6 @@
-#!/bin/bash
+#!/usr/bin/env -S bash -l
 
-source "${VSC_DATA}/miniconda3/setenv.sh"
-source activate science 2> /dev/null
-if [ $? -ne 0 ]
-then
-    (>&2 echo '### error: conda environment not sourced correctly' )
-fi
+source ~/.mamba_init.sh
+mamba activate python_for_hpc
 
 nohup dask-scheduler &> "scheduler-${PBS_JOBID}.log" &

source-code/interfacing-c-c++-fortran/Pybind11/README.md
Lines changed: 1 addition & 1 deletion

@@ -5,7 +5,7 @@ pybind11 is a wrapper generator for C++ code that has a lot of nice features.
 ## What is it?
 
 1. `Simple`: very simple illustration of wrapping C++ functions.
-1. `Spectrum`: illustration of warpping to support the buffer protocol.
+1. `Spectrum`: illustration of wrapping to support the buffer protocol.
 1. `Stats`: illustration of wrapping a C++ class.
 1. `Convolution`: illustration of using the buffer protocol.
 1. `environment.yml`: conda environment specification for this directory.

source-code/ising/.gitignore
Lines changed: 4 additions & 0 deletions

@@ -1,3 +1,7 @@
 *.pyc
 _ising_cxx.so
 ising_cxx.py
+
+result-domains.txt
+result-magn.txt
+
source-code/ising/src/.gitignore
Lines changed: 1 addition & 0 deletions

@@ -1 +1,2 @@
 ising_cxx_wrap.cxx
+*.o

source-code/profiling/README.md
Lines changed: 0 additions & 2 deletions

@@ -22,5 +22,3 @@ functions use memory.
 1. `run_memory_prof.sh`: Bash shell script to create a memory profile.
    Note that this generates a lot of overhead in terms of CPU time.
 1. `cellular_automata.py`: example code to illustrate snakeviz.
-1. `microbenchmarking.ipynb`: some pitfalls when microbenchmarking
-   Python code.
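The profiling scripts themselves are not shown in this diff; the kind of CPU profile that snakeviz visualizes is standard `cProfile` output, which can be produced along these lines (the recursive `fib` is just a made-up workload):

```python
import cProfile
import io
import pstats


def fib(n):
    # deliberately slow recursive Fibonacci as a profiling target
    return n if n < 2 else fib(n - 1) + fib(n - 2)


profiler = cProfile.Profile()
profiler.enable()
result = fib(20)
profiler.disable()

# Print the five most expensive calls by cumulative time; snakeviz renders
# the same statistics graphically from a file written with
# profiler.dump_stats('profile.out').
stream = io.StringIO()
pstats.Stats(profiler, stream=stream).sort_stats('cumulative').print_stats(5)
print(stream.getvalue())
```

Running `snakeviz profile.out` on a dumped stats file then gives the interactive icicle view that `cellular_automata.py` is meant to illustrate.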

0 commit comments