diff --git a/.cscs-ci/container/build.Containerfile b/.cscs-ci/container/build.Containerfile
new file mode 100644
index 00000000..fe3e707f
--- /dev/null
+++ b/.cscs-ci/container/build.Containerfile
@@ -0,0 +1,23 @@
+# Configures and compiles oomph inside the prebuilt dependency image.
+ARG DEPS_IMAGE
+FROM ${DEPS_IMAGE}
+
+COPY . /oomph
+WORKDIR /oomph
+
+ARG NUM_PROCS
+ARG BACKEND
+# The backend name is upper-cased to pick the matching OOMPH_WITH_ option, and
+# all MPIEXEC_ launcher settings are passed as empty strings.
+RUN spack -e ci build-env oomph -- \
+    cmake -G Ninja -B build \
+      -DCMAKE_BUILD_TYPE=Debug \
+      -DOOMPH_WITH_TESTING=ON \
+      -DOOMPH_WITH_$(echo ${BACKEND} | tr '[:lower:]' '[:upper:]')=ON \
+      -DOOMPH_USE_BUNDLED_LIBS=ON \
+      -DOOMPH_USE_BUNDLED_HWMALLOC=OFF \
+      -DMPIEXEC_EXECUTABLE="" \
+      -DMPIEXEC_NUMPROC_FLAG="" \
+      -DMPIEXEC_PREFLAGS="" \
+      -DMPIEXEC_POSTFLAGS="" && \
+    spack -e ci build-env oomph -- cmake --build build -j${NUM_PROCS}
diff --git a/.cscs-ci/container/deps.Containerfile b/.cscs-ci/container/deps.Containerfile
new file mode 100644
index 00000000..f5867ac5
--- /dev/null
+++ b/.cscs-ci/container/deps.Containerfile
@@ -0,0 +1,29 @@
+# Dependency image holding Spack, the spack-packages repository and the oomph dependencies.
+ARG BASE_IMAGE
+FROM ${BASE_IMAGE}
+
+# Unpack the requested Spack revision into /opt/spack and put its bin directory on PATH.
+ARG SPACK_SHA
+RUN mkdir -p /opt/spack && \
+    curl -fsSL "https://api.github.com/repos/spack/spack/tarball/$SPACK_SHA" | tar -xz --strip-components=1 -C /opt/spack
+
+ENV PATH="/opt/spack/bin:$PATH"
+
+# Unpack the requested spack-packages revision into /opt/spack-packages.
+ARG SPACK_PACKAGES_SHA
+RUN mkdir -p /opt/spack-packages && \
+    curl -fsSL "https://api.github.com/repos/spack/spack-packages/tarball/$SPACK_PACKAGES_SHA" | tar -xz --strip-components=1 -C /opt/spack-packages
+
+# Replace the default builtin package repository with the unpacked one.
+RUN spack repo remove --scope defaults:base builtin && \
+    spack repo add --scope site /opt/spack-packages/repos/spack_repo/builtin
+
+# Create the "ci" environment from the supplied file and install only its dependencies.
+ARG SPACK_ENV_FILE
+COPY ${SPACK_ENV_FILE} /spack_environment/spack.yaml
+
+ARG NUM_PROCS
+RUN spack external find --all && \
+    spack env create ci /spack_environment/spack.yaml && \
+    spack -e ci concretize -f && \
+    spack -e ci install --jobs ${NUM_PROCS} --fail-fast --only=dependencies
diff --git a/.cscs-ci/default.yaml b/.cscs-ci/default.yaml
new file mode 100644
index 00000000..6c7f56c7
--- /dev/null
+++ b/.cscs-ci/default.yaml
@@ -0,0 +1,188 @@
+include:
+  - remote: 'https://gitlab.com/cscs-ci/recipes/-/raw/master/templates/v2/.ci-ext.yml'
+
+variables:
+  BASE_IMAGE: jfrog.svc.cscs.ch/docker-group-csstaff/alps-images/ngc-pytorch:26.01-py3-alps3
+  SPACK_SHA: v1.1.1
+  SPACK_PACKAGES_SHA: bc93746ce936d6653271b6e98f6df6ee28f64e84 # develop on 2026-03-25
+  FF_TIMESTAMPS: true
+
+.build_deps_template:  # shared recipe: builds one dependency image per BACKEND
+  timeout: 1 hour
+  before_script:
+    - echo $DOCKERHUB_TOKEN | podman login docker.io -u $DOCKERHUB_USERNAME --password-stdin || true  # best effort: a failed login does not fail the job
+    - export DOCKERFILE_SHA=`sha256sum .cscs-ci/container/deps.Containerfile | head -c 16`
+    - export ENV_FILE_SHA=`sha256sum ${SPACK_ENV_FILE} | head -c 16`
+    - export CONFIG_TAG=`echo $DOCKERFILE_SHA-$BASE_IMAGE-$SPACK_SHA-$SPACK_PACKAGES_SHA-$ENV_FILE_SHA | sha256sum - | head -c 16`  # tag changes whenever the Containerfile, base image, Spack revisions or env file change
+    - export PERSIST_IMAGE_NAME=$CSCS_REGISTRY_PATH/oomph-spack-deps-$BACKEND:$CONFIG_TAG
+    - echo -e "CONFIG_TAG=$CONFIG_TAG" >> base-${BACKEND}.env
+    - echo -e "DEPS_IMAGE=$PERSIST_IMAGE_NAME" >> base-${BACKEND}.env
+  variables:
+    DOCKERFILE: .cscs-ci/container/deps.Containerfile
+    DOCKER_BUILD_ARGS: '["BASE_IMAGE", "SPACK_SHA", "SPACK_PACKAGES_SHA", "SPACK_ENV_FILE"]'
+    SPACK_ENV_FILE: .cscs-ci/spack/$BACKEND.yaml
+  artifacts:
+    reports:
+      dotenv: base-${BACKEND}.env  # exports CONFIG_TAG and DEPS_IMAGE to jobs that need this one
+
+# TODO: NCCL will be enabled in https://github.com/ghex-org/oomph/pull/55
+# build_deps_nccl:
+#   variables:
+#     BACKEND: nccl
+#   extends:
+#     - .container-builder-cscs-gh200
+#     - .build_deps_template
+
+build_deps_mpi:
+  variables:
+    BACKEND: mpi
+  extends:
+    - .container-builder-cscs-gh200
+    - .build_deps_template
+
+build_deps_ucx:
+  variables:
+    BACKEND: ucx
+  extends:
+    - .container-builder-cscs-gh200
+    - .build_deps_template
+
+build_deps_libfabric:
+  variables:
+    BACKEND: libfabric
+  extends:
+    - .container-builder-cscs-gh200
+    - .build_deps_template
+
+.build_template:  # shared recipe: builds oomph on top of the matching dependency image
+  extends: .container-builder-cscs-gh200
+  timeout: 15 minutes
+  before_script:
+    - echo $DOCKERHUB_TOKEN | podman login docker.io -u $DOCKERHUB_USERNAME --password-stdin || true  # best effort: a failed login does not fail the job
+    - export PERSIST_IMAGE_NAME=$CSCS_REGISTRY_PATH/oomph-build-$BACKEND:$CI_COMMIT_SHA
+    - echo -e "BUILD_IMAGE=$PERSIST_IMAGE_NAME" >> build-${BACKEND}.env
+  variables:
+    DOCKERFILE: .cscs-ci/container/build.Containerfile
+    DOCKER_BUILD_ARGS: '["DEPS_IMAGE", "BACKEND"]'
+  artifacts:
+    reports:
+      dotenv: build-${BACKEND}.env  # exports BUILD_IMAGE to the test jobs
+
+# TODO: NCCL will be enabled in https://github.com/ghex-org/oomph/pull/55
+# build_nccl:
+#   variables:
+#     BACKEND: nccl
+#   extends: .build_template
+#   needs:
+#     - job: build_deps_nccl
+#       artifacts: true
+
+build_mpi:
+  variables:
+    BACKEND: mpi
+  extends: .build_template
+  needs:
+    - job: build_deps_mpi
+      artifacts: true
+
+build_ucx:
+  variables:
+    BACKEND: ucx
+  extends: .build_template
+  needs:
+    - job: build_deps_ucx
+      artifacts: true
+
+build_libfabric:
+  variables:
+    BACKEND: libfabric
+  extends: .build_template
+  needs:
+    - job: build_deps_libfabric
+      artifacts: true
+
+.test_template_base:  # settings shared by the serial and parallel test templates
+  extends: .container-runner-clariden-gh200
+  variables:
+    SLURM_JOB_NUM_NODES: 1
+    SLURM_GPUS_PER_TASK: 1
+    SLURM_TIMELIMIT: '5:00'
+    SLURM_PARTITION: normal
+    SLURM_MPI_TYPE: pmix
+    SLURM_NETWORK: disable_rdzv_get
+    SLURM_LABELIO: 1
+    SLURM_UNBUFFEREDIO: 1
+    PMIX_MCA_psec: native
+    PMIX_MCA_gds: "^shmem2"
+    USE_MPI: 'NO'  # quoted: a bare NO is a YAML 1.1 boolean, not the string "NO"
+
+.test_serial_template:
+  extends: .test_template_base
+  variables:
+    SLURM_NTASKS: 1
+  script:
+    - ctest --test-dir /oomph/build -L '^serial$' --output-on-failure --timeout 60 --parallel 8  # -L is a regex; anchored to match the label exactly
+
+.test_parallel_template:
+  extends: .test_template_base
+  variables:
+    SLURM_NTASKS: 4  # same rank count as the parallel-ranks-4 label selected below
+  script:
+    - ctest --test-dir /oomph/build -L '^parallel-ranks-4$' --output-on-failure --timeout 60  # anchored so that e.g. parallel-ranks-40 is not selected
+
+# TODO: NCCL will be enabled in https://github.com/ghex-org/oomph/pull/55
+# test_serial_nccl:
+#   extends: .test_serial_template
+#   needs:
+#     - job: build_nccl
+#       artifacts: true
+#   image: $BUILD_IMAGE
+
+# test_parallel_nccl:
+#   extends: .test_parallel_template
+#   needs:
+#     - job: build_nccl
+#       artifacts: true
+#   image: $BUILD_IMAGE
+
+test_serial_mpi:
+  extends: .test_serial_template
+  needs:
+    - job: build_mpi
+      artifacts: true
+  image: $BUILD_IMAGE
+
+test_parallel_mpi:
+  extends: .test_parallel_template
+  needs:
+    - job: build_mpi
+      artifacts: true
+  image: $BUILD_IMAGE
+
+test_serial_ucx:
+  extends: .test_serial_template
+  needs:
+    - job: build_ucx
+      artifacts: true
+  image: $BUILD_IMAGE
+
+test_parallel_ucx:
+  extends: .test_parallel_template
+  needs:
+    - job: build_ucx
+      artifacts: true
+  image: $BUILD_IMAGE
+
+test_serial_libfabric:
+  extends: .test_serial_template
+  needs:
+    - job: build_libfabric
+      artifacts: true
+  image: $BUILD_IMAGE
+
+test_parallel_libfabric:
+  extends: .test_parallel_template
+  needs:
+    - job: build_libfabric
+      artifacts: true
+  image: $BUILD_IMAGE
diff --git a/.cscs-ci/spack/libfabric.yaml b/.cscs-ci/spack/libfabric.yaml
new file mode 100644
index 00000000..fac7f88f
--- /dev/null
+++ b/.cscs-ci/spack/libfabric.yaml
@@ -0,0 +1,6 @@
+spack:
+  specs:
+  - oomph@main backend=libfabric +cuda
+  view: false
+  concretizer:
+    unify: true
diff --git a/.cscs-ci/spack/mpi.yaml b/.cscs-ci/spack/mpi.yaml
new file mode 100644
index 00000000..d59aab13
--- /dev/null
+++ b/.cscs-ci/spack/mpi.yaml
@@ -0,0 +1,6 @@
+spack:
+  specs:
+  - oomph@main backend=mpi +cuda
+  view: false
+  concretizer:
+    unify: true
diff --git a/.cscs-ci/spack/nccl.yaml b/.cscs-ci/spack/nccl.yaml
new file mode 100644
index 00000000..94f0dd31
--- /dev/null
+++ b/.cscs-ci/spack/nccl.yaml
@@ -0,0 +1,6 @@
+spack:
+  specs:
+  - oomph@main backend=nccl +cuda
+  view: false
+  concretizer:
+    unify: true
diff --git a/.cscs-ci/spack/ucx.yaml b/.cscs-ci/spack/ucx.yaml
new file mode 100644
index 00000000..51377dd8
--- /dev/null
+++ b/.cscs-ci/spack/ucx.yaml
@@ -0,0 +1,6 @@
+spack:
+  specs:
+  - oomph@main backend=ucx +cuda
+  view: false
+  concretizer:
+    unify: true
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 5217bbaf..cb4e6f0e 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -48,6 +48,7 @@
function(reg_serial_test t)
     add_test(
         NAME ${t}
         COMMAND $<TARGET_FILE:${t}>)
+    set_tests_properties(${t} PROPERTIES LABELS "serial")  # lets ctest -L select the serial tests
 endfunction()
 
 foreach(t ${serial_tests})
@@ -61,11 +62,17 @@ function(reg_parallel_test t_ lib n)
     oomph_target_compile_options(${t})
     target_link_libraries(${t} PRIVATE gtest_main_mpi)
     target_link_libraries(${t} PRIVATE oomph_${lib})
-    add_test(
-        NAME ${t}
-        COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${n} ${MPIEXEC_PREFLAGS}
-            $<TARGET_FILE:${t}> ${MPIEXEC_POSTFLAGS})
-    set_tests_properties(${t} PROPERTIES RUN_SERIAL TRUE)
+    # With an empty MPIEXEC_EXECUTABLE the test binary is registered without a
+    # launcher prefix; otherwise it is started through the configured MPI launcher.
+    if("${MPIEXEC_EXECUTABLE}" STREQUAL "")
+        add_test(NAME ${t} COMMAND $<TARGET_FILE:${t}>)
+    else()
+        add_test(
+            NAME ${t}
+            COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${n} ${MPIEXEC_PREFLAGS}
+                $<TARGET_FILE:${t}> ${MPIEXEC_POSTFLAGS})
+    endif()
+    set_tests_properties(${t} PROPERTIES RUN_SERIAL TRUE LABELS "parallel-ranks-${n}")
 endfunction()
 
 if (OOMPH_WITH_MPI)
diff --git a/test/bindings/fortran/CMakeLists.txt b/test/bindings/fortran/CMakeLists.txt
index 974d2f7c..2a5980c5 100644
--- a/test/bindings/fortran/CMakeLists.txt
+++ b/test/bindings/fortran/CMakeLists.txt
@@ -25,12 +25,19 @@ function(reg_parallel_test_f t_ lib n nthr)
         $
         $
         $)
-    add_test(
-        NAME ${t}
-        COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${n} ${MPIEXEC_PREFLAGS}
-            $<TARGET_FILE:${t}> ${MPIEXEC_POSTFLAGS})
+    # With an empty MPIEXEC_EXECUTABLE the test binary is registered without a
+    # launcher prefix; otherwise it is started through the configured MPI launcher.
+    if("${MPIEXEC_EXECUTABLE}" STREQUAL "")
+        add_test(NAME ${t} COMMAND $<TARGET_FILE:${t}>)
+    else()
+        add_test(
+            NAME ${t}
+            COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${n} ${MPIEXEC_PREFLAGS}
+                $<TARGET_FILE:${t}> ${MPIEXEC_POSTFLAGS})
+    endif()
     set_tests_properties(${t} PROPERTIES
-        ENVIRONMENT OMP_NUM_THREADS=${nthr})
+        ENVIRONMENT OMP_NUM_THREADS=${nthr}
+        LABELS "parallel-ranks-${n}")
 endfunction()
 
 if (OOMPH_WITH_MPI)