Add GitLab CI pipeline that runs on HPC

brownbaerchen · web-flow · commit 73c927442fa7 · 2024-04-24T11:42:27.000+02:00
diff --git a/.github/workflows/ci_pipeline.yml b/.github/workflows/ci_pipeline.yml
@@ -37,24 +37,6 @@ jobs:
         run: |
           flakeheaven lint --benchmark pySDC
 
-#   mirror_to_gitlab:
-
-#     runs-on: ubuntu-latest
-
-#     steps:
-#       - name: Checkout
-#         uses: actions/checkout@v1
-
-#       - name: Mirror
-#         uses: jakob-fritz/github2lab_action@main
-#         env:
-#           MODE: 'mirror' # Either 'mirror', 'get_status', or 'both'
-#           GITLAB_TOKEN: ${{ secrets.GITLAB_SECRET_H }}
-#           FORCE_PUSH: "true"
-#           GITLAB_HOSTNAME: "codebase.helmholtz.cloud"
-#           GITLAB_PROJECT_ID: "3525"
-#           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
   user_cpu_tests_linux:
     runs-on: ubuntu-latest
 
@@ -215,24 +197,6 @@ jobs:
 #        run: |
 #          pytest --continue-on-collection-errors -v --durations=0 pySDC/tests -m ${{ matrix.env }}
 
-
-#   wait_for_gitlab:
-#     runs-on: ubuntu-latest
-
-#     needs:
-#       - mirror_to_gitlab
-
-#     steps:
-#       - name: Wait
-#         uses: jakob-fritz/github2lab_action@main
-#         env:
-#           MODE: 'get_status' # Either 'mirror', 'get_status', or 'both'
-#           GITLAB_TOKEN: ${{ secrets.GITLAB_SECRET_H }}
-#           FORCE_PUSH: "true"
-#           GITLAB_HOSTNAME: "codebase.helmholtz.cloud"
-#           GITLAB_PROJECT_ID: "3525"
-#           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
 # #      - name: Get and prepare artifacts
 # #        run: |
 # #          pipeline_id=$(curl --header "PRIVATE-TOKEN: ${{ secrets.GITLAB_SECRET_H }}" --silent "https://gitlab.hzdr.de/api/v4/projects/3525/repository/commits/${{ github.head_ref || github.ref_name }}" | jq '.last_pipeline.id')
@@ -393,4 +357,3 @@ jobs:
 #          rm -rf data
 #          unzip artifacts.zip
 #
-
diff --git a/.github/workflows/gitlab_ci.yml b/.github/workflows/gitlab_ci.yml
@@ -0,0 +1,143 @@
+---
+
+# Mirrors this repository to GitLab so that the GitLab CI pipeline runs there,
+# and waits for the result. Pull requests are only mirrored when they carry the
+# 'gitlab-mirror' label and the run was triggered by a user with write access.
+
+name: Mirror to Gitlab to trigger CI
+
+on:
+  push:
+  pull_request_target:
+  schedule:
+    # 05:01 UTC on the 2nd day of every month
+    - cron: '1 5 2 * *'
+
+jobs:
+  # Gate for pull requests: succeeds only if the triggering user has write
+  # permission. Pushes and scheduled runs pass unconditionally.
+  check_permission:
+    runs-on: ubuntu-latest
+    if: >-
+        (github.repository_owner == 'Parallel-in-Time') &&
+        ((github.event_name == 'push') ||
+         (github.event_name == 'schedule') ||
+         ((github.event_name == 'pull_request_target') &&
+          (contains(github.event.pull_request.labels.*.name, 'gitlab-mirror'))
+         )
+        )
+    steps:
+      - name: Query permissions of triggering actor
+        id: query_permission_triggering_actor
+        if: github.event_name == 'pull_request_target'
+        uses: actions-cool/check-user-permission@v2
+        with:
+          username: ${{ github.triggering_actor }}
+          require: 'write'
+          token: ${{ secrets.GITHUB_TOKEN }}
+      - name: Interpret the queried result
+        if: github.event_name == 'pull_request_target'
+        # Expression values reach the script through environment variables, so
+        # their content is never parsed as shell code.
+        env:
+          USER_PERMISSION: ${{ steps.query_permission_triggering_actor.outputs.user-permission }}
+          REQUIRE_RESULT: ${{ steps.query_permission_triggering_actor.outputs.require-result }}
+          ORIGINAL_ACTOR: ${{ github.actor }}
+        run: |
+          echo "Current permission level is $USER_PERMISSION"
+          echo "Job originally triggered by $ORIGINAL_ACTOR"
+          echo "Checking permission returned $REQUIRE_RESULT"
+          # Anything but the literal string 'true' counts as a refusal.
+          if [ "$REQUIRE_RESULT" = 'true' ]
+            then
+              echo 'Permissions granted'
+              exit 0
+            else
+              echo 'Not enough permissions. Please ask a member of Parallel-in-Time to rerun the job.'
+              exit 1
+          fi
+      - name: Pass if workflow from push or schedule
+        if: >-
+          (github.event_name == 'push') ||
+          (github.event_name == 'schedule')
+        run: exit 0
+      # - name: Fail for other triggers
+      #   if: >-
+      #     (github.event_name != 'push') &&
+      #     (github.event_name != 'schedule') &&
+      #     (github.event_name != 'pull_request_target')
+      #   run: exit 1
+
+  # Pushes the commit under test to GitLab and waits for the pipeline there.
+  # For pull requests, a temporary merge of base branch and PR head is mirrored.
+  mirror_to_gitlab:
+    runs-on: ubuntu-latest
+    if: >-
+        (github.repository_owner == 'Parallel-in-Time') &&
+        ((github.event_name == 'push') ||
+         (github.event_name == 'schedule') ||
+         ((github.event_name == 'pull_request_target') &&
+          (contains(github.event.pull_request.labels.*.name, 'gitlab-mirror'))
+         )
+        )
+    needs:
+      - check_permission
+    steps:
+      - name: set proper sha
+        env:
+          EVENT_NAME: ${{ github.event_name }}
+          PUSH_SHA: ${{ github.sha }}
+          PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
+        run: |
+          echo "$EVENT_NAME"
+          # For pull_request_target, github.sha is a commit of the base branch,
+          # so the head commit of the pull request is selected explicitly.
+          if [ "$EVENT_NAME" == 'push' ] || [ "$EVENT_NAME" == 'schedule' ]
+          then
+            echo "USED_SHA=$PUSH_SHA" >> "$GITHUB_ENV"
+          fi
+          if [ "$EVENT_NAME" == 'pull_request_target' ]
+          then
+            echo "USED_SHA=$PR_HEAD_SHA" >> "$GITHUB_ENV"
+          fi
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          ref: "${{ env.USED_SHA }}"
+          persist-credentials: false
+      - name: check if merge is possible (merge is used for testing)
+        if: github.event_name == 'pull_request_target'
+        env:
+          BASE_REF: ${{ github.base_ref }}
+          PR_NUMBER: ${{ github.event.number }}
+          PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
+        run: |
+          # The merge needs the complete history, not a shallow clone.
+          if [ "$(git rev-parse --is-shallow-repository)" = 'true' ]; then
+            git fetch --unshallow
+          else
+            git fetch
+          fi
+          echo "Checkout of $BASE_REF"
+          git checkout "$BASE_REF"
+          echo "Git pull"
+          git pull
+          MIRROR_BRANCH="TEMPORARY_MERGE_PR_$PR_NUMBER"
+          echo "MIRROR_BRANCH=$MIRROR_BRANCH" >> "$GITHUB_ENV"
+          echo "Create new branch $MIRROR_BRANCH and check it out"
+          git checkout -b "$MIRROR_BRANCH"
+          echo "Setting git committer info, so that merge-commit can be created"
+          git config user.email "unused@example.com"
+          git config user.name "Sync bot"
+          echo "Merge the two parts of the Merge-Request to test the resulting version"
+          git merge "$PR_HEAD_SHA"
+      - name: Mirror and wait for Gitlab-CI
+        uses: jakob-fritz/github2lab_action@v0.7
+        env:
+          MODE: 'all'  # Either 'mirror', 'get_status', 'get_artifact', or 'all'
+          GITLAB_TOKEN: ${{ secrets.GITLAB_SECRET }}
+          FORCE_PUSH: "true"
+          GITLAB_HOSTNAME: "gitlab.jsc.fz-juelich.de"
+          GITLAB_PROJECT_ID: "6029"
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          MIRROR_BRANCH: ${{ env.MIRROR_BRANCH }}
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
@@ -1,8 +1,93 @@
+---
+
 stages:
   - test
   - benchmark
   - upload
 
+
+variables:
+  # Account that test_JUWELS passes to sbatch when submitting batch jobs.
+  JUWELS_ACCOUNT: "cstma"
+
+
+# Loads the software modules and installs pySDC (editable) plus the test
+# tooling on the runner selected by the juwels/login tags.
+prepare_JUWELS:
+  stage: benchmark
+  rules:
+    - if: $CI_COMMIT_MESSAGE !~ /.*\[CI-no-benchmarks\]/
+  tags:
+    - jacamar
+    - juwels
+    - login
+    - shell
+  script:
+    - mkdir -p benchmarks
+    # load the latest Python module (currently 3.11)
+    - module --force purge
+    - module load Stages/2024
+    - module load GCC
+    - module load OpenMPI
+    - module load FFTW
+    - module load mpi4py
+    - module load SciPy-Stack
+    - module load CuPy
+    - pip install -e .
+    - pip install pytest-benchmark coverage
+
+
+# Submits one Slurm job per entry of the SHELL_SCRIPT matrix and waits for it;
+# the job scripts are etc/juwels_<SHELL_SCRIPT>.sh.
+test_JUWELS:
+  stage: benchmark
+  needs:
+    - prepare_JUWELS
+  rules:
+    - if: $CI_COMMIT_MESSAGE !~ /.*\[CI-no-benchmarks\]/
+  tags:
+    - jacamar
+    - juwels
+    - login
+    - shell
+  parallel:
+    matrix:
+      - SHELL_SCRIPT: ['benchmark', 'cupy']
+  artifacts:
+    when: always
+    paths:
+      - coverage_*.dat
+      - sbatch.err
+      - sbatch.out
+  before_script:
+    - mkdir -p benchmarks
+    # load the latest Python module (currently 3.11)
+    - module --force purge
+    - module load Stages/2024
+    - module load GCC
+    - module load OpenMPI
+    - module load FFTW
+    - module load mpi4py
+    - module load SciPy-Stack
+    - module load CuPy
+  script:
+    # - touch benchmarks/output.json
+    - echo $SYSTEMNAME
+    # Options on the sbatch command line take precedence over the #SBATCH
+    # lines in the job script, so the account is configured in one place.
+    - sbatch --wait --account="$JUWELS_ACCOUNT" etc/juwels_${SHELL_SCRIPT}.sh
+    # Placeholder data file; presumably keeps `coverage combine` from failing
+    # when the batch job wrote no coverage data - TODO confirm.
+    - touch .coverage.empty
+    - python -m coverage combine
+    - mv .coverage coverage_${SHELL_SCRIPT}.dat
+  after_script:
+    - echo "Following Errors occured:"
+    - cat sbatch.err
+    - echo "Following was written to stdout:"
+    - cat sbatch.out
+
+
 #test_kit:
 #  image: rcaspart/micromamba-cuda
 #  stage: benchmark
@@ -64,6 +149,9 @@ stages:
 benchmark:
   image: mambaorg/micromamba
   stage: benchmark
+  when: manual
+  tags:
+    - docker
   rules:
     - if: $CI_COMMIT_MESSAGE !~ /.*\[CI-no-benchmarks\]/
   artifacts:
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
@@ -61,7 +61,7 @@ representative at an online or offline event.
 ## Enforcement
 
 Instances of abusive, harassing, or otherwise unacceptable behavior may be
-reported to the community leaders responsible for enforcement 
+reported to the community leaders responsible for enforcement
 [here](mailto:r.speck@fz-juelich.de).
 All complaints will be reviewed and investigated promptly and fairly.
 
@@ -118,15 +118,15 @@ the community.
 
 This Code of Conduct is adapted from the [Contributor Covenant][homepage],
 version 2.0, available at
-https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
+<https://www.contributor-covenant.org/version/2/0/code_of_conduct.html>.
 
 Community Impact Guidelines were inspired by [Mozilla's code of conduct
-enforcement ladder](https://github.com/mozilla/diversity).
+enforcement ladder](<https://github.com/mozilla/diversity>).
 
-[homepage]: https://www.contributor-covenant.org
+[homepage]: <https://www.contributor-covenant.org>
 
 For answers to common questions about this code of conduct, see the FAQ at
-https://www.contributor-covenant.org/faq. Translations are available at
-https://www.contributor-covenant.org/translations.
+<https://www.contributor-covenant.org/faq>. Translations are available at
+<https://www.contributor-covenant.org/translations>.
 
-:arrow_left: [Back to main page](./README.md)
+:arrow_left: [Back to main page](./README.md)
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -16,4 +16,4 @@ This follows a specific OOP framework, you can look at the page on [custom imple
 5. [Custom Implementations](./docs/contrib/04_custom_implementations.md)
 6. [Documenting Code](./docs/contrib/05_documenting_code.md)
 
-:arrow_left: [Back to main page](./README.md)
+:arrow_left: [Back to main page](./README.md)
diff --git a/README.md b/README.md
@@ -4,6 +4,7 @@
 [![zenodo](https://zenodo.org/badge/26165004.svg)](https://zenodo.org/badge/latestdoi/26165004)
 [![fair-software.eu](https://img.shields.io/badge/fair--software.eu-%E2%97%8F%20%20%E2%97%8F%20%20%E2%97%8F%20%20%E2%97%8F%20%20%E2%97%8F-green)](https://fair-software.eu)
 [![SQAaaS badge shields.io](https://img.shields.io/badge/sqaaas%20software-silver-lightgrey)](https://api.eu.badgr.io/public/assertions/aS8J0NDTTjCyYP6iVufviQ "SQAaaS silver badge achieved")
+
 # Welcome to pySDC!
 
 The `pySDC` project is a Python implementation of the
@@ -95,7 +96,6 @@ Checkout the [Changelog](./CHANGELOG.md) to see pySDC's evolution since 2016.
 Any contribution is dearly welcome ! If you want to take part of this, please take the time to read our [Contribution Guidelines](./CONTRIBUTING.md)
 (and don't forget to take a peek at our nice [Code of Conduct](./CODE_OF_CONDUCT.md) :wink:).
 
-
 ## Acknowledgements
 
 This project has received funding from the [European High-Performance
diff --git a/docs/contrib/02_continuous_integration.md b/docs/contrib/02_continuous_integration.md
@@ -110,6 +110,37 @@ pytest -v pySDC/tests
 > pytest -v pySDC/tests/test_nodes.py::test_nodesGeneration[LEGENDRE]   # only test_nodesGeneration with LEGENDRE nodes
 > ```
 
+## Running CI on HPC from pull requests
+
+By syncing the GitHub repository to a certain Gitlab instance, CI-Jobs can be run on HPC machines. This can be helpful for benchmarks or when running on accelerators that are not available as GitHub runners.
+
+For security and accounting reasons, a few extra steps are needed in order to run the contents of a pull request on HPC:
+
+- The pull request needs to have the label "gitlab-mirror" assigned to it.
+- A person with write-permission for the Parallel-in-Time pySDC repository needs to trigger the workflow. Ask for someone with the required permissions to rerun the workflow if needed.
+- The workflow checks if the code can be merged. If this is not the case, the code is not mirrored and the workflow fails. In this case, please merge upstream changes, fix all conflicts, and rerun the workflow.
+
+> :bell: Note that direct pushes to Parallel-in-Time/pySDC will always trigger the HPC pipeline on Gitlab
+
+Regardless of why the Gitlab pipeline was triggered, the following holds true:
+
+- The return-state from Gitlab is transmitted to GitHub (Success/Failure) leading to the same result in GitHub
+- Logs from Gitlab are also transferred. The full logs of all jobs can be read from within GitHub. For better overview, these are folded, so unfolding is needed before reading.
+- Artifacts from Gitlab jobs are also transferred back to GitHub
+- Information such as coverage is transferred to GitHub, but not yet merged across multiple GitHub workflows. Therefore, there is no complete summary of e.g. coverage reports across all jobs in all workflows.
+
+> :warning: The coverage report from the HPC tests is not yet merged with other reports. The test coverage will not show up on the respective website or in the badge. We are working on this.
+
+### HPC test environments
+
+In order to run tests on GPUs, please use the pytest marker `cupy`.
+
+If you want to create a new HPC test environment, the following steps need to be completed:
+
+- Create a new slurm job-script in `etc/juwels_*.sh`. The name and location of the file are important.
+- Adapt `.gitlab-ci.yml` to include the new job-script. For this, add a name in the job "test_JUWELS" in the section `parallel: matrix: SHELL_SCRIPT`. The name there must match the part of the file name that replaces the `*` (e.g. `cupy` for `etc/juwels_cupy.sh`).
+As a starting point it is recommended to copy and adapt an existing file (e.g. `etc/juwels_cupy.sh`).
+
 ## Code coverage
 
 This stage allows to checks how much of the `pySDC` code is tested by the previous stage. It is based on the [coverage](https://pypi.org/project/coverage/) library and currently applied to the following directories :
diff --git a/etc/juwels_benchmark.sh b/etc/juwels_benchmark.sh
@@ -0,0 +1,15 @@
+#!/bin/bash -x
+# Slurm batch script: runs the pySDC tests marked "benchmark" on one node of
+# the "devel" partition and writes the benchmark results to benchmarks.json.
+#SBATCH --account=cstma
+#SBATCH --partition=devel
+#SBATCH --nodes=1
+#SBATCH --time=00:10:00
+#SBATCH --output=sbatch.out
+#SBATCH --error=sbatch.err
+
+srun python -m pytest \
+    --continue-on-collection-errors -v \
+    pySDC/tests \
+    -m "benchmark" \
+    --benchmark-json=benchmarks.json
diff --git a/etc/juwels_cupy.sh b/etc/juwels_cupy.sh
@@ -0,0 +1,15 @@
+#!/bin/bash -x
+# Slurm batch script: runs the pySDC tests marked "cupy" under coverage on one
+# node of the "develgpus" partition.
+#SBATCH --account=cstma
+#SBATCH --partition=develgpus
+#SBATCH --nodes=1
+#SBATCH --time=00:10:00
+#SBATCH --output=sbatch.out
+#SBATCH --error=sbatch.err
+
+srun python -m coverage run \
+    -m pytest \
+    --continue-on-collection-errors -v \
+    pySDC/tests \
+    -m "cupy"