Introduce cache modifiers #544
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| name: Iris Tests | |
| on: | |
| push: | |
| branches: [ main ] | |
| pull_request: | |
| branches: [ main ] | |
| workflow_dispatch: | |
| concurrency: | |
| group: ${{ github.workflow }}-${{ github.head_ref || github.ref }} | |
| cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} | |
| env: | |
| DOCKER_IMAGE_NAME: ${{ vars.DOCKER_IMAGE_NAME || 'iris-dev-triton-aafec41' }} | |
| jobs: | |
| test-git: | |
| name: Test ${{ matrix.test_dir }} (${{ matrix.num_ranks }} ranks, git install) | |
| runs-on: [linux-mi325-8gpu-ossci-rad] | |
| timeout-minutes: 180 | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| include: | |
| # Test each subdirectory with each rank count using git install | |
| - test_dir: examples | |
| num_ranks: 1 | |
| - test_dir: examples | |
| num_ranks: 2 | |
| - test_dir: examples | |
| num_ranks: 4 | |
| - test_dir: examples | |
| num_ranks: 8 | |
| - test_dir: unittests | |
| num_ranks: 1 | |
| - test_dir: unittests | |
| num_ranks: 2 | |
| - test_dir: unittests | |
| num_ranks: 4 | |
| - test_dir: unittests | |
| num_ranks: 8 | |
| - test_dir: ccl | |
| num_ranks: 1 | |
| - test_dir: ccl | |
| num_ranks: 2 | |
| - test_dir: ccl | |
| num_ranks: 4 | |
| - test_dir: ccl | |
| num_ranks: 8 | |
| - test_dir: x | |
| num_ranks: 1 | |
| - test_dir: x | |
| num_ranks: 2 | |
| - test_dir: x | |
| num_ranks: 4 | |
| - test_dir: x | |
| num_ranks: 8 | |
| - test_dir: ops | |
| num_ranks: 1 | |
| - test_dir: ops | |
| num_ranks: 2 | |
| - test_dir: ops | |
| num_ranks: 4 | |
| - test_dir: ops | |
| num_ranks: 8 | |
| steps: | |
| - name: Checkout repository | |
| uses: actions/checkout@v4 | |
| - name: Setup Apptainer (if not available) | |
| run: | | |
| if ! command -v apptainer &> /dev/null && ! command -v docker &> /dev/null; then | |
| echo "Neither Apptainer nor Docker found, installing Apptainer..." | |
| apt-get update && apt-get install -y software-properties-common | |
| add-apt-repository -y ppa:apptainer/ppa | |
| apt-get update && apt-get install -y apptainer | |
| else | |
| echo "Container runtime already available" | |
| fi | |
| - name: Build Iris container | |
| run: | | |
| bash .github/scripts/container_build.sh | |
| - name: Acquire GPUs | |
| run: | | |
| bash .github/scripts/acquire_gpus.sh "${{ matrix.num_ranks }}" | |
| - name: Run ${{ matrix.test_dir }} tests with ${{ matrix.num_ranks }} ranks (git install) | |
| env: | |
| GITHUB_REPOSITORY: ${{ github.repository }} | |
| GITHUB_SHA: ${{ github.sha }} | |
| run: | | |
| set -e | |
| echo "::group::Running ${{ matrix.test_dir }} tests with ${{ matrix.num_ranks }} ranks (install: git)" | |
| bash .github/scripts/run_tests.sh \ | |
| "${{ matrix.test_dir }}" \ | |
| "${{ matrix.num_ranks }}" \ | |
| "" \ | |
| "git" | |
| echo "::endgroup::" | |
| echo "✅ ${{ matrix.test_dir }} tests with ${{ matrix.num_ranks }} ranks (git) passed!" | |
| - name: Release GPUs | |
| if: always() | |
| run: | | |
| bash .github/scripts/release_gpus.sh | |
| test-editable: | |
| name: Test ${{ matrix.test_dir }} (${{ matrix.num_ranks }} ranks, editable install) | |
| needs: [test-git] | |
| runs-on: [linux-mi325-8gpu-ossci-rad] | |
| timeout-minutes: 180 | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| include: | |
| # Test each subdirectory with each rank count using editable install | |
| - test_dir: examples | |
| num_ranks: 1 | |
| - test_dir: examples | |
| num_ranks: 2 | |
| - test_dir: examples | |
| num_ranks: 4 | |
| - test_dir: examples | |
| num_ranks: 8 | |
| - test_dir: unittests | |
| num_ranks: 1 | |
| - test_dir: unittests | |
| num_ranks: 2 | |
| - test_dir: unittests | |
| num_ranks: 4 | |
| - test_dir: unittests | |
| num_ranks: 8 | |
| - test_dir: ccl | |
| num_ranks: 1 | |
| - test_dir: ccl | |
| num_ranks: 2 | |
| - test_dir: ccl | |
| num_ranks: 4 | |
| - test_dir: ccl | |
| num_ranks: 8 | |
| - test_dir: x | |
| num_ranks: 1 | |
| - test_dir: x | |
| num_ranks: 2 | |
| - test_dir: x | |
| num_ranks: 4 | |
| - test_dir: x | |
| num_ranks: 8 | |
| - test_dir: ops | |
| num_ranks: 1 | |
| - test_dir: ops | |
| num_ranks: 2 | |
| - test_dir: ops | |
| num_ranks: 4 | |
| - test_dir: ops | |
| num_ranks: 8 | |
| steps: | |
| - name: Checkout repository | |
| uses: actions/checkout@v4 | |
| - name: Setup Apptainer (if not available) | |
| run: | | |
| if ! command -v apptainer &> /dev/null && ! command -v docker &> /dev/null; then | |
| echo "Neither Apptainer nor Docker found, installing Apptainer..." | |
| apt-get update && apt-get install -y software-properties-common | |
| add-apt-repository -y ppa:apptainer/ppa | |
| apt-get update && apt-get install -y apptainer | |
| else | |
| echo "Container runtime already available" | |
| fi | |
| - name: Build Iris container | |
| run: | | |
| bash .github/scripts/container_build.sh | |
| - name: Acquire GPUs | |
| run: | | |
| bash .github/scripts/acquire_gpus.sh "${{ matrix.num_ranks }}" | |
| - name: Run ${{ matrix.test_dir }} tests with ${{ matrix.num_ranks }} ranks (editable install) | |
| run: | | |
| set -e | |
| echo "::group::Running ${{ matrix.test_dir }} tests with ${{ matrix.num_ranks }} ranks (install: editable)" | |
| bash .github/scripts/run_tests.sh \ | |
| "${{ matrix.test_dir }}" \ | |
| "${{ matrix.num_ranks }}" \ | |
| "" \ | |
| "editable" | |
| echo "::endgroup::" | |
| echo "✅ ${{ matrix.test_dir }} tests with ${{ matrix.num_ranks }} ranks (editable) passed!" | |
| - name: Release GPUs | |
| if: always() | |
| run: | | |
| bash .github/scripts/release_gpus.sh | |
| test-install: | |
| name: Test ${{ matrix.test_dir }} (${{ matrix.num_ranks }} ranks, pip install) | |
| needs: [test-editable] | |
| runs-on: [linux-mi325-8gpu-ossci-rad] | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| include: | |
| # Test each subdirectory with each rank count using pip install | |
| - test_dir: examples | |
| num_ranks: 1 | |
| - test_dir: examples | |
| num_ranks: 2 | |
| - test_dir: examples | |
| num_ranks: 4 | |
| - test_dir: examples | |
| num_ranks: 8 | |
| - test_dir: unittests | |
| num_ranks: 1 | |
| - test_dir: unittests | |
| num_ranks: 2 | |
| - test_dir: unittests | |
| num_ranks: 4 | |
| - test_dir: unittests | |
| num_ranks: 8 | |
| - test_dir: ccl | |
| num_ranks: 1 | |
| - test_dir: ccl | |
| num_ranks: 2 | |
| - test_dir: ccl | |
| num_ranks: 4 | |
| - test_dir: ccl | |
| num_ranks: 8 | |
| - test_dir: x | |
| num_ranks: 1 | |
| - test_dir: x | |
| num_ranks: 2 | |
| - test_dir: x | |
| num_ranks: 4 | |
| - test_dir: x | |
| num_ranks: 8 | |
| - test_dir: ops | |
| num_ranks: 1 | |
| - test_dir: ops | |
| num_ranks: 2 | |
| - test_dir: ops | |
| num_ranks: 4 | |
| - test_dir: ops | |
| num_ranks: 8 | |
| steps: | |
| - name: Checkout repository | |
| uses: actions/checkout@v4 | |
| - name: Setup Apptainer (if not available) | |
| run: | | |
| if ! command -v apptainer &> /dev/null && ! command -v docker &> /dev/null; then | |
| echo "Neither Apptainer nor Docker found, installing Apptainer..." | |
| apt-get update && apt-get install -y software-properties-common | |
| add-apt-repository -y ppa:apptainer/ppa | |
| apt-get update && apt-get install -y apptainer | |
| else | |
| echo "Container runtime already available" | |
| fi | |
| - name: Build Iris container | |
| run: | | |
| bash .github/scripts/container_build.sh | |
| - name: Acquire GPUs | |
| run: | | |
| bash .github/scripts/acquire_gpus.sh "${{ matrix.num_ranks }}" | |
| - name: Run ${{ matrix.test_dir }} tests with ${{ matrix.num_ranks }} ranks (pip install) | |
| run: | | |
| set -e | |
| echo "::group::Running ${{ matrix.test_dir }} tests with ${{ matrix.num_ranks }} ranks (install: install)" | |
| bash .github/scripts/run_tests.sh \ | |
| "${{ matrix.test_dir }}" \ | |
| "${{ matrix.num_ranks }}" \ | |
| "" \ | |
| "install" | |
| echo "::endgroup::" | |
| echo "✅ ${{ matrix.test_dir }} tests with ${{ matrix.num_ranks }} ranks (install) passed!" | |
| - name: Release GPUs | |
| if: always() | |
| run: | | |
| bash .github/scripts/release_gpus.sh | |
| test-new-examples: | |
| name: New examples (${{ matrix.num_ranks }} ranks, ${{ matrix.install_method }}) | |
| runs-on: [linux-mi325-8gpu-ossci-rad] | |
| timeout-minutes: 180 | |
| permissions: | |
| contents: read | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| include: | |
| - num_ranks: 2 | |
| install_method: editable | |
| - num_ranks: 4 | |
| install_method: editable | |
| - num_ranks: 8 | |
| install_method: editable | |
| steps: | |
| - name: Checkout repository | |
| uses: actions/checkout@v4 | |
| - name: Setup Apptainer (if not available) | |
| run: | | |
| if ! command -v apptainer &> /dev/null && ! command -v docker &> /dev/null; then | |
| echo "Neither Apptainer nor Docker found, installing Apptainer..." | |
| apt-get update && apt-get install -y software-properties-common | |
| add-apt-repository -y ppa:apptainer/ppa | |
| apt-get update && apt-get install -y apptainer | |
| else | |
| echo "Container runtime already available" | |
| fi | |
| - name: Build Iris container | |
| run: | | |
| bash .github/scripts/container_build.sh | |
| - name: Acquire GPUs | |
| run: | | |
| bash .github/scripts/acquire_gpus.sh "${{ matrix.num_ranks }}" | |
| - name: Run new examples with ${{ matrix.num_ranks }} ranks (${{ matrix.install_method }}) | |
| env: | |
| GITHUB_REPOSITORY: ${{ github.repository }} | |
| GITHUB_SHA: ${{ github.sha }} | |
| run: | | |
| set -e | |
| echo "::group::Running new examples with ${{ matrix.num_ranks }} ranks (install: ${{ matrix.install_method }})" | |
| bash .github/scripts/run_new_examples.sh \ | |
| "${{ matrix.num_ranks }}" \ | |
| "${{ matrix.install_method }}" | |
| echo "::endgroup::" | |
| echo "✅ New examples with ${{ matrix.num_ranks }} ranks passed!" | |
| - name: Release GPUs | |
| if: always() | |
| run: | | |
| bash .github/scripts/release_gpus.sh |