# Skip to content

# Introduce cache modifiers #955

# Introduce cache modifiers

# Introduce cache modifiers #955

# Iris external validation workflow.
#
# Each job pip-installs Iris from the commit that triggered the run, downloads
# a test script pinned to a specific gist revision, and launches it with
# torchrun on two processes through .github/scripts/container_exec.sh.
name: Iris External Validation Test

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
  workflow_dispatch:

# One active run per workflow and PR head branch (or ref when there is no PR).
# A newer run cancels the in-flight one, except for runs on main.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

env:
  # Taken from the DOCKER_IMAGE_NAME configuration variable when it is set,
  # otherwise the literal default below.
  # NOTE(review): presumably read by the container_*.sh scripts - confirm.
  DOCKER_IMAGE_NAME: ${{ vars.DOCKER_IMAGE_NAME || 'iris-dev-triton-aafec41' }}

jobs:
  external-validation-test:
    name: External Validation Test
    runs-on: [linux-mi325-8gpu-ossci-rad]
    timeout-minutes: 180
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # Installs Apptainer only when neither apptainer nor docker is on PATH.
      - name: Setup Apptainer (if not available)
        run: |
          if ! command -v apptainer &> /dev/null && ! command -v docker &> /dev/null; then
            echo "Neither Apptainer nor Docker found, installing Apptainer..."
            apt-get update && apt-get install -y software-properties-common
            add-apt-repository -y ppa:apptainer/ppa
            apt-get update && apt-get install -y apptainer
          else
            echo "Container runtime already available"
          fi

      - name: Build Iris container
        run: |
          bash .github/scripts/container_build.sh

      - name: Acquire GPUs
        run: |
          bash .github/scripts/acquire_gpus.sh 2

      # --gpus "$GPU_DEVICES" hands the container the devices from the
      # "Acquire GPUs" step, matching external-gluon-validation-test below.
      # NOTE(review): assumes acquire_gpus.sh exports GPU_DEVICES to later
      # steps; the gluon job already depends on that - confirm.
      - name: Run External Validation Test
        run: |
          set -e
          echo "::group::Running external validation test"
          bash .github/scripts/container_exec.sh --gpus "$GPU_DEVICES" "
            set -e
            cd /iris_workspace
            pip install git+https://github.com/${{ github.repository }}.git@${{ github.sha }}
            wget -O test_iris_distributed.py https://gist.githubusercontent.com/mawad-amd/6375dc078e39e256828f379e03310ec7/raw/0827d023eaf8e9755b17cbe8ab06f2ce258e746a/test_iris_distributed.py
            torchrun --nproc_per_node=2 test_iris_distributed.py
          "
          echo "::endgroup::"
          echo "✅ External validation test passed!"

      # always() runs this step even when an earlier step failed or the run
      # was cancelled.
      - name: Release GPUs
        if: always()
        run: |
          bash .github/scripts/release_gpus.sh

  external-gluon-validation-test:
    name: External Gluon Validation Test
    runs-on: [linux-mi325-8gpu-ossci-rad]
    # Same cap as external-validation-test; without it the job would fall back
    # to the GitHub Actions default of 360 minutes.
    timeout-minutes: 180
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # Installs Apptainer only when neither apptainer nor docker is on PATH.
      - name: Setup Apptainer (if not available)
        run: |
          if ! command -v apptainer &> /dev/null && ! command -v docker &> /dev/null; then
            echo "Neither Apptainer nor Docker found, installing Apptainer..."
            apt-get update && apt-get install -y software-properties-common
            add-apt-repository -y ppa:apptainer/ppa
            apt-get update && apt-get install -y apptainer
          else
            echo "Container runtime already available"
          fi

      - name: Build Iris container
        run: |
          bash .github/scripts/container_build.sh

      - name: Acquire GPUs
        run: |
          bash .github/scripts/acquire_gpus.sh 2

      - name: Run External Gluon Validation Test
        run: |
          set -e
          echo "::group::Running external gluon validation test"
          bash .github/scripts/container_exec.sh --gpus "$GPU_DEVICES" "
            set -e
            cd /iris_workspace
            pip install git+https://github.com/${{ github.repository }}.git@${{ github.sha }}
            wget -O test_iris_gluon_distributed.py https://gist.githubusercontent.com/mawad-amd/2666dde8ebe2755eb0c4f2108709fcd5/raw/c5544943e2832c75252160bd9084600bf01a6b06/test_iris_gluon_distributed.py
            torchrun --nproc_per_node=2 test_iris_gluon_distributed.py
          "
          echo "::endgroup::"
          echo "✅ External gluon validation test passed!"

      # always() runs this step even when an earlier step failed or the run
      # was cancelled.
      - name: Release GPUs
        if: always()
        run: |
          bash .github/scripts/release_gpus.sh