diff --git a/.github/workflows/iris-tests-apptainer.yml b/.github/workflows/iris-tests-apptainer.yml
new file mode 100644
index 00000000..f9b8f3ac
--- /dev/null
+++ b/.github/workflows/iris-tests-apptainer.yml
@@ -0,0 +1,89 @@
+name: Iris Tests with Apptainer
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+  workflow_dispatch:
+
+jobs:
+  # Builds the Apptainer image if it is not already present on the runner.
+  build-apptainer-image:
+    runs-on: [self-hosted, mi3008x]
+    timeout-minutes: 90
+    concurrency:
+      group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}-build
+      cancel-in-progress: true
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Setup Apptainer
+        run: |
+          apt-get update && apt-get install -y software-properties-common
+          add-apt-repository -y ppa:apptainer/ppa
+          apt-get update && apt-get install -y apptainer
+
+      - name: Build Iris Apptainer container
+        run: |
+          # Create persistent Apptainer directory
+          mkdir -p ~/apptainer
+
+          # Build Apptainer image from definition file (only if it doesn't exist)
+          # NOTE(review): an existing image is reused even when apptainer/iris.def
+          # has changed; remove the .sif on the runner to force a rebuild.
+          if [ ! -f ~/apptainer/iris-dev.sif ]; then
+            echo "Building new Apptainer image..."
+            apptainer build ~/apptainer/iris-dev.sif apptainer/iris.def
+          else
+            echo "Using existing Apptainer image"
+          fi
+  # Runs the example and unit tests inside the image, once per MPI rank count.
+  run-tests:
+    name: ${{ matrix.ranks }}-rank Iris Test
+    needs: build-apptainer-image
+    runs-on: [self-hosted, mi3008x]
+    timeout-minutes: 20
+    strategy:
+      matrix:
+        ranks: [1, 2, 4, 8]
+    concurrency:
+      group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}-${{ matrix.ranks }}
+      cancel-in-progress: true
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Run Iris Tests with ${{ matrix.ranks }} MPI ranks
+        run: |
+          apptainer exec ~/apptainer/iris-dev.sif bash -c "
+            # Abort right away if installation or setup fails
+            set -e
+
+            # Install iris first
+            pip install -e .
+
+            # Create function for mpirun with root permissions
+            mpirun-root() { mpirun --allow-run-as-root \"\$@\"; }
+
+            # Record failures instead of stopping, so every test file still runs
+            failed=0
+
+            # Run examples tests one at a time
+            echo 'Running examples tests one at a time...'
+            for test_file in tests/examples/test_*.py; do
+              echo \"Testing: \$test_file with ${{ matrix.ranks }} MPI ranks\"
+              mpirun-root -np ${{ matrix.ranks }} python -m pytest \"\$test_file\" -v --tb=short || failed=1
+            done
+
+            # Run unit tests one at a time
+            echo 'Running unit tests one at a time...'
+            for test_file in tests/unittests/test_*.py; do
+              echo \"Testing: \$test_file with ${{ matrix.ranks }} MPI ranks\"
+              mpirun-root -np ${{ matrix.ranks }} python -m pytest \"\$test_file\" -v --tb=short || failed=1
+            done
+
+            # Fail the step if any test file failed
+            exit \$failed
+          "
diff --git a/docker/build.sh b/docker/build.sh
index 050a1998..973c9366 100755
--- a/docker/build.sh
+++ b/docker/build.sh
@@ -10,4 +10,4 @@ pushd "$SCRIPT_DIR" > /dev/null
 
 docker build -t $IMAGE_NAME .
 
-popd > /dev/null
\ No newline at end of file
+popd > /dev/null
diff --git a/pyproject.toml b/pyproject.toml
index 82a5c0dc..b700c83d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,7 +32,7 @@ dependencies = [
     "requests",
     "mpi4py",
     "ruff",
-    "triton"
+    "triton @ git+https://github.com/triton-lang/triton.git@dd5823453bcc7973eabadb65f9d827c43281c434"
 ]
 
 [project.optional-dependencies]
@@ -54,4 +54,4 @@ select = ["E", "F", "W"]
 ignore = ["E501", "E701", "E731", "E741", "F841", "F401"]
 
 [tool.ruff.format]
-quote-style = "double"
\ No newline at end of file
+quote-style = "double"
diff --git a/tests/examples/test_load_bench.py b/tests/examples/test_load_bench.py
index ec2bb472..16d6c403 100644
--- a/tests/examples/test_load_bench.py
+++ b/tests/examples/test_load_bench.py
@@ -51,6 +51,8 @@ def test_load_bench(dtype, buffer_size, heap_size, block_size):
     source_buffer = shmem.ones(buffer_size // element_size_bytes, dtype=dtype)
     result_buffer = shmem.zeros_like(source_buffer)
 
+    shmem.barrier()
+
     for source_rank in range(num_ranks):
         for destination_rank in range(num_ranks):
             bandwidth_gbps = module.bench_load(
diff --git a/tests/unittests/test_atomic_add.py b/tests/unittests/test_atomic_add.py
index e09d078e..2bcbea02 100644
--- a/tests/unittests/test_atomic_add.py
+++ b/tests/unittests/test_atomic_add.py
@@ -75,6 +75,8 @@ def test_atomic_add_api(dtype, sem, scope, BLOCK_SIZE):
 
     results = shmem.zeros(BLOCK_SIZE, dtype=dtype)
 
+    shmem.barrier()
+
     grid = lambda meta: (1,)
     atomic_add_kernel[grid](results, sem, scope, cur_rank, num_ranks, BLOCK_SIZE, heap_bases)
     shmem.barrier()
diff --git a/tests/unittests/test_atomic_and.py b/tests/unittests/test_atomic_and.py
index 19c44ce0..29b2f02b 100644
--- a/tests/unittests/test_atomic_and.py
+++ b/tests/unittests/test_atomic_and.py
@@ -76,6 +76,8 @@ def test_atomic_and_api(dtype, sem, scope, BLOCK_SIZE):
 
     results = shmem.full((BLOCK_SIZE,), initial_mask, dtype=dtype)
 
+    shmem.barrier()
+
     grid = lambda meta: (1,)
     atomic_and_kernel[grid](results, sem, scope, cur_rank, num_ranks, BLOCK_SIZE, heap_bases)
     shmem.barrier()
diff --git a/tests/unittests/test_atomic_cas.py b/tests/unittests/test_atomic_cas.py
index 52db3dd0..38eb5aec 100644
--- a/tests/unittests/test_atomic_cas.py
+++ b/tests/unittests/test_atomic_cas.py
@@ -59,6 +59,8 @@ def test_atomic_cas_api(dtype, sem, scope):
 
     results = shmem.zeros((1,), dtype=dtype)
 
+    shmem.barrier()
+
     grid = lambda meta: (1,)
     atomic_cas_kernel[grid](results, sem, scope, cur_rank, num_ranks, heap_bases)
     shmem.barrier()
diff --git a/tests/unittests/test_atomic_max.py b/tests/unittests/test_atomic_max.py
index 61359afd..32f381e5 100644
--- a/tests/unittests/test_atomic_max.py
+++ b/tests/unittests/test_atomic_max.py
@@ -71,6 +71,8 @@ def test_atomic_max_api(dtype, sem, scope, BLOCK_SIZE):
     min_val = torch.iinfo(dtype).min
     results = shmem.full((BLOCK_SIZE,), min_val, dtype=dtype)
 
+    shmem.barrier()
+
     grid = lambda meta: (1,)
     atomic_max_kernel[grid](results, sem, scope, cur_rank, num_ranks, BLOCK_SIZE, heap_bases)
     shmem.barrier()
diff --git a/tests/unittests/test_atomic_min.py b/tests/unittests/test_atomic_min.py
index da7ef176..15d862d9 100644
--- a/tests/unittests/test_atomic_min.py
+++ b/tests/unittests/test_atomic_min.py
@@ -71,6 +71,8 @@ def test_atomic_min_api(dtype, sem, scope, BLOCK_SIZE):
     max_val = torch.iinfo(dtype).max
     results = shmem.full((BLOCK_SIZE,), max_val, dtype=dtype)
 
+    shmem.barrier()
+
     grid = lambda meta: (1,)
     atomic_min_kernel[grid](results, sem, scope, cur_rank, num_ranks, BLOCK_SIZE, heap_bases)
     shmem.barrier()
diff --git a/tests/unittests/test_atomic_or.py b/tests/unittests/test_atomic_or.py
index 8d17be65..18705763 100644
--- a/tests/unittests/test_atomic_or.py
+++ b/tests/unittests/test_atomic_or.py
@@ -71,6 +71,8 @@ def test_atomic_or_api(dtype, sem, scope, BLOCK_SIZE):
 
     results = shmem.zeros(BLOCK_SIZE, dtype=dtype)
 
+    shmem.barrier()
+
     grid = lambda meta: (1,)
     atomic_or_kernel[grid](results, sem, scope, cur_rank, num_ranks, BLOCK_SIZE, heap_bases)
     shmem.barrier()
diff --git a/tests/unittests/test_atomic_xchg.py b/tests/unittests/test_atomic_xchg.py
index f11ad798..f47a4dc2 100644
--- a/tests/unittests/test_atomic_xchg.py
+++ b/tests/unittests/test_atomic_xchg.py
@@ -58,6 +58,8 @@ def test_atomic_xchg_api(dtype, sem, scope):
 
     results = shmem.zeros((1,), dtype=dtype)
 
+    shmem.barrier()
+
     grid = lambda meta: (1,)
     atomic_xchg_kernel[grid](results, sem, scope, cur_rank, num_ranks, heap_bases)
     shmem.barrier()
diff --git a/tests/unittests/test_atomic_xor.py b/tests/unittests/test_atomic_xor.py
index 702002b9..774df798 100644
--- a/tests/unittests/test_atomic_xor.py
+++ b/tests/unittests/test_atomic_xor.py
@@ -72,6 +72,8 @@ def test_atomic_xor_api(dtype, sem, scope, BLOCK_SIZE):
 
     results = shmem.zeros(BLOCK_SIZE, dtype=dtype)
 
+    shmem.barrier()
+
     grid = lambda meta: (1,)
     atomic_xor_kernel[grid](results, sem, scope, cur_rank, num_ranks, BLOCK_SIZE, heap_bases)
     shmem.barrier()
diff --git a/tests/unittests/test_get.py b/tests/unittests/test_get.py
index f7df2340..75cf1e26 100644
--- a/tests/unittests/test_get.py
+++ b/tests/unittests/test_get.py
@@ -66,6 +66,8 @@ def test_get_api(dtype, BLOCK_SIZE):
     data = shmem.ones(BLOCK_SIZE, dtype=dtype)
     results = shmem.zeros_like(data)
 
+    shmem.barrier()
+
     grid = lambda meta: (1,)
     get_kernel[grid](data, results, cur_rank, num_ranks, BLOCK_SIZE, heap_bases)
     shmem.barrier()
diff --git a/tests/unittests/test_load.py b/tests/unittests/test_load.py
index 4b0f0b36..a379a9e3 100644
--- a/tests/unittests/test_load.py
+++ b/tests/unittests/test_load.py
@@ -59,6 +59,8 @@ def test_load_api(dtype, BLOCK_SIZE):
     data = shmem.full((BLOCK_SIZE,), source_rank, dtype=dtype)
     results = shmem.zeros_like(data)
 
+    shmem.barrier()
+
     grid = lambda meta: (1,)
     load_kernel[grid](data, results, source_rank, num_ranks, BLOCK_SIZE, heap_bases)
     shmem.barrier()
diff --git a/tests/unittests/test_put.py b/tests/unittests/test_put.py
index c41f2f7e..a305af4a 100644
--- a/tests/unittests/test_put.py
+++ b/tests/unittests/test_put.py
@@ -60,6 +60,8 @@ def test_put_api(dtype, BLOCK_SIZE):
     data = shmem.ones(BLOCK_SIZE, dtype=dtype)
     results = shmem.zeros_like(data)
 
+    shmem.barrier()
+
     grid = lambda meta: (1,)
     put_kernel[grid](data, results, cur_rank, num_ranks, BLOCK_SIZE, heap_bases)
     shmem.barrier()
diff --git a/tests/unittests/test_store.py b/tests/unittests/test_store.py
index 361500a7..fd9762a5 100644
--- a/tests/unittests/test_store.py
+++ b/tests/unittests/test_store.py
@@ -61,6 +61,8 @@ def test_store_api(dtype, BLOCK_SIZE):
     src = shmem.ones(BLOCK_SIZE, dtype=dtype)
     results = shmem.zeros_like(src)
 
+    shmem.barrier()
+
     grid = lambda meta: (1,)
     store_kernel[grid](src, results, destination_rank, num_ranks, BLOCK_SIZE, heap_bases)
     shmem.barrier()