Skip to content
Merged
Show file tree
Hide file tree
Changes from 39 commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
553ae69
Add single node CI workflow
mawad-amd Aug 18, 2025
8759a7e
Run on the branch
mawad-amd Aug 18, 2025
79a5acf
Check docker
mawad-amd Aug 18, 2025
4fc89c0
Start the docker daemon
mawad-amd Aug 18, 2025
ba69f6f
Check for version
mawad-amd Aug 18, 2025
76a097a
Install docker
mawad-amd Aug 19, 2025
f9527ad
Add docker setup step
mawad-amd Aug 19, 2025
f629465
Build the container
mawad-amd Aug 19, 2025
b1222ed
Checkout the repo
mawad-amd Aug 19, 2025
9764984
Add ranks matrix
mawad-amd Aug 19, 2025
371387a
Rename runner label
mawad-amd Aug 19, 2025
09d8685
Rerun
mawad-amd Aug 19, 2025
d31b0db
Push to hub
mawad-amd Aug 19, 2025
997c4a8
Change the dockerfile
mawad-amd Aug 19, 2025
82a7f54
Add test
mawad-amd Aug 19, 2025
c11c538
Remove CI on branch
mawad-amd Aug 19, 2025
e5a9c7c
Do not use the cache
mawad-amd Aug 19, 2025
312ad15
Remove unused args
mawad-amd Aug 19, 2025
8af7ddc
Space cleanup
mawad-amd Aug 20, 2025
c29a02c
Disable test
mawad-amd Aug 20, 2025
bd68b83
Use docker prune
mawad-amd Aug 20, 2025
c7b7b3e
Remove bad files
mawad-amd Aug 20, 2025
a634ef4
Desto old docker
mawad-amd Aug 20, 2025
126ec2a
Test new runner
mawad-amd Aug 20, 2025
1b6c31a
Do not delete files
mawad-amd Aug 20, 2025
8da0d08
Go back to default
mawad-amd Aug 20, 2025
7082414
Remove prune command
mawad-amd Aug 20, 2025
1b5b2c3
Add apptainer flow
mawad-amd Aug 20, 2025
4f8d8e8
Remove bad command
mawad-amd Aug 20, 2025
e7da8aa
Fix apptainer install command
mawad-amd Aug 20, 2025
0848a1e
Fix apptianer install command
mawad-amd Aug 20, 2025
8c7d70b
Run tests
mawad-amd Aug 20, 2025
3687b99
Run tests
mawad-amd Aug 20, 2025
3d473ac
Install and test in one go
mawad-amd Aug 20, 2025
f4e3bce
Remove apptainer install
mawad-amd Aug 20, 2025
4ff36d2
Barrier after initialization
mawad-amd Aug 20, 2025
92a243c
Change alias to function
mawad-amd Aug 20, 2025
3459e68
Run one test at a time
mawad-amd Aug 20, 2025
bde698d
Cleanup
mawad-amd Aug 20, 2025
9047677
Use source branch for PRs
mawad-amd Aug 20, 2025
27dd2ce
Revert dockerfile changes
mawad-amd Aug 20, 2025
3ff980c
Remove debug code
mawad-amd Aug 20, 2025
d5bc6b5
Remove unneded if
mawad-amd Aug 20, 2025
0a1179f
Remove rank from concurrency setting
mawad-amd Aug 20, 2025
c0cdfa9
Fix concurrency groups
mawad-amd Aug 20, 2025
db54fc2
Improve name
mawad-amd Aug 20, 2025
b08b56f
Improve name
mawad-amd Aug 20, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 78 additions & 0 deletions .github/workflows/iris-tests-apptainer.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# CI workflow for Iris on a self-hosted runner.
#
# Job 1 (build-apptainer-image) installs Apptainer and builds the image from
# apptainer/iris.def, reusing ~/apptainer/iris-dev.sif when it already exists.
# Job 2 (run-tests) installs Iris inside that image and runs every example and
# unit test file under MPI, once per rank count in the matrix.
name: Iris Tests with Apptainer

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
  workflow_dispatch:

jobs:
  build-apptainer-image:
    runs-on: [self-hosted, mi3008x]
    timeout-minutes: 90
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup Apptainer
        run: |
          apt-get update && apt-get install -y software-properties-common
          add-apt-repository -y ppa:apptainer/ppa
          apt-get update && apt-get install -y apptainer
          apptainer --version

      - name: Build Iris Apptainer container
        run: |
          df -h /
          # Create persistent Apptainer directory
          mkdir -p ~/apptainer

          # Build Apptainer image from definition file (only if it doesn't exist)
          # NOTE(review): an existing image is reused even if apptainer/iris.def
          # has changed since it was built; delete the .sif to force a rebuild.
          if [ ! -f ~/apptainer/iris-dev.sif ]; then
            echo "Building new Apptainer image..."
            apptainer build ~/apptainer/iris-dev.sif apptainer/iris.def
          else
            echo "Using existing Apptainer image"
          fi

  run-tests:
    needs: build-apptainer-image
    runs-on: [self-hosted, mi3008x]
    timeout-minutes: 20
    strategy:
      matrix:
        ranks: [1, 2, 4, 8]
    # One concurrency group per (workflow, ref, rank count): a newer run for the
    # same ref cancels the in-progress run of the same matrix entry only.
    concurrency:
      group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.ranks }}
      cancel-in-progress: true
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Run Iris Tests with ${{ matrix.ranks }} MPI ranks
        run: |
          # The script below is passed to bash inside the container as one
          # double-quoted string, so every \$ and \" is escaped to survive the
          # outer shell. ${{ matrix.ranks }} is substituted by GitHub Actions
          # before any shell runs.
          apptainer exec ~/apptainer/iris-dev.sif bash -c "
            # Install iris first; abort if the install fails so the tests
            # never run against a missing or stale installation.
            pip install -e . || exit 1

            # Create function for mpirun with root permissions
            mpirun-root() { mpirun --allow-run-as-root \"\$@\"; }

            # Remember whether any test file failed. Without this, the exit
            # status of the script is that of the last mpirun only, and
            # earlier failures would be reported as success.
            failed=0

            # Run examples tests one at a time
            echo 'Running examples tests one at a time...'
            for test_file in tests/examples/test_*.py; do
              # Guard against the unexpanded glob when no file matches.
              if [ -f \"\$test_file\" ]; then
                echo \"Testing: \$test_file with ${{ matrix.ranks }} MPI ranks\"
                mpirun-root -np ${{ matrix.ranks }} python -m pytest \"\$test_file\" -v --tb=short || failed=1
              fi
            done

            # Run unit tests one at a time
            echo 'Running unit tests one at a time...'
            for test_file in tests/unittests/test_*.py; do
              if [ -f \"\$test_file\" ]; then
                echo \"Testing: \$test_file with ${{ matrix.ranks }} MPI ranks\"
                mpirun-root -np ${{ matrix.ranks }} python -m pytest \"\$test_file\" -v --tb=short || failed=1
              fi
            done

            # Fail the step if any test file failed.
            exit \$failed
          "
14 changes: 7 additions & 7 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,14 @@ RUN pip3 install --upgrade pip && \
# This needs sudo, I can only get it to install with sudo
# or using conda, but conda runs into issues with too many requests.
# https://stackoverflow.com/a/54052470/5729690
RUN sudo pip3 install mpi4py
#RUN sudo pip3 install mpi4py

# Clone and install Triton
WORKDIR $TRITON_PATH
RUN git clone https://github.com/triton-lang/triton.git $TRITON_PATH
RUN git checkout dd5823453bcc7973eabadb65f9d827c43281c434
RUN pip3 install -e .
ENV PYTHONPATH=$TRITON_PATH
#WORKDIR $TRITON_PATH
#RUN git clone https://github.com/triton-lang/triton.git $TRITON_PATH
#RUN git checkout dd5823453bcc7973eabadb65f9d827c43281c434
#RUN pip3 install -e .
#ENV PYTHONPATH=$TRITON_PATH

# Install rocprofiler-systems
WORKDIR /workspace
Expand All @@ -58,4 +58,4 @@ RUN echo '#!/bin/bash' > /entrypoint.sh && \
chmod +x /entrypoint.sh

# Set the entrypoint
ENTRYPOINT ["/bin/bash", "-c", "source /entrypoint.sh && exec bash"]
ENTRYPOINT ["/bin/bash", "-c", "source /entrypoint.sh && exec bash"]
2 changes: 1 addition & 1 deletion docker/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ pushd "$SCRIPT_DIR" > /dev/null

docker build -t $IMAGE_NAME .

popd > /dev/null
popd > /dev/null
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ dependencies = [
"requests",
"mpi4py",
"ruff",
"triton"
"triton @ git+https://github.com/triton-lang/triton.git@dd5823453bcc7973eabadb65f9d827c43281c434"
]

[project.optional-dependencies]
Expand All @@ -54,4 +54,4 @@ select = ["E", "F", "W"]
ignore = ["E501", "E701", "E731", "E741", "F841", "F401"]

[tool.ruff.format]
quote-style = "double"
quote-style = "double"
2 changes: 2 additions & 0 deletions tests/examples/test_load_bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ def test_load_bench(dtype, buffer_size, heap_size, block_size):
source_buffer = shmem.ones(buffer_size // element_size_bytes, dtype=dtype)
result_buffer = shmem.zeros_like(source_buffer)

shmem.barrier()

for source_rank in range(num_ranks):
for destination_rank in range(num_ranks):
bandwidth_gbps = module.bench_load(
Expand Down
2 changes: 2 additions & 0 deletions tests/unittests/test_atomic_add.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ def test_atomic_add_api(dtype, sem, scope, BLOCK_SIZE):

results = shmem.zeros(BLOCK_SIZE, dtype=dtype)

shmem.barrier()

grid = lambda meta: (1,)
atomic_add_kernel[grid](results, sem, scope, cur_rank, num_ranks, BLOCK_SIZE, heap_bases)
shmem.barrier()
Expand Down
2 changes: 2 additions & 0 deletions tests/unittests/test_atomic_and.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ def test_atomic_and_api(dtype, sem, scope, BLOCK_SIZE):

results = shmem.full((BLOCK_SIZE,), initial_mask, dtype=dtype)

shmem.barrier()

grid = lambda meta: (1,)
atomic_and_kernel[grid](results, sem, scope, cur_rank, num_ranks, BLOCK_SIZE, heap_bases)
shmem.barrier()
Expand Down
2 changes: 2 additions & 0 deletions tests/unittests/test_atomic_cas.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ def test_atomic_cas_api(dtype, sem, scope):

results = shmem.zeros((1,), dtype=dtype)

shmem.barrier()

grid = lambda meta: (1,)
atomic_cas_kernel[grid](results, sem, scope, cur_rank, num_ranks, heap_bases)
shmem.barrier()
Expand Down
2 changes: 2 additions & 0 deletions tests/unittests/test_atomic_max.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ def test_atomic_max_api(dtype, sem, scope, BLOCK_SIZE):
min_val = torch.iinfo(dtype).min
results = shmem.full((BLOCK_SIZE,), min_val, dtype=dtype)

shmem.barrier()

grid = lambda meta: (1,)
atomic_max_kernel[grid](results, sem, scope, cur_rank, num_ranks, BLOCK_SIZE, heap_bases)
shmem.barrier()
Expand Down
2 changes: 2 additions & 0 deletions tests/unittests/test_atomic_min.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ def test_atomic_min_api(dtype, sem, scope, BLOCK_SIZE):
max_val = torch.iinfo(dtype).max
results = shmem.full((BLOCK_SIZE,), max_val, dtype=dtype)

shmem.barrier()

grid = lambda meta: (1,)
atomic_min_kernel[grid](results, sem, scope, cur_rank, num_ranks, BLOCK_SIZE, heap_bases)
shmem.barrier()
Expand Down
2 changes: 2 additions & 0 deletions tests/unittests/test_atomic_or.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ def test_atomic_or_api(dtype, sem, scope, BLOCK_SIZE):

results = shmem.zeros(BLOCK_SIZE, dtype=dtype)

shmem.barrier()

grid = lambda meta: (1,)
atomic_or_kernel[grid](results, sem, scope, cur_rank, num_ranks, BLOCK_SIZE, heap_bases)
shmem.barrier()
Expand Down
2 changes: 2 additions & 0 deletions tests/unittests/test_atomic_xchg.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ def test_atomic_xchg_api(dtype, sem, scope):

results = shmem.zeros((1,), dtype=dtype)

shmem.barrier()

grid = lambda meta: (1,)
atomic_xchg_kernel[grid](results, sem, scope, cur_rank, num_ranks, heap_bases)
shmem.barrier()
Expand Down
2 changes: 2 additions & 0 deletions tests/unittests/test_atomic_xor.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ def test_atomic_xor_api(dtype, sem, scope, BLOCK_SIZE):

results = shmem.zeros(BLOCK_SIZE, dtype=dtype)

shmem.barrier()

grid = lambda meta: (1,)
atomic_xor_kernel[grid](results, sem, scope, cur_rank, num_ranks, BLOCK_SIZE, heap_bases)
shmem.barrier()
Expand Down
2 changes: 2 additions & 0 deletions tests/unittests/test_get.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ def test_get_api(dtype, BLOCK_SIZE):
data = shmem.ones(BLOCK_SIZE, dtype=dtype)
results = shmem.zeros_like(data)

shmem.barrier()

grid = lambda meta: (1,)
get_kernel[grid](data, results, cur_rank, num_ranks, BLOCK_SIZE, heap_bases)
shmem.barrier()
Expand Down
2 changes: 2 additions & 0 deletions tests/unittests/test_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ def test_load_api(dtype, BLOCK_SIZE):
data = shmem.full((BLOCK_SIZE,), source_rank, dtype=dtype)
results = shmem.zeros_like(data)

shmem.barrier()

grid = lambda meta: (1,)
load_kernel[grid](data, results, source_rank, num_ranks, BLOCK_SIZE, heap_bases)
shmem.barrier()
Expand Down
2 changes: 2 additions & 0 deletions tests/unittests/test_put.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ def test_put_api(dtype, BLOCK_SIZE):
data = shmem.ones(BLOCK_SIZE, dtype=dtype)
results = shmem.zeros_like(data)

shmem.barrier()

grid = lambda meta: (1,)
put_kernel[grid](data, results, cur_rank, num_ranks, BLOCK_SIZE, heap_bases)
shmem.barrier()
Expand Down
2 changes: 2 additions & 0 deletions tests/unittests/test_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ def test_store_api(dtype, BLOCK_SIZE):
src = shmem.ones(BLOCK_SIZE, dtype=dtype)
results = shmem.zeros_like(src)

shmem.barrier()

grid = lambda meta: (1,)
store_kernel[grid](src, results, destination_rank, num_ranks, BLOCK_SIZE, heap_bases)
shmem.barrier()
Expand Down
Loading