Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions .github/scripts/container_build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/bin/bash
# SPDX-License-Identifier: MIT
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
#
# Universal container build script that works with Apptainer or Docker

# Stop at the first command that fails.
set -e

# Bail out early when no supported container runtime is installed.
if ! command -v apptainer &> /dev/null && ! command -v docker &> /dev/null; then
    echo "[ERROR] Neither Apptainer nor Docker is available"
    echo "[ERROR] Please install either Apptainer or Docker to continue"
    exit 1
fi

# Apptainer takes precedence when both runtimes are present.
if command -v apptainer &> /dev/null; then
    CONTAINER_RUNTIME="apptainer"
    echo "[INFO] Using Apptainer"
else
    CONTAINER_RUNTIME="docker"
    echo "[INFO] Using Docker"
fi

# Dispatch to the build steps of the selected runtime.
case "$CONTAINER_RUNTIME" in
    apptainer)
        echo "[INFO] Building with Apptainer..."

        # The image is kept in a per-user directory that persists between runs.
        sif_image=~/apptainer/iris-dev.sif
        mkdir -p ~/apptainer

        # An image already on disk is reused; only a missing one is built
        # from the definition file.
        if [ -f "$sif_image" ]; then
            echo "[INFO] Using existing Apptainer image at ~/apptainer/iris-dev.sif"
        else
            echo "[INFO] Building new Apptainer image..."
            apptainer build "$sif_image" apptainer/iris.def
        fi
        ;;
    docker)
        echo "[INFO] Building with Docker..."
        IMAGE_NAME=${1:-"iris-dev"}
        # Building a Docker image is intentionally disabled for now.
        # bash docker/build.sh "$IMAGE_NAME"
        ;;
esac

echo "[INFO] Container build completed successfully with $CONTAINER_RUNTIME"

120 changes: 120 additions & 0 deletions .github/scripts/container_exec.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
#!/bin/bash
# SPDX-License-Identifier: MIT
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
#
# Universal container exec script - thin wrapper that executes commands in either Apptainer or Docker
# Usage: container_exec.sh [--gpus GPUS] [--image IMAGE] <command>

# Stop at the first command that fails.
set -e

# Print an error message on stderr and abort with status 1.
# Arguments: the message text.
die() {
    echo "[ERROR] $*" >&2
    exit 1
}

# ---------------------------------------------------------------------------
# Option parsing
#   --gpus GPUS    value exported to the container as HIP_VISIBLE_DEVICES
#   --image IMAGE  image to use instead of the default lookup
# Parsing stops at the first argument that is not one of these options;
# everything from there on is the command to run.
# ---------------------------------------------------------------------------
GPU_DEVICES=""
CUSTOM_IMAGE=""

while [[ $# -gt 0 ]]; do
    case "$1" in
        --gpus)
            # Without this check a missing value makes `shift 2` fail and
            # `set -e` ends the script with no explanation.
            [[ $# -ge 2 ]] || die "--gpus requires a value"
            GPU_DEVICES="$2"
            shift 2
            ;;
        --image)
            [[ $# -ge 2 ]] || die "--image requires a value"
            CUSTOM_IMAGE="$2"
            shift 2
            ;;
        *)
            break
            ;;
    esac
done

# Remaining args are the command. They are joined with single spaces into one
# string because the command is handed to `bash -c` inside the container.
COMMAND="$*"
if [[ -z "$COMMAND" ]]; then
    echo "[ERROR] No command provided" >&2
    echo "Usage: $0 [--gpus GPUS] [--image IMAGE] <command>" >&2
    exit 1
fi

# Check which container runtime is available (Apptainer is preferred).
if command -v apptainer &> /dev/null; then
    CONTAINER_RUNTIME="apptainer"
    echo "[INFO] Using Apptainer"
elif command -v docker &> /dev/null; then
    CONTAINER_RUNTIME="docker"
    echo "[INFO] Using Docker"
else
    die "Neither Apptainer nor Docker is available"
fi

# Execute based on detected runtime
if [[ "$CONTAINER_RUNTIME" == "apptainer" ]]; then
    # Image lookup order: explicit --image, the per-user image, then the
    # image inside the repository checkout.
    if [[ -n "$CUSTOM_IMAGE" ]]; then
        IMAGE="$CUSTOM_IMAGE"
    elif [[ -f ~/apptainer/iris-dev.sif ]]; then
        IMAGE=~/apptainer/iris-dev.sif
    elif [[ -f apptainer/images/iris.sif ]]; then
        IMAGE="apptainer/images/iris.sif"
    else
        die "Apptainer image not found"
    fi

    # Create a temporary overlay in the workspace. The nanosecond timestamp
    # keeps the file name unique per invocation, so the file is never reused;
    # remove it on every exit path instead of letting images pile up.
    OVERLAY="./iris_overlay_$(date +%s%N).img"
    trap 'rm -f -- "$OVERLAY"' EXIT
    if ! apptainer overlay create --size 1024 --create-dir /var/cache/iris "${OVERLAY}" > /dev/null 2>&1; then
        die "Failed to create Apptainer overlay ${OVERLAY}"
    fi

    # Build the command line as an array so that every argument (notably the
    # bind path derived from $PWD) stays a single word.
    EXEC_CMD=(apptainer exec --overlay "${OVERLAY}" --no-home --cleanenv)

    # Add GPU selection if specified
    if [[ -n "$GPU_DEVICES" ]]; then
        EXEC_CMD+=(--env "HIP_VISIBLE_DEVICES=${GPU_DEVICES}")
    fi

    # Mount the current directory as the workspace and start there.
    EXEC_CMD+=(--bind "${PWD}:/iris_workspace" --cwd /iris_workspace)

    # Execute; under `set -e` a failing command ends the script with its status.
    "${EXEC_CMD[@]}" "$IMAGE" bash -c "$COMMAND"

elif [[ "$CONTAINER_RUNTIME" == "docker" ]]; then
    # Image precedence: --image, then $DOCKER_IMAGE_NAME, then "iris-dev".
    IMAGE_NAME=${CUSTOM_IMAGE:-${DOCKER_IMAGE_NAME:-"iris-dev"}}

    if ! docker image inspect "$IMAGE_NAME" &> /dev/null; then
        die "Docker image $IMAGE_NAME not found"
    fi

    # Get video and render group IDs from the host; either may come back
    # empty when the group does not exist, in which case it is skipped below.
    VIDEO_GID=$(getent group video | cut -d: -f3)
    RENDER_GID=$(getent group render | cut -d: -f3)

    # Build the run command as an array (see the Apptainer branch for why).
    RUN_CMD=(docker run --rm --network=host --device=/dev/kfd --device=/dev/dri)
    RUN_CMD+=(--cap-add=SYS_PTRACE --security-opt seccomp=unconfined)
    RUN_CMD+=(-v "${PWD}:/iris_workspace" -w /iris_workspace)
    RUN_CMD+=(--shm-size=16G --ulimit memlock=-1 --ulimit stack=67108864)
    # Run as the invoking user's uid:gid.
    RUN_CMD+=(--user "$(id -u):$(id -g)")

    # Add video and render groups for GPU access
    if [[ -n "$VIDEO_GID" ]]; then
        RUN_CMD+=(--group-add "$VIDEO_GID")
    fi
    if [[ -n "$RENDER_GID" ]]; then
        RUN_CMD+=(--group-add "$RENDER_GID")
    fi

    RUN_CMD+=(-e HOME=/iris_workspace)
    # Override the image entrypoint so the trailing `-c "$COMMAND"` goes to bash.
    RUN_CMD+=(--entrypoint bash)

    # Add GPU selection if specified
    if [[ -n "$GPU_DEVICES" ]]; then
        RUN_CMD+=(-e "HIP_VISIBLE_DEVICES=${GPU_DEVICES}")
    fi

    # Execute
    "${RUN_CMD[@]}" "$IMAGE_NAME" -c "$COMMAND"
fi

32 changes: 32 additions & 0 deletions .github/scripts/container_run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/bin/bash
# SPDX-License-Identifier: MIT
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
#
# Universal container run script that works with Apptainer or Docker

# Stop at the first command that fails.
set -e

# Give up right away when no supported container runtime is installed.
if ! command -v apptainer &> /dev/null && ! command -v docker &> /dev/null; then
    echo "[ERROR] Neither Apptainer nor Docker is available"
    echo "[ERROR] Please install either Apptainer or Docker to continue"
    exit 1
fi

# Apptainer takes precedence when both runtimes are present.
if command -v apptainer &> /dev/null; then
    CONTAINER_RUNTIME="apptainer"
    echo "[INFO] Using Apptainer"
else
    CONTAINER_RUNTIME="docker"
    echo "[INFO] Using Docker"
fi

# Hand over to the runtime-specific run script.
case "$CONTAINER_RUNTIME" in
    apptainer)
        echo "[INFO] Running with Apptainer..."
        # All arguments are forwarded untouched.
        bash apptainer/run.sh "$@"
        ;;
    docker)
        echo "[INFO] Running with Docker..."
        # $1 = image name (default "iris-dev"), $2 = workspace directory
        # (default: the current directory).
        IMAGE_NAME=${1:-"iris-dev"}
        WORKSPACE_DIR=${2:-"$(pwd)"}
        bash docker/run.sh "$IMAGE_NAME" "$WORKSPACE_DIR"
        ;;
esac

67 changes: 34 additions & 33 deletions .github/scripts/run_perf_benchmark.sh
Original file line number Diff line number Diff line change
@@ -1,63 +1,64 @@
#!/bin/bash
# SPDX-License-Identifier: MIT
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
#
# Run performance benchmark in a container
# Usage: run_perf_benchmark.sh <example_path> <tflops_threshold> <benchmark_args...>

# Abort on the first command that exits non-zero.
set -e

# Arguments: $1 is the example path, $2 the TFLOPs threshold; everything after
# them is kept as extra benchmark arguments.
# NOTE(review): with fewer than two arguments `shift 2` itself returns non-zero,
# which under `set -e` presumably ends the script before the usage check below
# can print anything — confirm.
EXAMPLE_PATH=$1
TFLOPS_THRESHOLD=$2
shift 2
# Assigning "$@" to a scalar joins the remaining arguments into one
# space-separated string.
BENCHMARK_ARGS="$@"

# Create overlay image in workspace (will be auto-cleaned by GitHub Actions)
# The overlay file name is the example path with every "/" replaced by "_".
# NOTE(review): this overlay handling and the usage check that follows look like
# removed and added diff lines shown side by side — confirm which one the
# current script contains.
OVERLAY="iris_overlay_perf_${EXAMPLE_PATH//\//_}.img"
# Reject empty values for either required argument.
if [ -z "$EXAMPLE_PATH" ] || [ -z "$TFLOPS_THRESHOLD" ]; then
echo "[ERROR] Missing required arguments"
echo "Usage: $0 <example_path> <tflops_threshold> <benchmark_args...>"
exit 1
fi

# The ::group::/::endgroup:: echoes bracket the overlay creation output.
echo "::group::Creating overlay image"
apptainer overlay create --size 1024 --create-dir /var/cache/iris "${OVERLAY}"
echo "::endgroup::"
# Absolute path of the directory containing this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Runs examples/<EXAMPLE_PATH>/benchmark.py inside the container with
# --benchmark --validate -r 8, the caller's extra arguments, and
# --output_file perf_result.json, after `pip install -e .`.
# NOTE(review): two invocations are visible below — a direct `apptainer exec`
# and a call to container_exec.sh with --gpus "0,1,2,3,4,5,6,7" — and the
# double quotes do not pair up as written. This looks like removed and added
# diff lines interleaved without markers; confirm against the repository
# before editing anything in this span.
echo "::group::Running performance benchmark"
apptainer exec --overlay "${OVERLAY}" --no-home --cleanenv --env HIP_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" \
--bind "${PWD}:/iris_workspace" --cwd /iris_workspace \
~/apptainer/iris-dev.sif bash -c "
# Run benchmark in container
"$SCRIPT_DIR/container_exec.sh" --gpus "0,1,2,3,4,5,6,7" "
set -e
pip install -e .
python examples/${EXAMPLE_PATH}/benchmark.py \
--benchmark \
--validate \
-r 8 \
${BENCHMARK_ARGS} \
--output_file perf_result.json
"
echo "::endgroup::"

# Parse JSON and check performance
echo "::group::Validating performance"

# Check if benchmark succeeded
--benchmark \
--validate \
-r 8 \
${BENCHMARK_ARGS} \
--output_file perf_result.json
"

# Validate performance (runs outside container)
# Reads perf_result.json from the current directory with jq and exits 1 when
# the run did not succeed, when no tflops value is present, or when the value
# is below the threshold.
# NOTE(review): each failure branch below shows both an "::error::" and an
# "[ERROR]" variant followed by a second `exit 1`; this looks like removed and
# added diff lines interleaved — confirm which variant is current.
echo "Validating performance results..."

# The .success field must be the literal string "true".
SUCCESS=$(jq -r '.success' perf_result.json)
if [ "$SUCCESS" != "true" ]; then
echo "::error::Benchmark failed (success: $SUCCESS)"
jq '.' perf_result.json
exit 1
echo "[ERROR] Benchmark failed (success: $SUCCESS)"
jq '.' perf_result.json
exit 1
fi

# The check below treats an empty result or the string "null" as a missing value.
TFLOPS=$(jq -r '.tflops' perf_result.json)

if [ -z "$TFLOPS" ] || [ "$TFLOPS" = "null" ]; then
echo "::error::Failed to extract tflops from benchmark output"
jq '.' perf_result.json
exit 1
echo "[ERROR] Failed to extract tflops from benchmark output"
jq '.' perf_result.json
exit 1
fi

echo "::notice::Achieved TFLOPs: $TFLOPS"
echo "[INFO] Achieved TFLOPs: $TFLOPS"

# Convert to integer for comparison
# ${TFLOPS%.*} drops everything from the last "." onward, so the fractional
# part is truncated (not rounded) before the arithmetic comparison.
TFLOPS_INT=${TFLOPS%.*}
if (( TFLOPS_INT < TFLOPS_THRESHOLD )); then
echo "::error::Performance regression detected! TFLOPs ($TFLOPS) is below threshold ($TFLOPS_THRESHOLD)"
jq '.' perf_result.json
exit 1
echo "[ERROR] Performance regression detected! TFLOPs ($TFLOPS) is below threshold ($TFLOPS_THRESHOLD)"
jq '.' perf_result.json
exit 1
fi

echo "✅ Performance test passed! TFLOPs: $TFLOPS (threshold: >$TFLOPS_THRESHOLD)"
echo "::endgroup::"

48 changes: 32 additions & 16 deletions .github/scripts/run_tests.sh
Original file line number Diff line number Diff line change
@@ -1,28 +1,44 @@
#!/bin/bash
# SPDX-License-Identifier: MIT
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
#
# Run Iris tests in a container
# Usage: run_tests.sh <num_ranks> [gpu_devices]

# NOTE(review): `set -e` appears twice and the usage messages come in two
# variants; this looks like removed and added diff lines interleaved — confirm
# which ones the current script contains.
set -e # Exit on any error
set -e

# Get num_ranks from command line argument
# $1 is the number of ranks (required); $2 is an optional GPU device list that
# defaults to the empty string.
NUM_RANKS=$1
GPU_DEVICES=${2:-""}

# Refuse to continue without a rank count.
if [ -z "$NUM_RANKS" ]; then
echo "Error: NUM_RANKS not provided"
echo "Usage: $0 <num_ranks>"
echo "[ERROR] NUM_RANKS not provided"
echo "Usage: $0 <num_ranks> [gpu_devices]"
exit 1
fi

# Run examples tests one at a time using distributed wrapper
# Each file matching tests/examples/test_*.py is passed on its own to
# tests/run_tests_distributed.py; under `set -e` the first failing run ends
# the script.
echo 'Running examples tests one at a time...'
for test_file in tests/examples/test_*.py; do
echo "Testing: $test_file with $NUM_RANKS ranks"
python tests/run_tests_distributed.py --num_ranks $NUM_RANKS "$test_file" -v --tb=short --durations=10
done
# Absolute path of the directory containing this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Build GPU argument if provided
# GPU_ARG stays empty when no device list was given.
GPU_ARG=""
if [ -n "$GPU_DEVICES" ]; then
GPU_ARG="--gpus $GPU_DEVICES"
fi

# Run tests in container
# $GPU_ARG is left unquoted so that "--gpus <devices>" splits into two words
# (and disappears entirely when empty). The script text is double-quoted, so
# $NUM_RANKS is expanded here before the text is handed to container_exec.sh,
# while the escaped \$test_file is left for the shell that later runs the text.
"$SCRIPT_DIR/container_exec.sh" $GPU_ARG "
set -e
pip install -e .

# Run examples tests
for test_file in tests/examples/test_*.py; do
echo \"Testing: \$test_file with $NUM_RANKS ranks\"
python tests/run_tests_distributed.py --num_ranks $NUM_RANKS \"\$test_file\" -v --tb=short --durations=10
done

# Run unit tests
for test_file in tests/unittests/test_*.py; do
echo \"Testing: \$test_file with $NUM_RANKS ranks\"
python tests/run_tests_distributed.py --num_ranks $NUM_RANKS \"\$test_file\" -v --tb=short --durations=10
done
"

# Run unit tests one at a time using distributed wrapper
# NOTE(review): this loop runs the same unit-test files directly, outside
# container_exec.sh, after the container invocation above has already covered
# them; it looks like a removed diff line that survived extraction — confirm.
echo 'Running unit tests one at a time...'
for test_file in tests/unittests/test_*.py; do
echo "Testing: $test_file with $NUM_RANKS ranks"
python tests/run_tests_distributed.py --num_ranks $NUM_RANKS "$test_file" -v --tb=short --durations=10
done
Loading
Loading