|
| 1 | +trigger: # push and pull_request events, each limited to the listed branches |
| 2 | + push: |
| 3 | + branches: ["master", "release/stable"] |
| 4 | + pull_request: |
| 5 | + branches: ["master", "release/stable"] |
| 6 | + |
| 7 | +timeout: "90" # minutes |
| 8 | +parametrize: |
| 9 | + matrix: |
| 10 | + PACKAGE_NAME: ["fabric", "pytorch"] # read by the run script to pick tests/parity_<name> and to gate the fabric standalone tasks |
| 11 | +image: "nvidia/cuda:12.1.1-runtime-ubuntu22.04" # the run script derives the CUDA version from the tag after ":" via ${image} |
| 12 | +machine: "L4_X_2" # presumably a 2-GPU machine (the run script asserts at least 2 CUDA devices) - confirm |
| 13 | +env: |
| 14 | + TZ: "Etc/UTC" |
| 15 | + DEBIAN_FRONTEND: "noninteractive" |
| 16 | + python_version: "3.12" # interpolated into the apt package names and the venv interpreter path in the run script |
| 17 | + MKL_THREADING_LAYER: "GNU" |
| 18 | + CUDA_LAUNCH_BLOCKING: "1" |
| 19 | + NCCL_DEBUG: "INFO" |
| 20 | + TORCHDYNAMO_VERBOSE: "1" |
| 21 | + FREEZE_REQUIREMENTS: "1" |
| 22 | + RUN_ONLY_CUDA_TESTS: "1" |
| 23 | + |
| 24 | +run: | |
| 25 | + echo "Installing dependencies" # system packages first; Python and uv are installed in the following steps |
| 26 | + apt-get update -qq --fix-missing -o=Dpkg::Use-Pty=0 &> /dev/null # all output, including errors, is discarded |
| 27 | + apt-get install -q -y software-properties-common curl # curl is used below to fetch the uv installer |
| 28 | + echo "Add deadsnakes PPA for newer Python versions if needed" |
| 29 | + add-apt-repository ppa:deadsnakes/ppa -y |
| 30 | + apt-get update -qq --fix-missing -o=Dpkg::Use-Pty=0 &> /dev/null # refresh the package index after adding the PPA; output discarded again |
| 31 | + echo "Install Python ${python_version} and other dependencies" # note: the install below covers only build tooling and OpenMPI; the Python packages come in the next step |
| 32 | + apt-get install -q -y --no-install-recommends --allow-downgrades --allow-change-held-packages \ |
| 33 | + build-essential \ |
| 34 | + pkg-config \ |
| 35 | + cmake \ |
| 36 | + ca-certificates \ |
| 37 | + libopenmpi-dev \ |
| 38 | + openmpi-bin |
| 39 | +
|
| 40 | + echo "Install Python ${python_version} and UV" # interpreter, venv module and headers for the configured version, then uv |
| 41 | + apt-get install -y python${python_version} python${python_version}-venv python${python_version}-dev |
| 42 | + ln -sf /usr/bin/python${python_version} /usr/bin/python # make the bare `python` command resolve to this interpreter |
| 43 | + curl -LsSf https://astral.sh/uv/install.sh | sh # download the uv installer script and run it with sh |
| 44 | +
|
| 45 | + echo "Source the environment and ensure UV is in PATH" |
| 46 | + [ -f "$HOME/.local/bin/env" ] && . "$HOME/.local/bin/env" # sourced only when the file exists |
| 47 | + export PATH="$HOME/.local/bin:$PATH" |
| 48 | + source $HOME/.cargo/env 2>/dev/null || true # best-effort: a missing file is silently ignored |
| 49 | + export PATH="$HOME/.cargo/bin:$PATH" # presumably the two locations the uv installer may place the binary in - confirm |
| 50 | +
|
| 51 | + echo "Verify UV installation" # stop here if uv is missing, then build and activate the project venv |
| 52 | + command -v uv || { echo "UV not found in PATH" >&2; exit 1; } # brace group, not ( ): exit must end this script, and set -e is not active yet |
| 53 | + # Create and activate a local uv virtual environment; fall back from the absolute interpreter path to a name lookup to uv's default |
| 54 | + uv venv .venv -p "/usr/bin/python${python_version}" || uv venv .venv -p "python${python_version}" || uv venv .venv |
| 55 | + . .venv/bin/activate |
| 56 | + hash -r # drop cached command locations so python resolves inside the venv |
| 57 | +
|
| 58 | + echo "Show system information" # diagnostic output only: GPU tooling, interpreter and uv versions, installed packages |
| 59 | + whereis nvidia |
| 60 | + nvidia-smi |
| 61 | + python --version |
| 62 | + uv --version |
| 63 | + uv pip list |
| 64 | + set -ex # from here on: abort on the first failing command and trace each command; the steps above run without -e |
| 65 | +
|
| 66 | + # Derive the uv torch backend from the CUDA version in the image tag, e.g., "nvidia/cuda:12.6.3-devel-ubuntu22.04". NOTE(review): reads the shell variable ${image}; `image` is a top-level workflow key, not an env entry - confirm the runner exports it |
| 67 | + IMAGE_TAG="${image##*:}" # strip through the last ":" -> "12.6.3-devel-ubuntu22.04" |
| 68 | + CUDA_VERSION="${IMAGE_TAG%%-*}" # strip from the first "-" -> "12.6.3" |
| 69 | + echo "Using CUDA version: ${CUDA_VERSION}" |
| 70 | + CUDA_VERSION_M_M="${CUDA_VERSION%.*}" # drop the last dotted component -> "12.6" |
| 71 | + CUDA_VERSION_MM="${CUDA_VERSION_M_M//./}" # remove the dots -> "126" |
| 72 | + export UV_TORCH_BACKEND=cu${CUDA_VERSION_MM} # e.g. "cu126"; presumably consumed by the later uv pip install calls - confirm |
| 73 | +
|
| 74 | + echo "Adjust tests" # presumably rewrites lightning.fabric / lightning.pytorch imports under ./tests to the standalone package names - confirm against .actions/assistant.py |
| 75 | + uv pip install -q -r .actions/requirements.txt |
| 76 | + python .actions/assistant.py copy_replace_imports --source_dir="./tests" \ |
| 77 | + --source_import="lightning.fabric,lightning.pytorch" \ |
| 78 | + --target_import="lightning_fabric,pytorch_lightning" |
| 79 | +
|
| 80 | + echo "Install package" |
| 81 | + uv pip install ".[dev]" # install the current directory together with its "dev" extra |
| 82 | +
|
| 83 | + # Env details, then fail early unless at least 2 CUDA devices are visible to torch |
| 84 | + python requirements/collect_env_details.py |
| 85 | + python -c "import torch ; mgpu = torch.cuda.device_count() ; assert mgpu >= 2, f'GPU: {mgpu}'" |
| 86 | +
|
| 87 | + cd tests/ # everything below runs relative to tests/ until the final cd .. |
| 88 | + echo "Testing: benchmarks" |
| 89 | + export PL_RUNNING_BENCHMARKS=1 |
| 90 | + python -m pytest parity_${PACKAGE_NAME} -v --durations=0 # directory chosen by the matrix value: parity_fabric or parity_pytorch |
| 91 | + export PL_RUNNING_BENCHMARKS=0 |
| 92 | +
|
| 93 | + echo "Testing: fabric standalone tasks" # printed for every matrix value; the tasks themselves run only for fabric |
| 94 | + export PL_RUN_STANDALONE_TESTS=1 |
| 95 | + if [ "${PACKAGE_NAME}" == "fabric" ]; then |
| 96 | + cd parity_fabric/ |
| 97 | + bash run_standalone_tasks.sh cuda |
| 98 | + cd .. |
| 99 | + fi |
| 100 | + export PL_RUN_STANDALONE_TESTS=0 |
| 101 | +
|
| 102 | + cd .. |
| 103 | + echo "Benchmarks completed successfully" # reached only if nothing above failed, since set -e is active in this section |
0 commit comments