Pass which replaces torch quantized embedding byte with cadence variant #87
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Test ExecuTorch CUDA Build Compatibility
# This workflow tests whether ExecuTorch can be successfully built with CUDA support
# across different CUDA versions (12.6, 12.8, 13.0) using the command:
#   CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_executorch.sh
#
# Note: ExecuTorch automatically detects the system CUDA version using nvcc and
# installs the appropriate PyTorch wheel. No manual CUDA/PyTorch installation needed.
name: Test CUDA Builds

# Run on every pull request, and on pushes to main and release branches.
on:
  pull_request:
  push:
    branches:
      - main
      - release/*

# One concurrency group per workflow + PR number (or commit SHA for pushes).
# The two trailing event_name terms always evaluate to false with the triggers
# declared above; they only matter if workflow_dispatch/schedule are added.
# In-progress runs are never cancelled by a newer run.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: false

jobs:
# Build ExecuTorch with CUDA enabled, once per CUDA version in the matrix.
  test-cuda-builds:
    strategy:
      # Let every CUDA version finish even if one of them fails.
      fail-fast: false
      matrix:
        cuda-version: ["12.6", "12.8", "13.0"]

    name: test-executorch-cuda-build-${{ matrix.cuda-version }}
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    permissions:
      id-token: write
      contents: read
    with:
      timeout: 90
      runner: linux.g5.4xlarge.nvidia.gpu
      gpu-arch-type: cuda
      gpu-arch-version: ${{ matrix.cuda-version }}
      use-custom-docker-registry: false
      submodules: recursive
      # On pull requests test the PR head commit; otherwise the pushed commit.
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        set -eux

        # Test ExecuTorch CUDA build - ExecuTorch will automatically detect CUDA version
        # and install the appropriate PyTorch wheel when CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON"
        source .ci/scripts/test-cuda-build.sh "${{ matrix.cuda-version }}"

# This job will fail if any of the CUDA versions fail
  check-all-cuda-builds:
    needs: test-cuda-builds
    runs-on: ubuntu-latest
    # Run even when the matrix job failed, so the failure is reported here.
    if: always()
    steps:
      - name: Check if all CUDA builds succeeded
        run: |
          if [[ "${{ needs.test-cuda-builds.result }}" != "success" ]]; then
            echo "ERROR: One or more ExecuTorch CUDA builds failed!"
            echo "CUDA build results: ${{ needs.test-cuda-builds.result }}"
            exit 1
          else
            # Keep this version list in sync with the test-cuda-builds matrix.
            echo "SUCCESS: All ExecuTorch CUDA builds (12.6, 12.8, 13.0) completed successfully!"
          fi

# Install ExecuTorch with CUDA enabled, then run test_model.sh per matrix model.
  test-models-cuda:
    name: test-models-cuda
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    permissions:
      id-token: write
      contents: read
    strategy:
      # Let every model finish even if one of them fails.
      fail-fast: false
      matrix:
        model: [linear, add, add_mul, resnet18]

    with:
      timeout: 90
      runner: linux.g5.4xlarge.nvidia.gpu
      gpu-arch-type: cuda
      # Quoted so YAML keeps the version as a string rather than a float.
      gpu-arch-version: "12.6"
      use-custom-docker-registry: false
      submodules: recursive
      # On pull requests test the PR head commit; otherwise the pushed commit.
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        set -eux

        PYTHON_EXECUTABLE=python CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_executorch.sh
        # Prepend the conda lib dir. The ":+" form keeps `set -u` from aborting
        # when LD_LIBRARY_PATH is unset and avoids leaving a trailing ':'.
        export LD_LIBRARY_PATH="/opt/conda/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
        PYTHON_EXECUTABLE=python source .ci/scripts/test_model.sh "${{ matrix.model }}" cmake cuda

# End-to-end Voxtral check: export the model with the CUDA recipe, build
# voxtral_runner, run it, and verify that it prints a run latency.
  test-voxtral-cuda-e2e:
    name: test-voxtral-cuda-e2e
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    permissions:
      id-token: write
      contents: read
    secrets: inherit
    strategy:
      fail-fast: false
    with:
      timeout: 90
      secrets-env: EXECUTORCH_HF_TOKEN
      runner: linux.g5.4xlarge.nvidia.gpu
      gpu-arch-type: cuda
      # Quoted so YAML keeps the version as a string rather than a float.
      gpu-arch-version: "12.6"
      use-custom-docker-registry: false
      submodules: recursive
      # On pull requests test the PR head commit; otherwise the pushed commit.
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        set -eux

        echo "::group::Setup ExecuTorch"
        CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_executorch.sh
        echo "::endgroup::"

        echo "::group::Setup Huggingface"
        pip install -U "huggingface_hub[cli]" accelerate
        # Pause xtrace so the expanded token is not echoed into the job log.
        set +x
        huggingface-cli login --token "$SECRET_EXECUTORCH_HF_TOKEN"
        set -x
        OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
        pip install "git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}"
        pip install mistral-common librosa
        echo "::endgroup::"

        echo "::group::Export Voxtral"
        optimum-cli export executorch \
            --model "mistralai/Voxtral-Mini-3B-2507" \
            --task "multimodal-text-to-text" \
            --recipe "cuda" \
            --dtype bfloat16 \
            --device cuda \
            --max_seq_len 1024 \
            --output_dir ./
        echo "::endgroup::"

        echo "::group::Build Voxtral Runner"
        cmake -DCMAKE_BUILD_TYPE=Release \
              -DEXECUTORCH_BUILD_CUDA=ON \
              -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
              -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
              -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \
              -DEXECUTORCH_BUILD_TESTS=ON \
              -Bcmake-out .
        cmake --build cmake-out -j$(( $(nproc) - 1 )) --target voxtral_runner
        echo "::endgroup::"

        echo "::group::Run Voxtral Runner"
        # Prepend the conda lib dir. The ":+" form keeps `set -u` from aborting
        # when LD_LIBRARY_PATH is unset and avoids leaving a trailing ':'.
        export LD_LIBRARY_PATH="/opt/conda/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

        # Capture output and allow exit code 139 if we have the expected printout
        set +e
        OUTPUT=$(cmake-out/backends/cuda/voxtral_runner model.pte aoti_cuda_blob.ptd 2>&1)
        EXIT_CODE=$?
        set -e

        echo "$OUTPUT"

        # Decide the final status first and exit only after closing the log
        # group, so "::endgroup::" is emitted on every path.
        if echo "$OUTPUT" | grep -q "Run latency (ms):"; then
          echo "Found expected output: 'Run latency (ms):'"
          if [ "$EXIT_CODE" -eq 139 ]; then
            echo "Exit code 139 (segfault) detected, but passing since we have the expected output"
            FINAL_STATUS=0
          elif [ "$EXIT_CODE" -ne 0 ]; then
            echo "Unexpected exit code: $EXIT_CODE"
            FINAL_STATUS=$EXIT_CODE
          else
            echo "Command succeeded with exit code 0"
            FINAL_STATUS=0
          fi
        else
          echo "Expected output 'Run latency (ms):' not found in output"
          FINAL_STATUS=1
        fi
        echo "::endgroup::"
        exit "$FINAL_STATUS"