8787        export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH 
8888        PYTHON_EXECUTABLE=python source .ci/scripts/test_model.sh "${{ matrix.model }}" cmake cuda 
8989
90-    test -voxtral-cuda-e2e  :
91-     name : test -voxtral-cuda-e2e 
90+    export -voxtral-cuda-artifact  :
91+     name : export -voxtral-cuda-artifact 
9292    uses : pytorch/test-infra/.github/workflows/linux_job_v2.yml@main 
9393    permissions :
9494      id-token : write 
@@ -104,6 +104,7 @@ jobs:
104104      gpu-arch-version : 12.6 
105105      use-custom-docker-registry : false 
106106      submodules : recursive 
107+       upload-artifact : voxtral-cuda-export 
107108      ref : ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} 
108109      script : | 
109110        set -eux 
@@ -118,6 +119,7 @@ jobs:
118119        OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt) 
119120        pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION} 
120121        pip install mistral-common librosa 
122+         pip list 
121123        echo "::endgroup::" 
122124
123125        echo "::group::Export Voxtral" 
@@ -129,9 +131,58 @@ jobs:
129131            --device cuda \ 
130132            --max_seq_len 1024 \ 
131133            --output_dir ./ 
134+         python -m executorch.extension.audio.mel_spectrogram \ 
135+             --feature_size 128 \ 
136+             --stack_output \ 
137+             --max_audio_len 300 \ 
138+             --output_file voxtral_preprocessor.pte 
139+ 
140+         test -f model.pte 
141+         test -f aoti_cuda_blob.ptd 
142+         test -f voxtral_preprocessor.pte 
132143        echo "::endgroup::" 
133144
134-         echo "::group::Build Voxtral Runner" 
145+         echo "::group::Store Voxtral Artifacts" 
146+         mkdir -p "${RUNNER_ARTIFACT_DIR}" 
147+         cp model.pte "${RUNNER_ARTIFACT_DIR}/" 
148+         cp aoti_cuda_blob.ptd "${RUNNER_ARTIFACT_DIR}/" 
149+         cp voxtral_preprocessor.pte "${RUNNER_ARTIFACT_DIR}/" 
150+         ls -al "${RUNNER_ARTIFACT_DIR}" 
151+         echo "::endgroup::" 
152+ 
153+    benchmark-voxtral-cuda :
154+     name : benchmark-voxtral-cuda 
155+     needs : export-voxtral-cuda-artifact 
156+     uses : pytorch/test-infra/.github/workflows/linux_job_v2.yml@main 
157+     permissions :
158+       id-token : write 
159+       contents : read 
160+     strategy :
161+       fail-fast : false 
162+     with :
163+       timeout : 90 
164+       runner : linux.g5.4xlarge.nvidia.gpu 
165+       gpu-arch-type : cuda 
166+       gpu-arch-version : 12.6 
167+       use-custom-docker-registry : false 
168+       submodules : recursive 
169+       download-artifact : voxtral-cuda-export 
170+       ref : ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} 
171+       script : | 
172+         set -eux 
173+ 
174+         echo "::group::Setup ExecuTorch Requirements" 
175+         CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_requirements.sh 
176+         pip list 
177+         echo "::endgroup::" 
178+ 
179+         echo "::group::Prepare Voxtral Artifacts" 
180+         cp "${RUNNER_ARTIFACT_DIR}/model.pte" . 
181+         cp "${RUNNER_ARTIFACT_DIR}/aoti_cuda_blob.ptd" . 
182+         ls -al model.pte aoti_cuda_blob.ptd 
183+         echo "::endgroup::" 
184+ 
185+         echo "::group::Build Voxtral Benchmark" 
135186        cmake -DCMAKE_BUILD_TYPE=Release \ 
136187              -DEXECUTORCH_BUILD_CUDA=ON \ 
137188              -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ 
@@ -142,31 +193,90 @@ jobs:
142193        cmake --build cmake-out -j$(( $(nproc) - 1 )) --target voxtral_runner 
143194        echo "::endgroup::" 
144195
196+         echo "::group::Run Voxtral Benchmark" 
197+ 
198+         export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH 
199+         cmake-out/backends/cuda/voxtral_runner model.pte aoti_cuda_blob.ptd 
200+ 
201+         echo "::endgroup::" 
202+ 
203+    test-voxtral-cuda-e2e :
204+     name : test-voxtral-cuda-e2e 
205+     needs : export-voxtral-cuda-artifact 
206+     uses : pytorch/test-infra/.github/workflows/linux_job_v2.yml@main 
207+     permissions :
208+       id-token : write 
209+       contents : read 
210+     strategy :
211+       fail-fast : false 
212+     with :
213+       timeout : 90 
214+       runner : linux.g5.4xlarge.nvidia.gpu 
215+       gpu-arch-type : cuda 
216+       gpu-arch-version : 12.6 
217+       use-custom-docker-registry : false 
218+       submodules : recursive 
219+       download-artifact : voxtral-cuda-export 
220+       ref : ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} 
221+       script : | 
222+         set -eux 
223+ 
224+         echo "::group::Setup ExecuTorch Requirements" 
225+         CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_requirements.sh 
226+         pip list 
227+         echo "::endgroup::" 
228+ 
229+         echo "::group::Prepare Voxtral Artifacts" 
230+         cp "${RUNNER_ARTIFACT_DIR}/model.pte" . 
231+         cp "${RUNNER_ARTIFACT_DIR}/aoti_cuda_blob.ptd" . 
232+         cp "${RUNNER_ARTIFACT_DIR}/voxtral_preprocessor.pte" . 
233+         TOKENIZER_URL="https://huggingface.co/mistralai/Voxtral-Mini-3B-2507/resolve/main/tekken.json" 
234+         curl -L $TOKENIZER_URL -o tekken.json 
235+         ls -al model.pte aoti_cuda_blob.ptd voxtral_preprocessor.pte tekken.json 
236+         echo "::endgroup::" 
237+ 
238+         echo "::group::Download Test Audio File" 
239+         AUDIO_URL="https://github.com/voxserv/audio_quality_testing_samples/raw/refs/heads/master/testaudio/16000/test01_20s.wav" 
240+         curl -L $AUDIO_URL -o poem.wav 
241+         echo "::endgroup::" 
242+ 
243+         echo "::group::Build Voxtral Runner" 
244+         cmake --preset llm \ 
245+               -DEXECUTORCH_BUILD_CUDA=ON \ 
246+               -DCMAKE_INSTALL_PREFIX=cmake-out \ 
247+               -DCMAKE_BUILD_TYPE=Release \ 
248+               -Bcmake-out -S. 
249+         cmake --build cmake-out -j$(( $(nproc) - 1 )) --target install --config Release 
250+ 
251+         cmake -DEXECUTORCH_BUILD_CUDA=ON \ 
252+               -DCMAKE_BUILD_TYPE=Release \ 
253+               -Sexamples/models/voxtral \ 
254+               -Bcmake-out/examples/models/voxtral/ 
255+         cmake --build cmake-out/examples/models/voxtral --target voxtral_runner --config Release 
256+         echo "::endgroup::" 
257+ 
145258        echo "::group::Run Voxtral Runner" 
146-         # Capture output and allow exit code 139 if we have the expected printout 
147259        set +e 
148260        export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH 
149-         OUTPUT=$(cmake-out/backends/cuda/voxtral_runner model.pte aoti_cuda_blob.ptd 2>&1) 
261+         OUTPUT=$(cmake-out/examples/models/voxtral/voxtral_runner \ 
262+               --model_path model.pte \ 
263+               --data_path aoti_cuda_blob.ptd \ 
264+               --tokenizer_path tekken.json \ 
265+               --audio_path poem.wav \ 
266+               --processor_path voxtral_preprocessor.pte \ 
267+               --temperature 0 2>&1) 
150268        EXIT_CODE=$? 
151269        set -e 
152270
153271        echo "$OUTPUT" 
154272
155-         # Check if the output contains "Run latency (ms):" 
156-         if echo "$OUTPUT" | grep -q "Run latency (ms):"; then 
157-           echo "Found expected output: 'Run latency (ms):'" 
158-           if [ $EXIT_CODE -eq 139 ]; then 
159-             echo "Exit code 139 (segfault) detected, but passing since we have the expected output" 
160-             exit 0 
161-           elif [ $EXIT_CODE -ne 0 ]; then 
162-             echo "Unexpected exit code: $EXIT_CODE" 
163-             exit $EXIT_CODE 
164-           else 
165-             echo "Command succeeded with exit code 0" 
166-             exit 0 
167-           fi 
168-         else 
169-           echo "Expected output 'Run latency (ms):' not found in output" 
273+         if ! echo "$OUTPUT" | grep -iq "poem"; then 
274+           echo "Expected output 'poem' not found in output" 
170275          exit 1 
171276        fi 
277+ 
278+         if [ $EXIT_CODE -ne 0 ]; then 
279+           echo "Unexpected exit code: $EXIT_CODE" 
280+           exit $EXIT_CODE 
281+         fi 
172282        echo "::endgroup::" 
0 commit comments