8787 export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH
8888 PYTHON_EXECUTABLE=python source .ci/scripts/test_model.sh "${{ matrix.model }}" cmake cuda
8989
90- test-voxtral-cuda-e2e :
91- name : test-voxtral-cuda-e2e
90+ export-voxtral-cuda-artifact :
91+ name : export-voxtral-cuda-artifact
9292 uses : pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
9393 permissions :
9494 id-token : write
@@ -104,6 +104,7 @@ jobs:
104104 gpu-arch-version : 12.6
105105 use-custom-docker-registry : false
106106 submodules : recursive
107+ upload-artifact : voxtral-cuda-export
107108 ref : ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
108109 script : |
109110 set -eux
@@ -118,6 +119,7 @@ jobs:
118119 OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
119120 pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
120121 pip install mistral-common librosa
122+ pip list
121123 echo "::endgroup::"
122124
123125 echo "::group::Export Voxtral"
@@ -129,9 +131,58 @@ jobs:
129131 --device cuda \
130132 --max_seq_len 1024 \
131133 --output_dir ./
134+ python -m executorch.extension.audio.mel_spectrogram \
135+ --feature_size 128 \
136+ --stack_output \
137+ --max_audio_len 300 \
138+ --output_file voxtral_preprocessor.pte
139+
140+ test -f model.pte
141+ test -f aoti_cuda_blob.ptd
142+ test -f voxtral_preprocessor.pte
132143 echo "::endgroup::"
133144
134- echo "::group::Build Voxtral Runner"
145+ echo "::group::Store Voxtral Artifacts"
146+ mkdir -p "${RUNNER_ARTIFACT_DIR}"
147+ cp model.pte "${RUNNER_ARTIFACT_DIR}/"
148+ cp aoti_cuda_blob.ptd "${RUNNER_ARTIFACT_DIR}/"
149+ cp voxtral_preprocessor.pte "${RUNNER_ARTIFACT_DIR}/"
150+ ls -al "${RUNNER_ARTIFACT_DIR}"
151+ echo "::endgroup::"
152+
153+ benchmark-voxtral-cuda :
154+ name : benchmark-voxtral-cuda
155+ needs : export-voxtral-cuda-artifact
156+ uses : pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
157+ permissions :
158+ id-token : write
159+ contents : read
160+ strategy :
161+ fail-fast : false
162+ with :
163+ timeout : 90
164+ runner : linux.g5.4xlarge.nvidia.gpu
165+ gpu-arch-type : cuda
166+ gpu-arch-version : 12.6
167+ use-custom-docker-registry : false
168+ submodules : recursive
169+ download-artifact : voxtral-cuda-export
170+ ref : ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
171+ script : |
172+ set -eux
173+
174+ echo "::group::Setup ExecuTorch Requirements"
175+ CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_requirements.sh
176+ pip list
177+ echo "::endgroup::"
178+
179+ echo "::group::Prepare Voxtral Artifacts"
180+ cp "${RUNNER_ARTIFACT_DIR}/model.pte" .
181+ cp "${RUNNER_ARTIFACT_DIR}/aoti_cuda_blob.ptd" .
182+ ls -al model.pte aoti_cuda_blob.ptd
183+ echo "::endgroup::"
184+
185+ echo "::group::Build Voxtral Benchmark"
135186 cmake -DCMAKE_BUILD_TYPE=Release \
136187 -DEXECUTORCH_BUILD_CUDA=ON \
137188 -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
@@ -142,31 +193,90 @@ jobs:
142193 cmake --build cmake-out -j$(( $(nproc) - 1 )) --target voxtral_runner
143194 echo "::endgroup::"
144195
196+ echo "::group::Run Voxtral Benchmark"
197+
198+ export LD_LIBRARY_PATH="/opt/conda/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" # safe under `set -u` when the variable is unset; no trailing ':' is appended
199+ cmake-out/backends/cuda/voxtral_runner model.pte aoti_cuda_blob.ptd
200+
201+ echo "::endgroup::"
202+
203+ test-voxtral-cuda-e2e :
204+ name : test-voxtral-cuda-e2e
205+ needs : export-voxtral-cuda-artifact
206+ uses : pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
207+ permissions :
208+ id-token : write
209+ contents : read
210+ strategy :
211+ fail-fast : false
212+ with :
213+ timeout : 90
214+ runner : linux.g5.4xlarge.nvidia.gpu
215+ gpu-arch-type : cuda
216+ gpu-arch-version : 12.6
217+ use-custom-docker-registry : false
218+ submodules : recursive
219+ download-artifact : voxtral-cuda-export
220+ ref : ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
221+ script : |
222+ set -eux
223+
224+ echo "::group::Setup ExecuTorch Requirements"
225+ CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_requirements.sh
226+ pip list
227+ echo "::endgroup::"
228+
229+ echo "::group::Prepare Voxtral Artifacts"
230+ cp "${RUNNER_ARTIFACT_DIR}/model.pte" .
231+ cp "${RUNNER_ARTIFACT_DIR}/aoti_cuda_blob.ptd" .
232+ cp "${RUNNER_ARTIFACT_DIR}/voxtral_preprocessor.pte" .
233+ TOKENIZER_URL="https://huggingface.co/mistralai/Voxtral-Mini-3B-2507/resolve/main/tekken.json"
234+ curl -fL "$TOKENIZER_URL" -o tekken.json # -f: exit non-zero on an HTTP error instead of saving the error page as tekken.json
235+ ls -al model.pte aoti_cuda_blob.ptd voxtral_preprocessor.pte tekken.json
236+ echo "::endgroup::"
237+
238+ echo "::group::Download Test Audio File"
239+ AUDIO_URL="https://github.com/voxserv/audio_quality_testing_samples/raw/refs/heads/master/testaudio/16000/test01_20s.wav"
240+ curl -fL "$AUDIO_URL" -o poem.wav # -f: exit non-zero on an HTTP error instead of saving the error page as poem.wav
241+ echo "::endgroup::"
242+
243+ echo "::group::Build Voxtral Runner"
244+ cmake --preset llm \
245+ -DEXECUTORCH_BUILD_CUDA=ON \
246+ -DCMAKE_INSTALL_PREFIX=cmake-out \
247+ -DCMAKE_BUILD_TYPE=Release \
248+ -Bcmake-out -S.
249+ cmake --build cmake-out -j$(( $(nproc) - 1 )) --target install --config Release
250+
251+ cmake -DEXECUTORCH_BUILD_CUDA=ON \
252+ -DCMAKE_BUILD_TYPE=Release \
253+ -Sexamples/models/voxtral \
254+ -Bcmake-out/examples/models/voxtral/
255+ cmake --build cmake-out/examples/models/voxtral --target voxtral_runner --config Release
256+ echo "::endgroup::"
257+
145258 echo "::group::Run Voxtral Runner"
146- # Capture output and allow exit code 139 if we have the expected printout
147259 set +e
148260 export LD_LIBRARY_PATH="/opt/conda/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" # `set +e` above leaves `-u` active; this form tolerates an unset variable and appends no trailing ':'
149- OUTPUT=$(cmake-out/backends/cuda/voxtral_runner model.pte aoti_cuda_blob.ptd 2>&1)
261+ OUTPUT=$(cmake-out/examples/models/voxtral/voxtral_runner \
262+ --model_path model.pte \
263+ --data_path aoti_cuda_blob.ptd \
264+ --tokenizer_path tekken.json \
265+ --audio_path poem.wav \
266+ --processor_path voxtral_preprocessor.pte \
267+ --temperature 0 2>&1)
150268 EXIT_CODE=$?
151269 set -e
152270
153271 echo "$OUTPUT"
154272
155- # Check if the output contains "Run latency (ms):"
156- if echo "$OUTPUT" | grep -q "Run latency (ms):"; then
157- echo "Found expected output: 'Run latency (ms):'"
158- if [ $EXIT_CODE -eq 139 ]; then
159- echo "Exit code 139 (segfault) detected, but passing since we have the expected output"
160- exit 0
161- elif [ $EXIT_CODE -ne 0 ]; then
162- echo "Unexpected exit code: $EXIT_CODE"
163- exit $EXIT_CODE
164- else
165- echo "Command succeeded with exit code 0"
166- exit 0
167- fi
168- else
169- echo "Expected output 'Run latency (ms):' not found in output"
273+ if ! echo "$OUTPUT" | grep -iq "poem"; then
274+ echo "Expected output 'poem' not found in output"
170275 exit 1
171276 fi
277+
278+ if [ $EXIT_CODE -ne 0 ]; then
279+ echo "Unexpected exit code: $EXIT_CODE"
280+ exit $EXIT_CODE
281+ fi
172282 echo "::endgroup::"
0 commit comments