@@ -150,6 +150,61 @@ jobs:
150150 ls -al "${RUNNER_ARTIFACT_DIR}"
151151 echo "::endgroup::"
152152
153+ export-gemma3-cuda-artifact :  # job: export Gemma3 to ExecuTorch CUDA artifacts (model.pte + aoti_cuda_blob.ptd)
154+ name : export-gemma3-cuda-artifact
155+ uses : pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
156+ permissions :
157+ id-token : write
158+ contents : read
159+ secrets : inherit
160+ strategy :
161+ fail-fast : false
162+ with :
163+ timeout : 90  # minutes
164+ secrets-env : EXECUTORCH_HF_TOKEN  # surfaced to the script as SECRET_EXECUTORCH_HF_TOKEN (used by the login step below)
165+ runner : linux.g5.4xlarge.nvidia.gpu
166+ gpu-arch-type : cuda
167+ gpu-arch-version : 12.6
168+ use-custom-docker-registry : false
169+ submodules : recursive
170+ upload-artifact : gemma3-cuda-export  # consumed by benchmark-gemma3-cuda via download-artifact
171+ ref : ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}  # PR head SHA on pull requests, pushed SHA otherwise
172+ script : |
173+ set -eux
174+
175+ echo "::group::Setup ExecuTorch"
176+ ./install_executorch.sh  # install ExecuTorch from this checkout
177+ echo "::endgroup::"
178+
179+ echo "::group::Setup Huggingface"
180+ pip install -U "huggingface_hub[cli]" accelerate
181+ huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN  # auth required to pull the gated google/gemma-3-4b-it model
182+ OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)  # pinned optimum-executorch commit
183+ pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
184+ pip list
185+ echo "::endgroup::"
186+
187+ echo "::group::Export Gemma3"
188+ optimum-cli export executorch \
189+ --model "google/gemma-3-4b-it" \
190+ --task "multimodal-text-to-text" \
191+ --recipe "cuda" \
192+ --dtype bfloat16 \
193+ --device cuda \
194+ --max_seq_len 64 \
195+ --output_dir ./
196+
197+ test -f model.pte  # fail fast if the export did not produce the program file
198+ test -f aoti_cuda_blob.ptd  # fail fast if the export did not produce the CUDA data blob
199+ echo "::endgroup::"
200+
201+ echo "::group::Store Gemma3 Artifacts"
202+ mkdir -p "${RUNNER_ARTIFACT_DIR}/"
203+ cp model.pte "${RUNNER_ARTIFACT_DIR}/"
204+ cp aoti_cuda_blob.ptd "${RUNNER_ARTIFACT_DIR}/"
205+ ls -al "${RUNNER_ARTIFACT_DIR}/"
206+ echo "::endgroup::"
207+
153208 benchmark-voxtral-cuda :
154209 name : benchmark-voxtral-cuda
155210 needs : export-voxtral-cuda-artifact
@@ -190,13 +245,63 @@ jobs:
190245 -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \
191246 -DEXECUTORCH_BUILD_TESTS=ON \
192247 -Bcmake-out .
193- cmake --build cmake-out -j$(( $(nproc) - 1 )) --target voxtral_runner
248+ cmake --build cmake-out -j$(( $(nproc) - 1 )) --target multimodal_benchmark
194249 echo "::endgroup::"
195250
196251 echo "::group::Run Voxtral Benchmark"
197252
198253 export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH
199- cmake-out/backends/cuda/voxtral_runner model.pte aoti_cuda_blob.ptd
254+ cmake-out/backends/cuda/multimodal_benchmark voxtral model.pte aoti_cuda_blob.ptd
255+
256+ echo "::endgroup::"
257+
258+ benchmark-gemma3-cuda :  # job: build and run the multimodal benchmark against the exported Gemma3 artifacts
259+ name : benchmark-gemma3-cuda
260+ needs : export-gemma3-cuda-artifact  # must run after the artifact upload
261+ uses : pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
262+ permissions :
263+ id-token : write
264+ contents : read
265+ strategy :
266+ fail-fast : false
267+ with :
268+ timeout : 90  # minutes
269+ runner : linux.g5.4xlarge.nvidia.gpu
270+ gpu-arch-type : cuda
271+ gpu-arch-version : 12.6
272+ use-custom-docker-registry : false
273+ submodules : recursive
274+ download-artifact : gemma3-cuda-export  # produced by export-gemma3-cuda-artifact
275+ ref : ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}  # PR head SHA on pull requests, pushed SHA otherwise
276+ script : |
277+ set -eux
278+
279+ echo "::group::Setup ExecuTorch Requirements"
280+ CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_requirements.sh  # install deps with the CUDA backend enabled
281+ pip list
282+ echo "::endgroup::"
283+
284+ echo "::group::Prepare Gemma3 Artifacts"
285+ cp "${RUNNER_ARTIFACT_DIR}/model.pte" .  # downloaded artifact files land in RUNNER_ARTIFACT_DIR
286+ cp "${RUNNER_ARTIFACT_DIR}/aoti_cuda_blob.ptd" .
287+ ls -al model.pte aoti_cuda_blob.ptd
288+ echo "::endgroup::"
289+
290+ echo "::group::Build Gemma3 Benchmark"
291+ cmake -DCMAKE_BUILD_TYPE=Release \
292+ -DEXECUTORCH_BUILD_CUDA=ON \
293+ -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
294+ -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
295+ -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \
296+ -DEXECUTORCH_BUILD_TESTS=ON \
297+ -Bcmake-out .
298+ cmake --build cmake-out -j$(( $(nproc) - 1 )) --target multimodal_benchmark  # shared runner binary, also used by the voxtral job
299+ echo "::endgroup::"
300+
301+ echo "::group::Run Gemma3 Benchmark"
302+
303+ export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH  # pick up libraries shipped in the conda env
304+ cmake-out/backends/cuda/multimodal_benchmark gemma3 model.pte aoti_cuda_blob.ptd  # first arg selects the model family
200305
201306 echo "::endgroup::"
202307
0 commit comments