8989
9090 export-voxtral-cuda-artifact :
9191 name : export-voxtral-cuda-${{ matrix.quant.name }}
92+ # Skip this job if the pull request is from a fork (HuggingFace secrets are not available)
93+ if : github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request'
9294 uses : pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
9395 permissions :
9496 id-token : write
@@ -126,7 +128,7 @@ jobs:
126128 echo "::endgroup::"
127129
128130 echo "::group::Setup Huggingface"
129- pip install -U "huggingface_hub[cli]" accelerate
131+ pip install -U "huggingface_hub[cli]<1.0" accelerate
130132 huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
131133 OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
132134 pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
@@ -166,6 +168,8 @@ jobs:
166168
167169 export-gemma3-cuda-artifact :
168170 name : export-gemma3-cuda-${{ matrix.quant.name }}
171+ # Skip this job if the pull request is from a fork (HuggingFace secrets are not available)
172+ if : github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request'
169173 uses : pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
170174 permissions :
171175 id-token : write
@@ -176,12 +180,12 @@ jobs:
176180 matrix :
177181 quant :
178182 - name : "non-quantized"
179- artifact : "voxtral-cuda-export"
183+ artifact : "gemma3-cuda-export"
180184 extra_args : ""
181- # TODO: enable gemma3 quantization
182- # - name : "quantized-int4-tile-packed"
183- # artifact : "voxtral-cuda-quantized-int4-tile-packed "
184- # extra_args: "--qlinear 4w --qlinear_encoder 4w --qlinear_packing_format tile_packed_to_4d --qlinear_encoder_packing_format tile_packed_to_4d"
185+ - name : "quantized-int4-tile-packed"
186+ artifact : "gemma3-cuda-quantized-int4-tile-packed"
187+ extra_args : "--qlinear 4w --qlinear_encoder 4w --qlinear_packing_format tile_packed_to_4d --qlinear_encoder_packing_format tile_packed_to_4d"
188+ # TODO: enable int4-weight-only on gemma3.
185189 # - name: "quantized-int4-weight-only"
186190 # artifact: "voxtral-cuda-quantized-int4-weight-only"
187191 # # TODO: adding "--qlinear 4w" produces invalid results. Need further investigation.
@@ -194,7 +198,7 @@ jobs:
194198 gpu-arch-version : 12.6
195199 use-custom-docker-registry : false
196200 submodules : recursive
197- upload-artifact : gemma3-cuda-export
201+ upload-artifact : ${{ matrix.quant.artifact }}
198202 ref : ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
199203 script : |
200204 set -eux
@@ -204,7 +208,7 @@ jobs:
204208 echo "::endgroup::"
205209
206210 echo "::group::Setup Huggingface"
207- pip install -U "huggingface_hub[cli]" accelerate
211+ pip install -U "huggingface_hub[cli]<1.0" accelerate
208212 huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
209213 OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
210214 pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
@@ -255,7 +259,7 @@ jobs:
255259 set -eux
256260
257261 echo "::group::Setup ExecuTorch Requirements"
258- CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_requirements.sh
262+ ./install_requirements.sh
259263 pip list
260264 echo "::endgroup::"
261265
@@ -305,7 +309,7 @@ jobs:
305309 set -eux
306310
307311 echo "::group::Setup ExecuTorch Requirements"
308- CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_requirements.sh
312+ ./install_requirements.sh
309313 pip list
310314 echo "::endgroup::"
311315
@@ -363,7 +367,7 @@ jobs:
363367 set -eux
364368
365369 echo "::group::Setup ExecuTorch Requirements"
366- CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_requirements.sh
370+ ./install_requirements.sh
367371 pip list
368372 echo "::endgroup::"
369373
@@ -435,9 +439,9 @@ jobs:
435439 format :
436440 - name : "non-quantized"
437441 artifact : "gemma3-cuda-export"
438- # TODO: enable quantized gemma3.
439- # - name : "quantized-int4-tile-packed"
440- # artifact: "gemma3-cuda-quantized- int4-tile-packed"
442+ - name : "quantized-int4-tile-packed"
443+ artifact : "gemma3-cuda-quantized-int4-tile-packed"
444+ # TODO: enable int4-weight-only on gemma3.
441445 # - name: "quantized-int4-weight-only"
442446 # artifact: "gemma3-cuda-quantized-int4-weight-only"
443447 with :
0 commit comments