@@ -89,6 +89,8 @@ jobs:
 
   export-voxtral-cuda-artifact:
     name: export-voxtral-cuda-${{ matrix.quant.name }}
+    # Skip this job if the pull request is from a fork (HuggingFace secrets are not available)
+    if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request'
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     permissions:
       id-token: write
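
The if: guard added here (and again on the gemma3 job below) runs the job for pushes and for pull requests whose head branch lives in this repository, and skips it for fork PRs, where GitHub withholds repository secrets. A minimal standalone sketch of the same guard; the workflow and job names are invented for illustration:

name: guard-sketch
on: [push, pull_request]
jobs:
  needs-secrets:
    # On a fork PR, head.repo.full_name differs from github.repository and the event
    # name is 'pull_request', so both clauses are false and the job is skipped.
    # On push events github.event.pull_request is empty, so the second clause fires.
    if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request'
    runs-on: ubuntu-latest
    steps:
      - run: echo "repository secrets are available here"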
@@ -126,7 +128,7 @@ jobs:
         echo "::endgroup::"
 
         echo "::group::Setup Huggingface"
-        pip install -U "huggingface_hub[cli]" accelerate
+        pip install -U "huggingface_hub[cli]<1.0" accelerate
         huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
         OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
         pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
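
The new upper bound pins huggingface_hub to the 0.x series, presumably so the huggingface-cli login invocation on the next line keeps working across the 1.0 release. A local sketch of the same setup with a verification step; HF_TOKEN is an assumed stand-in for the workflow's secret:

- name: Setup Huggingface (sketch)
  run: |
    pip install -U "huggingface_hub[cli]<1.0" accelerate
    # Every 0.x version string sorts below "1.0", so this assert confirms the pin took effect.
    python -c "import huggingface_hub; assert huggingface_hub.__version__ < '1.0'"
    huggingface-cli login --token "$HF_TOKEN"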
@@ -166,6 +168,8 @@ jobs:
 
   export-gemma3-cuda-artifact:
     name: export-gemma3-cuda-${{ matrix.quant.name }}
+    # Skip this job if the pull request is from a fork (HuggingFace secrets are not available)
+    if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request'
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     permissions:
       id-token: write
@@ -176,12 +180,12 @@ jobs:
       matrix:
         quant:
           - name: "non-quantized"
-            artifact: "voxtral-cuda-export"
+            artifact: "gemma3-cuda-export"
             extra_args: ""
-          # TODO: enable gemma3 quantization
-          # - name: "quantized-int4-tile-packed"
-          #   artifact: "voxtral-cuda-quantized-int4-tile-packed"
-          #   extra_args: "--qlinear 4w --qlinear_encoder 4w --qlinear_packing_format tile_packed_to_4d --qlinear_encoder_packing_format tile_packed_to_4d"
+          - name: "quantized-int4-tile-packed"
+            artifact: "gemma3-cuda-quantized-int4-tile-packed"
+            extra_args: "--qlinear 4w --qlinear_encoder 4w --qlinear_packing_format tile_packed_to_4d --qlinear_encoder_packing_format tile_packed_to_4d"
+          # TODO: enable int4-weight-only on gemma3.
           # - name: "quantized-int4-weight-only"
           #   artifact: "voxtral-cuda-quantized-int4-weight-only"
           #   # TODO: adding "--qlinear 4w" produces invalid results. Need further investigation.
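
The quant matrix above now exercises the tile-packed int4 path for gemma3 instead of carrying it as a commented-out voxtral copy. How an entry's extra_args reaches the export is sketched below; the step is illustrative only, since the actual export command appears later in this workflow:

- name: Export gemma3 (sketch)
  run: |
    # For "quantized-int4-tile-packed" this expands to the --qlinear/--qlinear_encoder
    # flags verbatim; for "non-quantized" it is empty and the export runs unquantized.
    EXTRA_ARGS="${{ matrix.quant.extra_args }}"
    echo "exporting gemma3 with extra args: ${EXTRA_ARGS:-<none>}"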
@@ -194,7 +198,7 @@ jobs:
       gpu-arch-version: 12.6
       use-custom-docker-registry: false
       submodules: recursive
-      upload-artifact: gemma3-cuda-export
+      upload-artifact: ${{ matrix.quant.artifact }}
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
       script: |
         set -eux
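
With the artifact name hard-coded, both matrix legs would have tried to publish under the same name; taking it from the matrix gives each leg its own upload. Under the matrix as written above, the expression resolves to:

# non-quantized              -> gemma3-cuda-export
# quantized-int4-tile-packed -> gemma3-cuda-quantized-int4-tile-packed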
@@ -204,7 +208,7 @@ jobs:
         echo "::endgroup::"
 
         echo "::group::Setup Huggingface"
-        pip install -U "huggingface_hub[cli]" accelerate
+        pip install -U "huggingface_hub[cli]<1.0" accelerate
         huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
         OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
         pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
@@ -255,7 +259,7 @@ jobs:
         set -eux
 
         echo "::group::Setup ExecuTorch Requirements"
-        CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_requirements.sh
+        ./install_requirements.sh
         pip list
         echo "::endgroup::"
 
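This hunk and the two identical ones below drop the CUDA CMake flag from the benchmark jobs' dependency install. If some job still needed the CUDA-enabled build, a guard along these lines could reinstate it; NEEDS_CUDA_BUILD is an invented toggle, not something this diff introduces:

- name: Setup ExecuTorch Requirements (sketch)
  run: |
    # Hypothetical toggle; the diff itself removes the flag unconditionally.
    if [ "${NEEDS_CUDA_BUILD:-0}" = "1" ]; then
      CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_requirements.sh
    else
      ./install_requirements.sh
    fi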
@@ -305,7 +309,7 @@ jobs:
         set -eux
 
         echo "::group::Setup ExecuTorch Requirements"
-        CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_requirements.sh
+        ./install_requirements.sh
         pip list
         echo "::endgroup::"
 
@@ -363,7 +367,7 @@ jobs:
         set -eux
 
         echo "::group::Setup ExecuTorch Requirements"
-        CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_requirements.sh
+        ./install_requirements.sh
         pip list
         echo "::endgroup::"
 
@@ -435,9 +439,9 @@ jobs:
         format:
           - name: "non-quantized"
             artifact: "gemma3-cuda-export"
-          # TODO: enable quantized gemma3.
-          # - name: "quantized-int4-tile-packed"
-          #   artifact: "gemma3-cuda-quantized-int4-tile-packed"
+          - name: "quantized-int4-tile-packed"
+            artifact: "gemma3-cuda-quantized-int4-tile-packed"
+          # TODO: enable int4-weight-only on gemma3.
           # - name: "quantized-int4-weight-only"
           #   artifact: "gemma3-cuda-quantized-int4-weight-only"
     with:
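
The benchmark matrix now pairs each format with the per-leg artifact names introduced above. For completeness, a sketch of fetching whichever export a format entry names, assuming the standard download-artifact action rather than whatever plumbing linux_job_v2 provides:

- uses: actions/download-artifact@v4
  with:
    name: ${{ matrix.format.artifact }}  # e.g. gemma3-cuda-quantized-int4-tile-packed
    path: artifacts/  # arbitrary workspace path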