@@ -108,6 +108,7 @@ jobs:
           declare -A DEVICE_POOL_ARNS
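+          # Map of supported device names to their AWS Device Farm device pool ARNs;
+          # new devices are enabled by adding an entry here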
           DEVICE_POOL_ARNS[samsung_galaxy_s22]="arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/e59f866a-30aa-4aa1-87b7-4510e5820dfa"
           DEVICE_POOL_ARNS[samsung_galaxy_s24]="arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/98f8788c-2e25-4a3c-8bb2-0d1e8897c0db"
+          DEVICE_POOL_ARNS[google_pixel_8_pro]="arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/d65096ab-900b-4521-be8b-a3619b69236a"

           # Resolve device names with their corresponding ARNs
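+          # jq empty parses stdin and exits nonzero on invalid JSON, so the exit
+          # status tells us whether DEVICES is already a JSON value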
           if ! echo "$DEVICES" | jq empty 2>/dev/null; then
@@ -168,18 +169,20 @@ jobs:
     name: export-models
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     needs: set-parameters
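+    # Inherit repository secrets so the reusable linux_job workflow can read
+    # EXECUTORCH_HF_TOKEN, which is exposed to the script via secrets-env below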
+    secrets: inherit
     strategy:
       matrix:
         model: ${{ fromJson(needs.set-parameters.outputs.models) }}
         delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }}
       fail-fast: false
     with:
-      runner: linux.4xlarge
+      runner: linux.2xlarge.memory
       docker-image: executorch-ubuntu-22.04-qnn-sdk
       submodules: 'true'
       timeout: 60
       upload-artifact: android-models
       upload-artifact-to-s3: true
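+      # Each secret listed here is exported to the script with a SECRET_ prefix
+      # (used below as $SECRET_EXECUTORCH_HF_TOKEN)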
+      secrets-env: EXECUTORCH_HF_TOKEN
       script: |
         # The generic Linux job chooses to use base env, not the one set up by the image
         echo "::group::Setting up dev environment"
@@ -190,14 +193,109 @@ jobs:
             PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
         fi
         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
+        # Install requirements for export_llama
+        PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
         ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded/${{ matrix.model }}_${{ matrix.delegate }}
         echo "::endgroup::"

         echo "::group::Exporting ${{ matrix.delegate }} model: ${{ matrix.model }}"
         BUILD_MODE="cmake"
-        DTYPE="fp32"

-        if [[ ${{ matrix.model }} =~ ^stories* ]]; then
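+        # Hugging Face repo ids look like "<org>/<repo>"; local model names
+        # (stories110m, etc.) contain no slash, so this regex tells them apart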
+        if [[ ${{ matrix.model }} =~ ^[^/]+/[^/]+$ ]]; then
+            pip install -U "huggingface_hub[cli]"
+            huggingface-cli login --token "$SECRET_EXECUTORCH_HF_TOKEN"
+            pip install accelerate sentencepiece
+            # HuggingFace model. Assume the pattern is always like "<org>/<repo>"
+            HF_MODEL_REPO=${{ matrix.model }}
+            OUT_ET_MODEL_NAME="$(echo "$HF_MODEL_REPO" | awk -F'/' '{print $2}' | sed 's/_/-/g' | tr '[:upper:]' '[:lower:]')_${{ matrix.delegate }}"
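+            # e.g. "meta-llama/Llama-3.2-1B" with delegate "xnnpack_bf16" -> llama-3.2-1b_xnnpack_bf16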
+
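+            # Only meta-llama/* repos are handled; anything else falls through
+            # to the "Unsupported model" error below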
+            if [[ "$HF_MODEL_REPO" == meta-llama/* ]]; then
+                # Llama models on Hugging Face
+                if [[ "$HF_MODEL_REPO" == *"SpinQuant"* ]]; then
+                    # SpinQuant
+                    # Download the pre-quantized checkpoint from Hugging Face
+                    DOWNLOADED_PATH=$(
+                      bash .ci/scripts/download_hf_hub.sh \
+                        --model_id "${HF_MODEL_REPO}" \
+                        --files "tokenizer.model" "params.json" "consolidated.00.pth"
+                    )
+                    # Export using ExecuTorch's model definition
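+                    # The --preq_* flags describe how the checkpoint was already quantized
+                    # (8da4w: 8-bit dynamic activations, 4-bit grouped weights; 8-bit output
+                    # layer), so export consumes it as-is instead of re-quantizing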
+                    python -m examples.models.llama.export_llama \
+                      --model "llama3_2" \
+                      --checkpoint "${DOWNLOADED_PATH}/consolidated.00.pth" \
+                      --params "${DOWNLOADED_PATH}/params.json" \
+                      --use_sdpa_with_kv_cache \
+                      -X \
+                      --xnnpack-extended-ops \
+                      --preq_mode 8da4w_output_8da8w \
+                      --preq_group_size 32 \
+                      --max_seq_length 2048 \
+                      --output_name "${OUT_ET_MODEL_NAME}.pte" \
+                      -kv \
+                      -d fp32 \
+                      --preq_embedding_quantize 8,0 \
+                      --use_spin_quant native \
+                      --metadata '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}'
+                    ls -lh "${OUT_ET_MODEL_NAME}.pte"
+                elif [[ "$HF_MODEL_REPO" == *"QLORA"* ]]; then
+                    # QAT + LoRA
+                    # Download the pre-quantized checkpoint from Hugging Face
+                    DOWNLOADED_PATH=$(
+                      bash .ci/scripts/download_hf_hub.sh \
+                        --model_id "${HF_MODEL_REPO}" \
+                        --files "tokenizer.model" "params.json" "consolidated.00.pth"
+                    )
+                    # Export using ExecuTorch's model definition
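+                    # -qat and -lora 16 mark the checkpoint as QAT-trained with a
+                    # rank-16 LoRA adapter, alongside the same preq_* settings as above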
+                    python -m examples.models.llama.export_llama \
+                      --model "llama3_2" \
+                      --checkpoint "${DOWNLOADED_PATH}/consolidated.00.pth" \
+                      --params "${DOWNLOADED_PATH}/params.json" \
+                      -qat \
+                      -lora 16 \
+                      --preq_mode 8da4w_output_8da8w \
+                      --preq_group_size 32 \
+                      --preq_embedding_quantize 8,0 \
+                      --use_sdpa_with_kv_cache \
+                      -kv \
+                      -X \
+                      --xnnpack-extended-ops \
+                      -d fp32 \
+                      --max_seq_length 2048 \
+                      --output_name "${OUT_ET_MODEL_NAME}.pte" \
+                      --metadata '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}'
+                    ls -lh "${OUT_ET_MODEL_NAME}.pte"
+                else
+                    if [[ ${{ matrix.delegate }} == "xnnpack_bf16" ]]; then
+                        # Original BF16 version, without any quantization
+                        DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "original" --files "tokenizer.model" "params.json" "consolidated.00.pth")
+                        python -m examples.models.llama.export_llama \
+                          --model "llama3_2" \
+                          --checkpoint "${DOWNLOADED_PATH}/consolidated.00.pth" \
+                          --params "${DOWNLOADED_PATH}/params.json" \
+                          -kv \
+                          --use_sdpa_with_kv_cache \
+                          -X \
+                          -d bf16 \
+                          --metadata '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}' \
+                          --output_name="${OUT_ET_MODEL_NAME}.pte"
+                        ls -lh "${OUT_ET_MODEL_NAME}.pte"
+                    else
+                        # By default, test with the Hugging Face model and the xnnpack recipe
+                        DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "original" --files "tokenizer.model")
+                        python -m extension.export_util.export_hf_model -hfm="$HF_MODEL_REPO" -o "$OUT_ET_MODEL_NAME"
+                        ls -lh "${OUT_ET_MODEL_NAME}.pte"
+                    fi
+                fi
+            else
+                echo "Unsupported model ${{ matrix.model }}"
+                exit 1
+            fi
+
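+            # Bundle the model and tokenizer for the downstream benchmark job;
+            # zip -j drops directory paths so both files sit at the archive root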
+            zip -j model.zip "${OUT_ET_MODEL_NAME}.pte" "${DOWNLOADED_PATH}/tokenizer.model"
+            ls -lh model.zip
+            mkdir -p "${ARTIFACTS_DIR_NAME}"
+            mv model.zip "${ARTIFACTS_DIR_NAME}"
+        elif [[ ${{ matrix.model }} =~ ^stories* ]]; then
             # Install requirements for export_llama
             PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
             # Test llama2
@@ -209,6 +307,7 @@ jobs:
                 echo "Unsupported delegate ${{ matrix.delegate }}"
                 exit 1
             fi
+            DTYPE="fp32"
             PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh \
               -model "${{ matrix.model }}" \
               -build_tool "${BUILD_MODE}" \