From 93d300e6604ae7344b75957598e091089a269864 Mon Sep 17 00:00:00 2001 From: Scott Roy Date: Thu, 4 Sep 2025 20:08:11 -0700 Subject: [PATCH 01/16] Support pytorch_bin format --- examples/models/phi_4_mini/convert_weights.py | 10 +++++++--- examples/models/qwen3/convert_weights.py | 2 ++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/examples/models/phi_4_mini/convert_weights.py b/examples/models/phi_4_mini/convert_weights.py index 01b7302ed2e..1fd840c75e3 100644 --- a/examples/models/phi_4_mini/convert_weights.py +++ b/examples/models/phi_4_mini/convert_weights.py @@ -95,9 +95,13 @@ def phi_4_tune_to_meta(state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.T converted_state_dict = {} inverted_mapping_dict = {v: k for k, v in _PHI_4_FROM_META.items()} - for key, value in state_dict.items(): - new_key = get_mapped_key(key, inverted_mapping_dict) - converted_state_dict[new_key] = value + # Single checkpoint + model_path = os.path.join(input_dir, "pytorch_model.bin") + if os.path.exists(model_path): + state_dict = torch.load( + model_path, weights_only=True, map_location=torch.device("cpu") + ) + return state_dict # Input and output embeddings are tied. converted_state_dict["output.weight"] = converted_state_dict[ diff --git a/examples/models/qwen3/convert_weights.py b/examples/models/qwen3/convert_weights.py index 404fd4cbe88..e905d435ac0 100644 --- a/examples/models/qwen3/convert_weights.py +++ b/examples/models/qwen3/convert_weights.py @@ -89,6 +89,8 @@ def load_checkpoint_from_safetensors(input_dir: str) -> Dict: raise FileNotFoundError(f"Could not find safetensors checkpoint in {input_dir}") + raise FileNotFoundError(f"Could not find pytorch_model checkpoint in {input_dir}") + def load_checkpoint(input_dir: str) -> Dict: try: From 7e2fbd53e315a1ea18d55237c4c2cabc0a1e1b27 Mon Sep 17 00:00:00 2001 From: Scott Roy Date: Thu, 4 Sep 2025 20:10:53 -0700 Subject: [PATCH 02/16] up --- examples/models/phi_4_mini/convert_weights.py | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/examples/models/phi_4_mini/convert_weights.py b/examples/models/phi_4_mini/convert_weights.py index 1fd840c75e3..51520a9bba5 100644 --- a/examples/models/phi_4_mini/convert_weights.py +++ b/examples/models/phi_4_mini/convert_weights.py @@ -66,6 +66,29 @@ def phi_4_hf_to_meta(state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Ten return converted_state_dict +def phi_4_tune_to_meta(state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: + """ + Convert a state dict from torchtune's format to Meta's format. This function + doesn't handle any sharding or splitting of state dicts. It follows the + state_dict IN -> state_dict OUT pattern. + Args: + state_dict (Dict[str, torch.Tensor]): State dict in torchtune's format. + Returns: + Dict[str, torch.Tensor]: State dict in Meta's format. + """ + converted_state_dict = {} + inverted_mapping_dict = {v: k for k, v in _PHI_4_FROM_META.items()} + + for key, value in state_dict.items(): + new_key = get_mapped_key(key, inverted_mapping_dict) + converted_state_dict[new_key] = value + + # Input and output embeddings are tied. + converted_state_dict["output.weight"] = converted_state_dict[ + "tok_embeddings.weight" + ] + + # Standard _FROM_META weight mapping of Meta weights to TorchTune. 
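+# (For example, get_mapped_key with the inverted form of this mapping is
+# expected to send the torchtune key "layers.0.attn.q_proj.weight" back to
+# the Meta key "layers.0.attention.wq.weight".)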
_PHI_4_FROM_META = { "tok_embeddings.weight": "tok_embeddings.weight", From ab15cf7a311304db1b3f27351d10e98c8a55dbd6 Mon Sep 17 00:00:00 2001 From: Scott Roy Date: Thu, 4 Sep 2025 20:12:47 -0700 Subject: [PATCH 03/16] up --- examples/models/phi_4_mini/convert_weights.py | 33 ++++++++++--------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/examples/models/phi_4_mini/convert_weights.py b/examples/models/phi_4_mini/convert_weights.py index 51520a9bba5..1bae99ec03a 100644 --- a/examples/models/phi_4_mini/convert_weights.py +++ b/examples/models/phi_4_mini/convert_weights.py @@ -66,6 +66,22 @@ def phi_4_hf_to_meta(state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Ten return converted_state_dict +# Standard _FROM_META weight mapping of Meta weights to TorchTune. +_PHI_4_FROM_META = { + "tok_embeddings.weight": "tok_embeddings.weight", + "norm.weight": "norm.scale", + "layers.{}.attention.wk.weight": "layers.{}.attn.k_proj.weight", + "layers.{}.attention.wq.weight": "layers.{}.attn.q_proj.weight", + "layers.{}.attention.wv.weight": "layers.{}.attn.v_proj.weight", + "layers.{}.attention.wo.weight": "layers.{}.attn.output_proj.weight", + "layers.{}.attention_norm.weight": "layers.{}.sa_norm.scale", + "layers.{}.ffn_norm.weight": "layers.{}.mlp_norm.scale", + "layers.{}.feed_forward.w1.weight": "layers.{}.mlp.w1.weight", + "layers.{}.feed_forward.w2.weight": "layers.{}.mlp.w2.weight", + "layers.{}.feed_forward.w3.weight": "layers.{}.mlp.w3.weight", +} + + def phi_4_tune_to_meta(state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: """ Convert a state dict from torchtune's format to Meta's format. This function @@ -87,22 +103,7 @@ def phi_4_tune_to_meta(state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.T converted_state_dict["output.weight"] = converted_state_dict[ "tok_embeddings.weight" ] - - -# Standard _FROM_META weight mapping of Meta weights to TorchTune. 
-_PHI_4_FROM_META = { - "tok_embeddings.weight": "tok_embeddings.weight", - "norm.weight": "norm.scale", - "layers.{}.attention.wk.weight": "layers.{}.attn.k_proj.weight", - "layers.{}.attention.wq.weight": "layers.{}.attn.q_proj.weight", - "layers.{}.attention.wv.weight": "layers.{}.attn.v_proj.weight", - "layers.{}.attention.wo.weight": "layers.{}.attn.output_proj.weight", - "layers.{}.attention_norm.weight": "layers.{}.sa_norm.scale", - "layers.{}.ffn_norm.weight": "layers.{}.mlp_norm.scale", - "layers.{}.feed_forward.w1.weight": "layers.{}.mlp.w1.weight", - "layers.{}.feed_forward.w2.weight": "layers.{}.mlp.w2.weight", - "layers.{}.feed_forward.w3.weight": "layers.{}.mlp.w3.weight", -} + return converted_state_dict def phi_4_tune_to_meta(state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: From 024b0da461f657d6abe596891b754adf0d48fc17 Mon Sep 17 00:00:00 2001 From: Scott Roy Date: Sat, 6 Sep 2025 00:21:11 -0700 Subject: [PATCH 04/16] up --- examples/models/checkpoint.py | 2 ++ examples/models/phi_4_mini/convert_weights.py | 2 +- examples/models/qwen3/convert_weights.py | 4 +--- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/models/checkpoint.py b/examples/models/checkpoint.py index 4583b105732..53a3b85d385 100644 --- a/examples/models/checkpoint.py +++ b/examples/models/checkpoint.py @@ -11,6 +11,8 @@ import os from pathlib import Path from typing import Any, Dict, Optional +import os +import json import torch diff --git a/examples/models/phi_4_mini/convert_weights.py b/examples/models/phi_4_mini/convert_weights.py index 1bae99ec03a..c7bf515494c 100644 --- a/examples/models/phi_4_mini/convert_weights.py +++ b/examples/models/phi_4_mini/convert_weights.py @@ -2,9 +2,9 @@ from typing import Dict import torch -from executorch.examples.models.checkpoint import load_checkpoint_from_pytorch_model from torchtune.models.convert_weights import get_mapped_key +from executorch.examples.models.checkpoint import load_checkpoint_from_pytorch_model from torchtune.training import FullModelHFCheckpointer diff --git a/examples/models/qwen3/convert_weights.py b/examples/models/qwen3/convert_weights.py index e905d435ac0..70d65ab494f 100644 --- a/examples/models/qwen3/convert_weights.py +++ b/examples/models/qwen3/convert_weights.py @@ -9,6 +9,7 @@ from safetensors.torch import load_file from torchtune.models.convert_weights import get_mapped_key +from executorch.examples.models.checkpoint import load_checkpoint_from_pytorch_model # Standard _FROM_META weight mapping of Meta weights to TorchTune + additional bias weight mappings. 
 _QWEN_3_FROM_META = {
@@ -89,9 +90,6 @@ def load_checkpoint_from_safetensors(input_dir: str) -> Dict:

     raise FileNotFoundError(f"Could not find safetensors checkpoint in {input_dir}")


-    raise FileNotFoundError(f"Could not find pytorch_model checkpoint in {input_dir}")
-
-
 def load_checkpoint(input_dir: str) -> Dict:
     try:
         print("Loading checkpoint from pytorch_model directory")

From 57dbf96d2a2e51983c1a64e5642a8698e85a883b Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Sun, 7 Sep 2025 16:49:40 -0700
Subject: [PATCH 05/16] lint

---
 examples/models/checkpoint.py                 | 2 --
 examples/models/phi_4_mini/convert_weights.py | 2 +-
 examples/models/qwen3/convert_weights.py      | 1 -
 3 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/examples/models/checkpoint.py b/examples/models/checkpoint.py
index 53a3b85d385..4583b105732 100644
--- a/examples/models/checkpoint.py
+++ b/examples/models/checkpoint.py
@@ -11,8 +11,6 @@
 import os
 from pathlib import Path
 from typing import Any, Dict, Optional
-import os
-import json

 import torch

diff --git a/examples/models/phi_4_mini/convert_weights.py b/examples/models/phi_4_mini/convert_weights.py
index c7bf515494c..1bae99ec03a 100644
--- a/examples/models/phi_4_mini/convert_weights.py
+++ b/examples/models/phi_4_mini/convert_weights.py
@@ -2,9 +2,9 @@
 from typing import Dict

 import torch
+from executorch.examples.models.checkpoint import load_checkpoint_from_pytorch_model
 from torchtune.models.convert_weights import get_mapped_key

-from executorch.examples.models.checkpoint import load_checkpoint_from_pytorch_model
 from torchtune.training import FullModelHFCheckpointer

diff --git a/examples/models/qwen3/convert_weights.py b/examples/models/qwen3/convert_weights.py
index 70d65ab494f..4d492e90fc4 100644
--- a/examples/models/qwen3/convert_weights.py
+++ b/examples/models/qwen3/convert_weights.py
@@ -9,7 +9,6 @@
 from safetensors.torch import load_file

 from torchtune.models.convert_weights import get_mapped_key
-from executorch.examples.models.checkpoint import load_checkpoint_from_pytorch_model

 # Standard _FROM_META weight mapping of Meta weights to TorchTune + additional bias weight mappings.
 _QWEN_3_FROM_META = {

From 51f41cb1b3d0b54359b78ff83f725c398c49adb6 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Sun, 7 Sep 2025 17:08:26 -0700
Subject: [PATCH 06/16] Add quantized checkpoint tests

---
 .../test_torchao_huggingface_checkpoints.sh   | 109 ++++++++++++++++++
 .github/workflows/trunk.yml                   |  96 ++++++++++++++-
 2 files changed, 202 insertions(+), 3 deletions(-)
 create mode 100644 .ci/scripts/test_torchao_huggingface_checkpoints.sh

diff --git a/.ci/scripts/test_torchao_huggingface_checkpoints.sh b/.ci/scripts/test_torchao_huggingface_checkpoints.sh
new file mode 100644
index 00000000000..12c182d8620
--- /dev/null
+++ b/.ci/scripts/test_torchao_huggingface_checkpoints.sh
@@ -0,0 +1,109 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+MODEL_NAME=${1:-}
+
+if [[ -z "$MODEL_NAME" ]]; then
+    echo "Usage: $0 <model_name>"
+    echo "Supported model_name values: qwen3_4b, phi_4_mini"
+    exit 1
+fi
+
+if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
+    PYTHON_EXECUTABLE=python3
+fi
+
+MODEL_OUT=model.pte
+
+case "$MODEL_NAME" in
+    qwen3_4b)
+        echo "Running Qwen3-4B export..."
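+        # `hf download` prints the local snapshot directory of the checkpoint,
+        # so the converter (and later the tokenizer path) can point straight at
+        # the downloaded files. Assumed layout of that directory:
+        #   $HF_MODEL_DIR/pytorch_model.bin  - quantized checkpoint to convert
+        #   $HF_MODEL_DIR/tokenizer.json     - consumed by llama_main below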
+ HF_MODEL_DIR=$(hf download metascroy/Qwen3-4B-INT8-INT4) + EXPECTED_MODEL_SIZE_UPPER_BOUND=$((3 * 1024 * 1024 * 1024)) # 3GB + $PYTHON_EXECUTABLE -m executorch.examples.models.qwen3.convert_weights \ + $HF_MODEL_DIR \ + pytorch_model_converted.bin + + $PYTHON_EXECUTABLE -m executorch.examples.models.llama.export_llama \ + --model "qwen3_4b" \ + --checkpoint pytorch_model_converted.bin \ + --params examples/models/qwen3/config/4b_config.json \ + --output_name $MODEL_OUT \ + -kv \ + --use_sdpa_with_kv_cache \ + -X \ + --xnnpack-extended-ops \ + --max_context_length 1024 \ + --max_seq_length 1024 \ + --dtype fp32 \ + --metadata '{"get_bos_id":199999, "get_eos_ids":[200020,199999]}' + ;; + + phi_4_mini) + echo "Running Phi-4-mini export..." + HF_MODEL_DIR=$(hf download metascroy/Phi-4-mini-instruct-INT8-INT4) + EXPECTED_MODEL_SIZE_UPPER_BOUND=$((3 * 1024 * 1024 * 1024)) # 3GB + $PYTHON_EXECUTABLE -m executorch.examples.models.phi_4_mini.convert_weights \ + $HF_MODEL_DIR \ + pytorch_model_converted.bin + + # $PYTHON_EXECUTABLE -m executorch.examples.models.llama.export_llama \ + # --model "phi_4_mini" \ + # --checkpoint pytorch_model_converted.bin \ + # --params examples/models/phi_4_mini/config/config.json \ + # --output_name $MODEL_OUT \ + # -kv \ + # --use_sdpa_with_kv_cache \ + # -X \ + # --xnnpack-extended-ops \ + # --max_context_length 1024 \ + # --max_seq_length 1024 \ + # --dtype fp32 \ + # --metadata '{"get_bos_id":199999, "get_eos_ids":[200020,199999]}' + ;; + + *) + echo "Error: unsupported model_name '$MODEL_NAME'" + echo "Supported values: qwen3_4b, phi_4_mini" + exit 1 + ;; +esac + +# Check file size +MODEL_SIZE=$(stat --printf="%s" $MODEL_OUT 2>/dev/null || stat -f%z $MODEL_OUT) +if [[ $MODEL_SIZE -gt $EXPECTED_MODEL_SIZE_UPPER_BOUND ]]; then + echo "Error: model size $MODEL_SIZE is greater than expected upper bound $EXPECTED_MODEL_SIZE_UPPER_BOUND" + exit 1 +fi + +# Install ET with CMake +cmake -DPYTHON_EXECUTABLE=python \ + -DCMAKE_INSTALL_PREFIX=cmake-out \ + -DEXECUTORCH_ENABLE_LOGGING=1 \ + -DCMAKE_BUILD_TYPE=Release \ + -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ + -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \ + -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ + -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ + -DEXECUTORCH_BUILD_XNNPACK=ON \ + -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ + -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ + -DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON \ + -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \ + -DEXECUTORCH_BUILD_KERNELS_LLM=ON \ + -Bcmake-out . 
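+# The configure invocation above only generates the build tree; the install
+# build below stages the ExecuTorch headers and libraries into cmake-out,
+# which the llama runner build in the next step picks up.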
+cmake --build cmake-out -j16 --config Release --target install + +# Install llama runner +cmake -DPYTHON_EXECUTABLE=python \ + -DCMAKE_BUILD_TYPE=Release \ + -Bcmake-out/examples/models/llama \ + examples/models/llama +cmake --build cmake-out/examples/models/llama -j16 --config Release + +# Run the model +./cmake-out/examples/models/llama/llama_main --model_path=$MODEL_OUT --tokenizer_path="${HF_MODEL_DIR}/tokenizer.json" --prompt="Once upon a time," + +# Clean up +rm pytorch_model_converted.bin +rm $MODEL_OUT diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index 251bb238f1b..a869435839b 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -550,6 +550,65 @@ jobs: # Test llama2 PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}" -upload "${ARTIFACTS_DIR_NAME}" + test-llama-runner-linux: + # Test Both linux x86 and linux aarch64 + name: test-llama-runner-linux + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + permissions: + id-token: write + contents: read + strategy: + matrix: + dtype: [fp32] + mode: [portable, xnnpack+custom] + runner: [linux.2xlarge, linux.arm64.2xlarge] + docker-image: [executorch-ubuntu-22.04-clang12, executorch-ubuntu-22.04-gcc11-aarch64] + include: + - dtype: bf16 + mode: portable + runner: linux.2xlarge + docker-image: executorch-ubuntu-22.04-clang12 + - dtype: bf16 + mode: portable + runner: linux.arm64.2xlarge + docker-image: executorch-ubuntu-22.04-gcc11-aarch64 + - dtype: bf16 + mode: custom + runner: linux.arm64.2xlarge + docker-image: executorch-ubuntu-22.04-gcc11-aarch64 + # Excluding specific runner + docker image combinations that don't make sense: + # - Excluding the ARM64 gcc image on the x86 runner (linux.2xlarge) + # - Excluding the x86 clang image on the ARM64 runner (linux.arm64.2xlarge) + exclude: + - runner: linux.2xlarge + docker-image: executorch-ubuntu-22.04-gcc11-aarch64 + - runner: linux.arm64.2xlarge + docker-image: executorch-ubuntu-22.04-clang12 + fail-fast: false + with: + runner: ${{ matrix.runner }} + docker-image: ci-image:${{ matrix.docker-image }} + submodules: 'recursive' + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + timeout: 900 + script: | + # The generic Linux job chooses to use base env, not the one setup by the image + CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + conda activate "${CONDA_ENV}" + + DTYPE=${{ matrix.dtype }} + BUILD_TOOL="cmake" + MODE=${{ matrix.mode }} + ARTIFACTS_DIR_NAME="artifacts-to-be-uploaded/${DTYPE}-${MODE}" + ARTIFACTS_DIR_NAME="${ARTIFACTS_DIR_NAME/+/-}" + + # Setup executorch + PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" + # Install requirements for export_llama + PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh + # Test llama2 + PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}" -upload "${ARTIFACTS_DIR_NAME}" + test-llama-runner-macos: name: test-llama-runner-mac uses: pytorch/test-infra/.github/workflows/macos_job.yml@main @@ -585,6 +644,37 @@ jobs: # Test llama2 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh -model stories110M -build_tool cmake -dtype "${DTYPE}" -mode "${MODE}" + test-torchao_huggingface_checkpoints: + name: test-llama-runner-linux + uses: 
pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + permissions: + id-token: write + contents: read + strategy: + matrix: + mode: [xnnpack+custom] + runner: [linux.2xlarge, linux.arm64.2xlarge] + docker-image: [executorch-ubuntu-22.04-clang12, executorch-ubuntu-22.04-gcc11-aarch64] + exclude: + - runner: linux.2xlarge + docker-image: executorch-ubuntu-22.04-gcc11-aarch64 + - runner: linux.arm64.2xlarge + docker-image: executorch-ubuntu-22.04-clang12 + fail-fast: false + with: + runner: ${{ matrix.runner }} + docker-image: ci-image:${{ matrix.docker-image }} + submodules: 'recursive' + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + timeout: 900 + script: | + # The generic Linux job chooses to use base env, not the one setup by the image + CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + conda activate "${CONDA_ENV}" + + sh .ci/scripts/test_torchao_huggingface_checkpoints.sh phi_4_mini + sh .ci/scripts/test_torchao_huggingface_checkpoints.sh qwen3_4b + # # TODO(jackzhxng): Runner consistently runs out of memory before test finishes. Try to find a more powerful runner. # test-llava-runner-macos: # name: test-llava-runner-macos @@ -993,13 +1083,13 @@ jobs: timeout: 60 script: | conda init powershell - + powershell -Command "& { Set-PSDebug -Trace 1 \$ErrorActionPreference = 'Stop' \$PSNativeCommandUseErrorActionPreference = \$true - .ci/scripts/setup-windows.ps1 + .ci/scripts/setup-windows.ps1 powershell .ci/scripts/test_model.ps1 -modelName ${{ matrix.model }} -backend ${{ matrix.backend }} - }" \ No newline at end of file + }" From 3af11d6074de7b0addefbce242836bdbc9a57e40 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Sun, 7 Sep 2025 17:11:42 -0700 Subject: [PATCH 07/16] up --- .github/workflows/trunk.yml | 65 ++----------------------------------- 1 file changed, 3 insertions(+), 62 deletions(-) diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index a869435839b..9f2757bb21a 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -550,65 +550,6 @@ jobs: # Test llama2 PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}" -upload "${ARTIFACTS_DIR_NAME}" - test-llama-runner-linux: - # Test Both linux x86 and linux aarch64 - name: test-llama-runner-linux - uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main - permissions: - id-token: write - contents: read - strategy: - matrix: - dtype: [fp32] - mode: [portable, xnnpack+custom] - runner: [linux.2xlarge, linux.arm64.2xlarge] - docker-image: [executorch-ubuntu-22.04-clang12, executorch-ubuntu-22.04-gcc11-aarch64] - include: - - dtype: bf16 - mode: portable - runner: linux.2xlarge - docker-image: executorch-ubuntu-22.04-clang12 - - dtype: bf16 - mode: portable - runner: linux.arm64.2xlarge - docker-image: executorch-ubuntu-22.04-gcc11-aarch64 - - dtype: bf16 - mode: custom - runner: linux.arm64.2xlarge - docker-image: executorch-ubuntu-22.04-gcc11-aarch64 - # Excluding specific runner + docker image combinations that don't make sense: - # - Excluding the ARM64 gcc image on the x86 runner (linux.2xlarge) - # - Excluding the x86 clang image on the ARM64 runner (linux.arm64.2xlarge) - exclude: - - runner: linux.2xlarge - docker-image: executorch-ubuntu-22.04-gcc11-aarch64 - - runner: linux.arm64.2xlarge - docker-image: executorch-ubuntu-22.04-clang12 - fail-fast: false - with: - runner: ${{ 
matrix.runner }} - docker-image: ci-image:${{ matrix.docker-image }} - submodules: 'recursive' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 900 - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - - DTYPE=${{ matrix.dtype }} - BUILD_TOOL="cmake" - MODE=${{ matrix.mode }} - ARTIFACTS_DIR_NAME="artifacts-to-be-uploaded/${DTYPE}-${MODE}" - ARTIFACTS_DIR_NAME="${ARTIFACTS_DIR_NAME/+/-}" - - # Setup executorch - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" - # Install requirements for export_llama - PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh - # Test llama2 - PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}" -upload "${ARTIFACTS_DIR_NAME}" - test-llama-runner-macos: name: test-llama-runner-mac uses: pytorch/test-infra/.github/workflows/macos_job.yml@main @@ -644,8 +585,8 @@ jobs: # Test llama2 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh -model stories110M -build_tool cmake -dtype "${DTYPE}" -mode "${MODE}" - test-torchao_huggingface_checkpoints: - name: test-llama-runner-linux + test-torchao-huggingface-checkpoints: + name: test-torchao-huggingface-checkpoints uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main permissions: id-token: write @@ -672,8 +613,8 @@ jobs: CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") conda activate "${CONDA_ENV}" - sh .ci/scripts/test_torchao_huggingface_checkpoints.sh phi_4_mini sh .ci/scripts/test_torchao_huggingface_checkpoints.sh qwen3_4b + sh .ci/scripts/test_torchao_huggingface_checkpoints.sh phi_4_mini # # TODO(jackzhxng): Runner consistently runs out of memory before test finishes. Try to find a more powerful runner. # test-llava-runner-macos: From ba6fb4b184221cf9a1d602c6e74fe9150aae4ca4 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Mon, 8 Sep 2025 10:01:44 -0700 Subject: [PATCH 08/16] up --- examples/models/phi_4_mini/convert_weights.py | 28 ------------------- examples/models/qwen3/convert_weights.py | 1 + 2 files changed, 1 insertion(+), 28 deletions(-) diff --git a/examples/models/phi_4_mini/convert_weights.py b/examples/models/phi_4_mini/convert_weights.py index 1bae99ec03a..01b7302ed2e 100644 --- a/examples/models/phi_4_mini/convert_weights.py +++ b/examples/models/phi_4_mini/convert_weights.py @@ -106,34 +106,6 @@ def phi_4_tune_to_meta(state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.T return converted_state_dict -def phi_4_tune_to_meta(state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: - """ - Convert a state dict from torchtune's format to Meta's format. This function - doesn't handle any sharding or splitting of state dicts. It follows the - state_dict IN -> state_dict OUT pattern. - Args: - state_dict (Dict[str, torch.Tensor]): State dict in torchtune's format. - Returns: - Dict[str, torch.Tensor]: State dict in Meta's format. 
- """ - converted_state_dict = {} - inverted_mapping_dict = {v: k for k, v in _PHI_4_FROM_META.items()} - - # Single checkpoint - model_path = os.path.join(input_dir, "pytorch_model.bin") - if os.path.exists(model_path): - state_dict = torch.load( - model_path, weights_only=True, map_location=torch.device("cpu") - ) - return state_dict - - # Input and output embeddings are tied. - converted_state_dict["output.weight"] = converted_state_dict[ - "tok_embeddings.weight" - ] - return converted_state_dict - - def convert_weights(input_dir_or_checkpoint: str, output_file: str) -> None: try: sd = load_checkpoint_from_pytorch_model(input_dir_or_checkpoint) diff --git a/examples/models/qwen3/convert_weights.py b/examples/models/qwen3/convert_weights.py index 4d492e90fc4..404fd4cbe88 100644 --- a/examples/models/qwen3/convert_weights.py +++ b/examples/models/qwen3/convert_weights.py @@ -89,6 +89,7 @@ def load_checkpoint_from_safetensors(input_dir: str) -> Dict: raise FileNotFoundError(f"Could not find safetensors checkpoint in {input_dir}") + def load_checkpoint(input_dir: str) -> Dict: try: print("Loading checkpoint from pytorch_model directory") From c3b6f0e5fdd8430264bab2d96ad557f673bb4a18 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Mon, 8 Sep 2025 10:07:39 -0700 Subject: [PATCH 09/16] up --- .github/workflows/trunk.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index 9f2757bb21a..e9f9787dbe2 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -612,6 +612,7 @@ jobs: # The generic Linux job chooses to use base env, not the one setup by the image CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") conda activate "${CONDA_ENV}" + pip install -U "huggingface_hub[cli]" sh .ci/scripts/test_torchao_huggingface_checkpoints.sh qwen3_4b sh .ci/scripts/test_torchao_huggingface_checkpoints.sh phi_4_mini From ed1561019b1dec4326177fdb14dd00fffa162b55 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Mon, 8 Sep 2025 10:30:08 -0700 Subject: [PATCH 10/16] up --- .../test_torchao_huggingface_checkpoints.sh | 26 +++++++++---------- .github/workflows/trunk.yml | 13 +++------- 2 files changed, 17 insertions(+), 22 deletions(-) diff --git a/.ci/scripts/test_torchao_huggingface_checkpoints.sh b/.ci/scripts/test_torchao_huggingface_checkpoints.sh index 12c182d8620..a9ffd3ffb3f 100644 --- a/.ci/scripts/test_torchao_huggingface_checkpoints.sh +++ b/.ci/scripts/test_torchao_huggingface_checkpoints.sh @@ -47,19 +47,19 @@ case "$MODEL_NAME" in $HF_MODEL_DIR \ pytorch_model_converted.bin - # $PYTHON_EXECUTABLE -m executorch.examples.models.llama.export_llama \ - # --model "phi_4_mini" \ - # --checkpoint pytorch_model_converted.bin \ - # --params examples/models/phi_4_mini/config/config.json \ - # --output_name $MODEL_OUT \ - # -kv \ - # --use_sdpa_with_kv_cache \ - # -X \ - # --xnnpack-extended-ops \ - # --max_context_length 1024 \ - # --max_seq_length 1024 \ - # --dtype fp32 \ - # --metadata '{"get_bos_id":199999, "get_eos_ids":[200020,199999]}' + $PYTHON_EXECUTABLE -m executorch.examples.models.llama.export_llama \ + --model "phi_4_mini" \ + --checkpoint pytorch_model_converted.bin \ + --params examples/models/phi_4_mini/config/config.json \ + --output_name $MODEL_OUT \ + -kv \ + --use_sdpa_with_kv_cache \ + -X \ + --xnnpack-extended-ops \ + --max_context_length 1024 \ + --max_seq_length 1024 \ + --dtype fp32 \ + --metadata 
'{"get_bos_id":199999, "get_eos_ids":[200020,199999]}' ;; *) diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index e9f9787dbe2..be99a712a39 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -594,13 +594,8 @@ jobs: strategy: matrix: mode: [xnnpack+custom] - runner: [linux.2xlarge, linux.arm64.2xlarge] - docker-image: [executorch-ubuntu-22.04-clang12, executorch-ubuntu-22.04-gcc11-aarch64] - exclude: - - runner: linux.2xlarge - docker-image: executorch-ubuntu-22.04-gcc11-aarch64 - - runner: linux.arm64.2xlarge - docker-image: executorch-ubuntu-22.04-clang12 + runner: [linux.2xlarge] + docker-image: [executorch-ubuntu-22.04-clang12] fail-fast: false with: runner: ${{ matrix.runner }} @@ -614,8 +609,8 @@ jobs: conda activate "${CONDA_ENV}" pip install -U "huggingface_hub[cli]" - sh .ci/scripts/test_torchao_huggingface_checkpoints.sh qwen3_4b - sh .ci/scripts/test_torchao_huggingface_checkpoints.sh phi_4_mini + bash .ci/scripts/test_torchao_huggingface_checkpoints.sh qwen3_4b + bash .ci/scripts/test_torchao_huggingface_checkpoints.sh phi_4_mini # # TODO(jackzhxng): Runner consistently runs out of memory before test finishes. Try to find a more powerful runner. # test-llava-runner-macos: From 62b25ca44ad134bd26d9ad410c25c54ca66066d4 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Mon, 8 Sep 2025 10:40:34 -0700 Subject: [PATCH 11/16] up --- .github/workflows/trunk.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index be99a712a39..078c0cfcb0e 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -608,6 +608,7 @@ jobs: CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") conda activate "${CONDA_ENV}" pip install -U "huggingface_hub[cli]" + python install_executorch.py bash .ci/scripts/test_torchao_huggingface_checkpoints.sh qwen3_4b bash .ci/scripts/test_torchao_huggingface_checkpoints.sh phi_4_mini From e2a3abb58bcee4b28325f1d791a15c058b9a1c31 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Mon, 8 Sep 2025 10:56:04 -0700 Subject: [PATCH 12/16] up --- .github/workflows/trunk.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index 078c0cfcb0e..870b964be1f 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -607,8 +607,9 @@ jobs: # The generic Linux job chooses to use base env, not the one setup by the image CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") conda activate "${CONDA_ENV}" + + PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake pip install -U "huggingface_hub[cli]" - python install_executorch.py bash .ci/scripts/test_torchao_huggingface_checkpoints.sh qwen3_4b bash .ci/scripts/test_torchao_huggingface_checkpoints.sh phi_4_mini From 13004c2f7bd673d616d513ea4a0c3bac67ac34d7 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Mon, 8 Sep 2025 12:02:30 -0700 Subject: [PATCH 13/16] up --- .../test_torchao_huggingface_checkpoints.sh | 91 ++++++++++++------- .github/workflows/trunk.yml | 16 ++-- 2 files changed, 69 insertions(+), 38 deletions(-) diff --git a/.ci/scripts/test_torchao_huggingface_checkpoints.sh b/.ci/scripts/test_torchao_huggingface_checkpoints.sh index a9ffd3ffb3f..ea82abc51a9 100644 --- a/.ci/scripts/test_torchao_huggingface_checkpoints.sh +++ 
b/.ci/scripts/test_torchao_huggingface_checkpoints.sh
@@ -1,14 +1,41 @@
 #!/usr/bin/env bash
 set -euo pipefail

-MODEL_NAME=${1:-}
+# -------------------------
+# Args / flags
+# -------------------------
+TEST_WITH_RUNNER=0
+MODEL_NAME=""

-if [[ -z "$MODEL_NAME" ]]; then
-    echo "Usage: $0 <model_name>"
+# Parse args
+if [[ $# -lt 1 ]]; then
+    echo "Usage: $0 <model_name> [--test_with_runner]"
     echo "Supported model_name values: qwen3_4b, phi_4_mini"
     exit 1
 fi

+MODEL_NAME="$1"
+shift
+
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        --test_with_runner)
+            TEST_WITH_RUNNER=1
+            ;;
+        -h|--help)
+            echo "Usage: $0 <model_name> [--test_with_runner]"
+            echo "  model_name: qwen3_4b | phi_4_mini"
+            echo "  --test_with_runner: build ET + run llama_main to sanity-check the export"
+            exit 0
+            ;;
+        *)
+            echo "Unknown option: $1"
+            exit 1
+            ;;
+    esac
+    shift
+done
+
 if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
     PYTHON_EXECUTABLE=python3
 fi
@@ -77,33 +104,35 @@ if [[ $MODEL_SIZE -gt $EXPECTED_MODEL_SIZE_UPPER_BOUND ]]; then
 fi

 # Install ET with CMake
-cmake -DPYTHON_EXECUTABLE=python \
-    -DCMAKE_INSTALL_PREFIX=cmake-out \
-    -DEXECUTORCH_ENABLE_LOGGING=1 \
-    -DCMAKE_BUILD_TYPE=Release \
-    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-    -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
-    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
-    -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
-    -DEXECUTORCH_BUILD_XNNPACK=ON \
-    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-    -DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON \
-    -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
-    -DEXECUTORCH_BUILD_KERNELS_LLM=ON \
-    -Bcmake-out .
-cmake --build cmake-out -j16 --config Release --target install
-
-# Install llama runner
-cmake -DPYTHON_EXECUTABLE=python \
-    -DCMAKE_BUILD_TYPE=Release \
-    -Bcmake-out/examples/models/llama \
-    examples/models/llama
-cmake --build cmake-out/examples/models/llama -j16 --config Release
-
-# Run the model
-./cmake-out/examples/models/llama/llama_main --model_path=$MODEL_OUT --tokenizer_path="${HF_MODEL_DIR}/tokenizer.json" --prompt="Once upon a time,"
+if [[ "$TEST_WITH_RUNNER" -eq 1 ]]; then
+    echo "[runner] Building and testing llama_main ..."
+    cmake -DPYTHON_EXECUTABLE=python \
+        -DCMAKE_INSTALL_PREFIX=cmake-out \
+        -DEXECUTORCH_ENABLE_LOGGING=1 \
+        -DCMAKE_BUILD_TYPE=Release \
+        -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+        -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
+        -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+        -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
+        -DEXECUTORCH_BUILD_XNNPACK=ON \
+        -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
+        -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
+        -DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON \
+        -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
+        -DEXECUTORCH_BUILD_KERNELS_LLM=ON \
+        -Bcmake-out .
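+    # The EXECUTORCH_BUILD_* switches above pull in the XNNPACK backend, the
+    # quantized/optimized kernels, and the LLM runner extension that
+    # llama_main links against.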
+ cmake --build cmake-out -j16 --config Release --target install + + # Install llama runner + cmake -DPYTHON_EXECUTABLE=python \ + -DCMAKE_BUILD_TYPE=Release \ + -Bcmake-out/examples/models/llama \ + examples/models/llama + cmake --build cmake-out/examples/models/llama -j16 --config Release + + # Run the model + ./cmake-out/examples/models/llama/llama_main --model_path=$MODEL_OUT --tokenizer_path="${HF_MODEL_DIR}/tokenizer.json" --prompt="Once upon a time," +fi # Clean up -rm pytorch_model_converted.bin -rm $MODEL_OUT +rm -f pytorch_model_converted.bin "$MODEL_OUT" diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index 870b964be1f..f5c5161e0cc 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -593,13 +593,16 @@ jobs: contents: read strategy: matrix: - mode: [xnnpack+custom] - runner: [linux.2xlarge] - docker-image: [executorch-ubuntu-22.04-clang12] + model: [qwen3_4b, phi_4_mini] + include: + - model: qwen3_4b + test_with_runner: true + - model: phi_4_mini + test_with_runner: false fail-fast: false with: - runner: ${{ matrix.runner }} - docker-image: ci-image:${{ matrix.docker-image }} + runner: linux.2xlarge + docker-image: ci-image:executorch-ubuntu-22.04-clang12 submodules: 'recursive' ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} timeout: 900 @@ -611,8 +614,7 @@ jobs: PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake pip install -U "huggingface_hub[cli]" - bash .ci/scripts/test_torchao_huggingface_checkpoints.sh qwen3_4b - bash .ci/scripts/test_torchao_huggingface_checkpoints.sh phi_4_mini + bash .ci/scripts/test_torchao_huggingface_checkpoints.sh ${{ matrix.model }} ${{ matrix.test_with_runner && '--test_with_runner' || '' }} # # TODO(jackzhxng): Runner consistently runs out of memory before test finishes. Try to find a more powerful runner. # test-llava-runner-macos: From 1a8e1d7eaaa3bdd127c09c3ce0b4fa2d31b8358d Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Tue, 9 Sep 2025 12:22:00 -0700 Subject: [PATCH 14/16] up --- .../test_torchao_huggingface_checkpoints.sh | 21 +++---------------- 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/.ci/scripts/test_torchao_huggingface_checkpoints.sh b/.ci/scripts/test_torchao_huggingface_checkpoints.sh index ea82abc51a9..3e400691d63 100644 --- a/.ci/scripts/test_torchao_huggingface_checkpoints.sh +++ b/.ci/scripts/test_torchao_huggingface_checkpoints.sh @@ -45,7 +45,7 @@ MODEL_OUT=model.pte case "$MODEL_NAME" in qwen3_4b) echo "Running Qwen3-4B export..." - HF_MODEL_DIR=$(hf download metascroy/Qwen3-4B-INT8-INT4) + HF_MODEL_DIR=$(hf download pytorch/Qwen3-4B-INT8-INT4) EXPECTED_MODEL_SIZE_UPPER_BOUND=$((3 * 1024 * 1024 * 1024)) # 3GB $PYTHON_EXECUTABLE -m executorch.examples.models.qwen3.convert_weights \ $HF_MODEL_DIR \ @@ -68,7 +68,7 @@ case "$MODEL_NAME" in phi_4_mini) echo "Running Phi-4-mini export..." - HF_MODEL_DIR=$(hf download metascroy/Phi-4-mini-instruct-INT8-INT4) + HF_MODEL_DIR=$(hf download pytorch/Phi-4-mini-instruct-INT8-INT4) EXPECTED_MODEL_SIZE_UPPER_BOUND=$((3 * 1024 * 1024 * 1024)) # 3GB $PYTHON_EXECUTABLE -m executorch.examples.models.phi_4_mini.convert_weights \ $HF_MODEL_DIR \ @@ -106,22 +106,7 @@ fi # Install ET with CMake if [[ "$TEST_WITH_RUNNER" -eq 1 ]]; then echo "[runner] Building and testing llama_main ..." 
- cmake -DPYTHON_EXECUTABLE=python \ - -DCMAKE_INSTALL_PREFIX=cmake-out \ - -DEXECUTORCH_ENABLE_LOGGING=1 \ - -DCMAKE_BUILD_TYPE=Release \ - -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ - -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \ - -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ - -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ - -DEXECUTORCH_BUILD_XNNPACK=ON \ - -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ - -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ - -DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON \ - -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \ - -DEXECUTORCH_BUILD_KERNELS_LLM=ON \ - -Bcmake-out . - cmake --build cmake-out -j16 --config Release --target install + cmake --preset llm -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=cmake-out # Install llama runner cmake -DPYTHON_EXECUTABLE=python \ From 7647ed9fa51488af40ae9d55fc648dc698f3aba0 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Tue, 9 Sep 2025 13:18:20 -0700 Subject: [PATCH 15/16] up --- .ci/scripts/test_torchao_huggingface_checkpoints.sh | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/.ci/scripts/test_torchao_huggingface_checkpoints.sh b/.ci/scripts/test_torchao_huggingface_checkpoints.sh index 3e400691d63..1a864a00697 100644 --- a/.ci/scripts/test_torchao_huggingface_checkpoints.sh +++ b/.ci/scripts/test_torchao_huggingface_checkpoints.sh @@ -107,12 +107,14 @@ fi if [[ "$TEST_WITH_RUNNER" -eq 1 ]]; then echo "[runner] Building and testing llama_main ..." cmake --preset llm -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=cmake-out + cmake --build cmake-out -j16 --target install --config Release # Install llama runner - cmake -DPYTHON_EXECUTABLE=python \ - -DCMAKE_BUILD_TYPE=Release \ - -Bcmake-out/examples/models/llama \ - examples/models/llama + cmake -DCMAKE_INSTALL_PREFIX=cmake-out \ + -DBUILD_TESTING=OFF \ + -DCMAKE_BUILD_TYPE=Release \ + -Bcmake-out/examples/models/llama \ + examples/models/llama cmake --build cmake-out/examples/models/llama -j16 --config Release # Run the model From 2f47f54590cfd0efde081243dcec3a2c58051255 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Tue, 9 Sep 2025 14:30:13 -0700 Subject: [PATCH 16/16] up --- .../test_torchao_huggingface_checkpoints.sh | 28 ++++++++++++++----- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/.ci/scripts/test_torchao_huggingface_checkpoints.sh b/.ci/scripts/test_torchao_huggingface_checkpoints.sh index 1a864a00697..c0910b47826 100644 --- a/.ci/scripts/test_torchao_huggingface_checkpoints.sh +++ b/.ci/scripts/test_torchao_huggingface_checkpoints.sh @@ -106,15 +106,29 @@ fi # Install ET with CMake if [[ "$TEST_WITH_RUNNER" -eq 1 ]]; then echo "[runner] Building and testing llama_main ..." - cmake --preset llm -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=cmake-out - cmake --build cmake-out -j16 --target install --config Release + cmake -DPYTHON_EXECUTABLE=python \ + -DCMAKE_INSTALL_PREFIX=cmake-out \ + -DEXECUTORCH_ENABLE_LOGGING=1 \ + -DCMAKE_BUILD_TYPE=Release \ + -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ + -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \ + -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ + -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ + -DEXECUTORCH_BUILD_XNNPACK=ON \ + -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ + -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ + -DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON \ + -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \ + -DEXECUTORCH_BUILD_KERNELS_LLM=ON \ + -Bcmake-out . 
+ cmake --build cmake-out -j16 --config Release --target install + # Install llama runner - cmake -DCMAKE_INSTALL_PREFIX=cmake-out \ - -DBUILD_TESTING=OFF \ - -DCMAKE_BUILD_TYPE=Release \ - -Bcmake-out/examples/models/llama \ - examples/models/llama + cmake -DPYTHON_EXECUTABLE=python \ + -DCMAKE_BUILD_TYPE=Release \ + -Bcmake-out/examples/models/llama \ + examples/models/llama cmake --build cmake-out/examples/models/llama -j16 --config Release # Run the model