Update on "[ET-VK][Ops] linear_qta8a_qga4w_qta8o test framework"
# Context
This test framework establishes the foundation for validating the `linear_qta8a_qga4w_qta8o` operator implementation as part of enabling dynamic quantization. The motivation is to advance beyond weight-only quantization to linear operations with quantized activations and weights, enabling true integer arithmetic throughout the matrix multiplication for improved performance on GPU hardware.
The current weight-only quantized linear implementations in ET-VK dequantize weights to floating point before computation, missing the performance benefits of integer arithmetic.
The operator nomenclature breaks down as follows (the affine relation these schemes share is sketched after this list):
- **qta8a**: Quantized per-token affine 8-bit activation inputs
- **qga4w**: Quantized per-group affine 4-bit weights
- **qta8o**: Quantized per-token affine 8-bit outputs
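
All three schemes use the same affine quantize/dequantize relation; what differs is the granularity at which `(scale, zero_point)` pairs are assigned. Below is a minimal scalar sketch of that relation for the 8-bit case; the helper names are illustrative, not part of the actual operator:

```cpp
// Sketch of the shared affine relation. For qta8a/qta8o there is one
// (scale, zero_point) pair per token row; for qga4w there is one pair per
// group of consecutive 4-bit weights (with a correspondingly narrower range).
#include <algorithm>
#include <cmath>
#include <cstdint>

inline int8_t quantize_affine_i8(float x, float scale, int32_t zero_point) {
  // q = clamp(round(x / scale) + zero_point, -128, 127)
  int32_t q = static_cast<int32_t>(std::lround(x / scale)) + zero_point;
  return static_cast<int8_t>(std::clamp(q, -128, 127));
}

inline float dequantize_affine(int8_t q, float scale, int32_t zero_point) {
  // x = (q - zero_point) * scale
  return static_cast<float>(static_cast<int32_t>(q) - zero_point) * scale;
}
```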
# Changes
The reference implementation (`linear_qta8a_qga4w_qta8o_4bit_dequant_impl`) provides a baseline for validating the GPU shader implementation through a deliberately simplified computation path. The quantized int8 input tensor is dequantized using the standard affine transformation `(quantized_input.to(at::kFloat) - input_zero_point) * input_scale`. After dequantization, the implementation performs a standard floating-point linear operation, `at::linear(x_float, weights_dequantized)`, then manually quantizes the result using `at::round(linear_result / output_scale) + output_zero_point`, with clamping to the int8 range [-128, 127]. This dequantize → compute → quantize approach provides a clear reference against which the GPU's integer arithmetic implementation can be validated.
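
For orientation, here is a hedged, self-contained sketch of that reference path in ATen. The function signature and parameter shapes are assumptions based on the description above (per-token scales and zero points are taken as broadcastable column tensors, and the 4-bit group weights are assumed to have already been dequantized to float), not the actual implementation:

```cpp
#include <ATen/ATen.h>

// Sketch of the dequantize -> compute -> quantize reference path.
// Assumed shapes: quantized_input [T, K] (int8), per-token params [T, 1],
// weights_dequantized [N, K] (float, 4-bit group weights already expanded).
at::Tensor linear_qta8a_qga4w_qta8o_reference_sketch(
    const at::Tensor& quantized_input,
    const at::Tensor& input_scale,
    const at::Tensor& input_zero_point,
    const at::Tensor& weights_dequantized,
    const at::Tensor& output_scale,
    const at::Tensor& output_zero_point) {
  // Stage 1: dequantize the int8 activations with the affine transform.
  at::Tensor x_float =
      (quantized_input.to(at::kFloat) - input_zero_point) * input_scale;

  // Stage 2: standard floating-point linear operation.
  at::Tensor linear_result = at::linear(x_float, weights_dequantized);

  // Stage 3: requantize the result per token, then clamp to int8 range.
  at::Tensor q =
      at::round(linear_result / output_scale) + output_zero_point;
  return at::clamp(q, -128, 127).to(at::kChar);
}
```

Because this path round-trips through floating point, any divergence from the GPU shader's pure integer arithmetic should be bounded by the rounding behavior at stages 1 and 3, which is what makes it a useful validation baseline.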
Differential Revision: [D77173442](https://our.internmc.facebook.com/intern/diff/D77173442/)
[ghstack-poisoned]