Commit c577adc

Merge branch 'main' into milestone2.2
2 parents b663eca + b74c68d commit c577adc

95 files changed: +1314 −705 lines
.ci/docker/ci_commit_pins/optimum-executorch.txt

Lines changed: 1 addition & 0 deletions

```diff
@@ -0,0 +1 @@
+a3942627f5ac048e06b4b1d703b0a6a53bf6da5b
```
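
This new pin file centralizes the optimum-executorch commit that was previously hardcoded in three separate workflows; each workflow now reads it with `cat`, so bumping the pin is a one-line change in one place. As a rough illustration (not code from this commit), the same read-and-validate step could be done from Python like this:

```python
# Sketch: read the commit pin the same way the workflows do with
# `cat .ci/docker/ci_commit_pins/optimum-executorch.txt`.
from pathlib import Path

pin = Path(".ci/docker/ci_commit_pins/optimum-executorch.txt").read_text().strip()
# A full git SHA is 40 hex characters; fail fast on a malformed pin.
assert len(pin) == 40 and all(c in "0123456789abcdef" for c in pin)
print(f"git checkout {pin}")
```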

.github/workflows/android-perf-private-device-experiment.yml

Lines changed: 1 addition & 1 deletion

```diff
@@ -57,6 +57,6 @@ jobs:
       id-token: write
       contents: read
     with:
-      models: ${{ inputs.models || github.event_name == 'schedule' && 'Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf' || 'Qwen/Qwen3-0.6B' }}
+      models: ${{ inputs.models || github.event_name == 'schedule' && 'Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf,google/gemma-3-1b-it' || 'google/gemma-3-1b-it' }}
       devices: samsung_galaxy_s22_private
       benchmark_configs: ${{ inputs.benchmark_configs }}
```
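
These `models` defaults rely on short-circuit evaluation in GitHub Actions expressions: `||` and `&&` return an operand's value rather than a boolean, so the expression yields `inputs.models` when provided, the long model list on scheduled runs, and `google/gemma-3-1b-it` otherwise. Python's `or`/`and` have the same value-returning semantics, which gives a compact analogue (variable names here are illustrative):

```python
# Python analogue of:
#   ${{ inputs.models || github.event_name == 'schedule' && '<scheduled list>' || '<fallback>' }}
inputs_models = ""  # empty string == "not provided"
event_name = "schedule"
scheduled_list = "Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf,google/gemma-3-1b-it"

models = inputs_models or (event_name == "schedule" and scheduled_list) or "google/gemma-3-1b-it"
print(models)  # prints the scheduled list; change event_name to see the fallback
```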

.github/workflows/android-perf.yml

Lines changed: 4 additions & 12 deletions

```diff
@@ -72,7 +72,7 @@ jobs:
           # Separate default values from the workflow dispatch. To ensure defaults are accessible
           # during scheduled runs and to provide flexibility for different defaults between
           # on-demand and periodic benchmarking.
-          CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l,meta-llama/Llama-3.2-1B,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,allenai/OLMo-1B-hf' || 'Qwen/Qwen3-0.6B' }}
+          CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l,meta-llama/Llama-3.2-1B,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,allenai/OLMo-1B-hf,google/gemma-3-1b-it' || 'Qwen/Qwen3-0.6B' }}
           CRON_DEFAULT_DEVICES: samsung_galaxy_s22
         run: |
           set -eux
@@ -341,10 +341,11 @@ jobs:
         echo "tokenizer.json is downloaded to $DOWNLOADED_PATH"
 
         # Install optimum-executorch
+        OPTIMUM_ET_COMMIT=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
         git clone https://github.com/huggingface/optimum-executorch
         pushd optimum-executorch
         # There is no release yet, for CI stability, always test from the same commit on main
-        git checkout 4c3b18f6cca68c5ccff809131d570062723d7188
+        git checkout $OPTIMUM_ET_COMMIT
         python install_dev.py --skip_override_torch
         pip list
 
@@ -353,21 +354,12 @@ jobs:
           "--task" "text-generation"
           "--recipe" "xnnpack"
           "--use_custom_sdpa"
+          "--use_custom_kv_cache"
           "--qlinear"
           "--qembedding"
           "--output_dir" ".."
         )
 
-        # Add conditional arguments based on model
-        case "${HF_MODEL_REPO}" in
-          *"google/gemma-3-1b-it"*)
-            echo "--use_custom_kv_cache can not be used for HybridCache"
-            ;;
-          *)
-            ARGS+=("--use_custom_kv_cache")
-            ;;
-        esac
-
         optimum-cli export executorch "${ARGS[@]}"
         popd
```

.github/workflows/apple-perf-private-device-experiment.yml

Lines changed: 1 addition & 1 deletion

```diff
@@ -57,6 +57,6 @@ jobs:
       id-token: write
       contents: read
     with:
-      models: ${{ inputs.models || github.event_name == 'schedule' && 'Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf' || 'Qwen/Qwen3-0.6B' }}
+      models: ${{ inputs.models || github.event_name == 'schedule' && 'Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf,google/gemma-3-1b-it' || 'google/gemma-3-1b-it' }}
       devices: apple_iphone_15_private
       benchmark_configs: ${{ inputs.benchmark_configs }}
```

.github/workflows/apple-perf.yml

Lines changed: 4 additions & 12 deletions

```diff
@@ -72,7 +72,7 @@ jobs:
           # Separate default values from the workflow dispatch. To ensure defaults are accessible
           # during scheduled runs and to provide flexibility for different defaults between
           # on-demand and periodic benchmarking.
-          CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf' || 'Qwen/Qwen3-0.6B' }}
+          CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf,google/gemma-3-1b-it' || 'Qwen/Qwen3-0.6B' }}
           CRON_DEFAULT_DEVICES: apple_iphone_15
         run: |
           set -eux
@@ -346,10 +346,11 @@ jobs:
         echo "tokenizer.json is downloaded to $DOWNLOADED_PATH"
 
         # Install optimum-executorch
+        OPTIMUM_ET_COMMIT=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
         git clone https://github.com/huggingface/optimum-executorch
         pushd optimum-executorch
         # There is no release yet, for CI stability, always test from the same commit on main
-        git checkout 4c3b18f6cca68c5ccff809131d570062723d7188
+        git checkout $OPTIMUM_ET_COMMIT
         ${CONDA_RUN} python install_dev.py --skip_override_torch
         pip list
 
@@ -358,21 +359,12 @@ jobs:
           "--task" "text-generation"
           "--recipe" "xnnpack"
           "--use_custom_sdpa"
+          "--use_custom_kv_cache"
           "--qlinear"
           "--qembedding"
           "--output_dir" ".."
         )
 
-        # Add conditional arguments based on model
-        case "${HF_MODEL_REPO}" in
-          *"google/gemma-3-1b-it"*)
-            echo "--use_custom_kv_cache can not be used for HybridCache"
-            ;;
-          *)
-            ARGS+=("--use_custom_kv_cache")
-            ;;
-        esac
-
         ${CONDA_RUN} optimum-cli export executorch "${ARGS[@]}"
         popd
```

.github/workflows/trunk.yml

Lines changed: 7 additions & 15 deletions

```diff
@@ -594,10 +594,11 @@ jobs:
         echo "::group::Set up Hugging Face"
         pip install -U "huggingface_hub[cli]"
         huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
+        OPTIMUM_ET_COMMIT=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
         git clone https://github.com/huggingface/optimum-executorch
         pushd optimum-executorch
         # There is no release yet, for CI stability, always test from the same commit on main
-        git checkout 4c3b18f6cca68c5ccff809131d570062723d7188
+        git checkout $OPTIMUM_ET_COMMIT
         python install_dev.py --skip_override_torch
         popd
         pip list
@@ -614,21 +615,12 @@ jobs:
           "--task" "text-generation"
           "--recipe" "xnnpack"
           "--use_custom_sdpa"
+          "--use_custom_kv_cache"
           "--qlinear"
           "--qembedding"
           "--output_dir" "${OUTPUT_DIR}"
         )
 
-        # Add conditional arguments based on model
-        case "${MODEL_ID}" in
-          *"google/gemma-3-1b-it"*)
-            echo "--use_custom_kv_cache can not be used for HybridCache"
-            ;;
-          *)
-            ARGS+=("--use_custom_kv_cache")
-            ;;
-        esac
-
         optimum-cli export executorch "${ARGS[@]}"
 
         ls -FlAGhp ${OUTPUT_DIR}
@@ -732,18 +724,18 @@ jobs:
       timeout: 90
       script: |
         set -eux
-
+
         # The generic Linux job chooses to use base env, not the one setup by the image
         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
         conda activate "${CONDA_ENV}"
-
+
         # Build and install Executorch
         PYTHON_EXECUTABLE=python \
         CMAKE_ARGS="-DEXECUTORCH_BUILD_NXP_NEUTRON=ON" \
         .ci/scripts/setup-linux.sh --build-tool "cmake"
-
+
         # Install test requirements
         pip install -r backends/nxp/requirements-tests.txt
-
+
         # Run pytest
         PYTHON_EXECUTABLE=python bash backends/nxp/run_unittests.sh
```

The paired blank-line `-`/`+` changes in the last hunk are trailing-whitespace cleanup only.

backends/arm/operators/op_index_tensor.py

Lines changed: 12 additions & 2 deletions

```diff
@@ -189,11 +189,16 @@ def define_node(
             if i == 0:
                 gather_index_name = reshaped_idxs.name
             else:
+                add_idxs = tosa_graph.addIntermediate(
+                    reshaped_idxs.shape,
+                    reshaped_idxs.dtype,
+                )
                 tosa_graph.addOperator(
                     ts.TosaOp.Op().ADD,
                     [gather_index_name, reshaped_idxs.name],
-                    [gather_index_name],
+                    [add_idxs.name],
                 )
+                gather_index_name = add_idxs.name
 
         gather_vals_shape = [N, K, C]
         reshaped_input = tosa_graph.addIntermediate(gather_vals_shape, values.dtype)
@@ -314,11 +319,16 @@ def define_node(
             if i == 0:
                 gather_index_name = reshaped_idxs.name
             else:
+                add_idxs = tosa_graph.addIntermediate(
+                    reshaped_idxs.shape,
+                    reshaped_idxs.dtype,
+                )
                 tosa_graph.addOperator(
                     ts.TosaOp.Op().ADD,
                     [gather_index_name, reshaped_idxs.name],
-                    [gather_index_name],
+                    [add_idxs.name],
                 )
+                gather_index_name = add_idxs.name
 
         gather_vals_shape = [N, K, C]
         reshaped_input = tosa_graph.addIntermediate(gather_vals_shape, values.dtype)
```
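
The bug fixed here: the TOSA ADD wrote its result back into one of its own input tensors, so every iteration after the first reused `gather_index_name` as both operand and output. The fix allocates a fresh intermediate per iteration and rebinds the accumulator to it. A minimal sketch of the corrected pattern, using a hypothetical graph-builder API in place of the real TOSA serializer calls:

```python
# Sketch of the fixed accumulation loop. `graph.add_intermediate` and
# `graph.add_op` are hypothetical stand-ins for the serializer API in the
# diff (tosa_graph.addIntermediate / tosa_graph.addOperator).
def sum_index_tensors(graph, reshaped_index_tensors) -> str:
    gather_index_name = ""
    for i, idxs in enumerate(reshaped_index_tensors):
        if i == 0:
            gather_index_name = idxs.name
        else:
            # Allocate a fresh output tensor instead of writing the ADD result
            # back into its first operand; each tensor is assigned exactly once.
            acc = graph.add_intermediate(idxs.shape, idxs.dtype)
            graph.add_op("ADD", inputs=[gather_index_name, idxs.name],
                         outputs=[acc.name])
            gather_index_name = acc.name
    return gather_index_name
```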

backends/arm/test/models/test_llama.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -22,13 +22,13 @@
     TosaPipelineBI,
     TosaPipelineMI,
 )
-
-from executorch.examples.models.llama.config.llm_config import LlmConfig
 from executorch.examples.models.llama.export_llama_lib import (
     build_args_parser,
     get_llama_model,
 )
 
+from executorch.extension.llm.export.config.llm_config import LlmConfig
+
 input_t = Tuple[torch.Tensor]
 
 # Add project dir to sys path to workaround importlib.import_module() conditions in model_factory.py
```

backends/arm/test/test_model.py

Lines changed: 2 additions & 7 deletions

```diff
@@ -64,7 +64,7 @@ def get_args():
     parser.add_argument(
         "--timeout",
         required=False,
-        default=60 * 10,
+        default=60 * 20,
         help="Timeout in seconds used when running the model",
     )
     args = parser.parse_args()
@@ -165,11 +165,6 @@ def build_ethosu_runtime(
     extra_flags: str,
     elf_build_path: str,
 ):
-
-    extra_build_flag = ""
-    if extra_flags:
-        extra_build_flag = f"--extra_build_flags={extra_flags}"
-
     run_external_cmd(
         [
             "bash",
@@ -182,7 +177,7 @@ def build_ethosu_runtime(
             "--build_type=Release",
             f"--system_config={system_config}",
             f"--memory_mode={memory_mode}",
-            extra_build_flag,
+            f"--extra_build_flags=-DET_DUMP_OUTPUT=OFF {extra_flags}",
             f"--output={elf_build_path}",
         ]
     )
```
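
One behavioral nuance: with the old helper, an empty `extra_flags` left a bare empty-string element in the command list, whereas the new f-string always emits a well-formed `--extra_build_flags` value with `-DET_DUMP_OUTPUT=OFF` folded in unconditionally. A quick illustration (example values; it is assumed the consuming build script tolerates the trailing space):

```python
# Composed argument with extra flags supplied:
extra_flags = "-DFOO=1"  # example value, not from the diff
print(f"--extra_build_flags=-DET_DUMP_OUTPUT=OFF {extra_flags}")
# --extra_build_flags=-DET_DUMP_OUTPUT=OFF -DFOO=1

# And with no extra flags:
extra_flags = ""
print(f"--extra_build_flags=-DET_DUMP_OUTPUT=OFF {extra_flags}")
# --extra_build_flags=-DET_DUMP_OUTPUT=OFF  (plus a trailing space)
```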

backends/cadence/aot/memory_constraints.py

Lines changed: 21 additions & 9 deletions

```diff
@@ -11,7 +11,7 @@
 import typing
 from collections import defaultdict
 from dataclasses import dataclass
-from typing import cast, DefaultDict, Iterable, Optional, Sequence
+from typing import Callable, cast, DefaultDict, Iterable, Optional, Sequence, TypeAlias
 
 import torch
 import torch.fx
@@ -573,23 +573,34 @@ def compute_slice_and_select_loc_constraints(
     graph_module.recompile()
 
 
+ConstraintsGenPass: TypeAlias = Callable[
+    [MemConstraints],
+    Callable[[torch.fx.GraphModule], Optional[PassResult]],
+]
+
+
 # The class to generate all the constraints that will be passed on to the memory
 # planning algorithm.
 class GenerateMemConstraints:
     def __init__(
         self,
         mem_constraints: MemConstraints,
-        additional_constraint_gen_passes: list | None = None,
+        additional_constraint_gen_passes: Sequence[ConstraintsGenPass] | None = None,
     ) -> None:
-        self.mem_constraints = mem_constraints
-        self.additional_constraint_gen_passes = additional_constraint_gen_passes or []
+        self.mem_constraints: MemConstraints = mem_constraints
+        self.additional_constraint_gen_passes: Sequence[ConstraintsGenPass] = (
+            additional_constraint_gen_passes or []
+        )
 
     def __call__(self, graph_module: torch.fx.GraphModule) -> PassResult:
-        constraint_gen_passes: list = [
-            GenerateMemoryViewConstraints,
-            GenerateSliceAndSelectNopConstraints,
-            GenerateCatNopConstraints,
-        ] + self.additional_constraint_gen_passes
+        constraint_gen_passes: Sequence[ConstraintsGenPass] = cast(
+            list[ConstraintsGenPass],
+            [
+                GenerateMemoryViewConstraints,
+                GenerateSliceAndSelectNopConstraints,
+                GenerateCatNopConstraints,
+            ],
+        ) + list(self.additional_constraint_gen_passes)
         # Create a filter using the opt level in mem_constraints, and filter
         # the relevant passes.
         pass_filter = create_cadence_pass_filter(self.mem_constraints.opt_level)
@@ -602,6 +613,7 @@ def __call__(self, graph_module: torch.fx.GraphModule) -> PassResult:
                     typing.Callable[[torch.fx.GraphModule], Optional[PassResult]],
                 ]
             ],
+            # pyre-ignore[6]: Incompatible parameter type.
             list(filter(pass_filter, constraint_gen_passes)),
         )
     ]
```
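
The new `ConstraintsGenPass` alias tightens what was previously an untyped `list`: each entry is a callable that accepts a `MemConstraints` and returns a `GraphModule -> Optional[PassResult]` callable, which is exactly the shape of a class whose `__init__` takes the constraints object. A hypothetical extra pass conforming to the alias (the class name and `PassResult` import are illustrative, not from this commit):

```python
from typing import Optional

import torch.fx
from torch.fx.passes.infra.pass_base import PassResult  # assumed import


class GenerateMyBackendConstraints:
    """Hypothetical pass matching ConstraintsGenPass: constructed with
    MemConstraints, then invoked on a torch.fx.GraphModule."""

    def __init__(self, mem_constraints) -> None:
        self.mem_constraints = mem_constraints

    def __call__(self, graph_module: torch.fx.GraphModule) -> Optional[PassResult]:
        for node in graph_module.graph.nodes:
            # Record placement constraints on self.mem_constraints here.
            pass
        return PassResult(graph_module, False)  # the graph itself is unchanged


# Supplied through the newly typed parameter:
#   GenerateMemConstraints(
#       mem_constraints,
#       additional_constraint_gen_passes=[GenerateMyBackendConstraints],
#   )(graph_module)
```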
