Commit c577adc

Merge branch 'main' into milestone2.2
2 parents b663eca + b74c68d commit c577adc

95 files changed: +1314 −705 lines
.ci/docker/ci_commit_pins/optimum-executorch.txt

Lines changed: 1 addition & 0 deletions

```diff
@@ -0,0 +1 @@
+a3942627f5ac048e06b4b1d703b0a6a53bf6da5b
```
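
This new pin file centralizes the optimum-executorch commit that was previously hardcoded in three separate workflows; each workflow now reads it with `cat`, so bumping the pin is a one-line change in one place. As a rough illustration (not code from this commit), the same read-and-validate step could be done from Python like this:

```python
# Sketch: read the commit pin the same way the workflows do with
# `cat .ci/docker/ci_commit_pins/optimum-executorch.txt`.
from pathlib import Path

pin = Path(".ci/docker/ci_commit_pins/optimum-executorch.txt").read_text().strip()
# A full git SHA is 40 hex characters; fail fast on a malformed pin.
assert len(pin) == 40 and all(c in "0123456789abcdef" for c in pin)
print(f"git checkout {pin}")
```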

.github/workflows/android-perf-private-device-experiment.yml

Lines changed: 1 addition & 1 deletion

```diff
@@ -57,6 +57,6 @@ jobs:
       id-token: write
       contents: read
     with:
-      models: ${{ inputs.models || github.event_name == 'schedule' && 'Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf' || 'Qwen/Qwen3-0.6B' }}
+      models: ${{ inputs.models || github.event_name == 'schedule' && 'Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf,google/gemma-3-1b-it' || 'google/gemma-3-1b-it' }}
       devices: samsung_galaxy_s22_private
       benchmark_configs: ${{ inputs.benchmark_configs }}
```
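
These `models` defaults rely on short-circuit evaluation in GitHub Actions expressions: `||` and `&&` return an operand's value rather than a boolean, so the expression yields `inputs.models` when provided, the long model list on scheduled runs, and `google/gemma-3-1b-it` otherwise. Python's `or`/`and` have the same value-returning semantics, which gives a compact analogue (variable names here are illustrative):

```python
# Python analogue of:
#   ${{ inputs.models || github.event_name == 'schedule' && '<scheduled list>' || '<fallback>' }}
inputs_models = ""  # empty string == "not provided"
event_name = "schedule"
scheduled_list = "Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf,google/gemma-3-1b-it"

models = inputs_models or (event_name == "schedule" and scheduled_list) or "google/gemma-3-1b-it"
print(models)  # prints the scheduled list; change event_name to see the fallback
```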

.github/workflows/android-perf.yml

Lines changed: 4 additions & 12 deletions

```diff
@@ -72,7 +72,7 @@ jobs:
           # Separate default values from the workflow dispatch. To ensure defaults are accessible
           # during scheduled runs and to provide flexibility for different defaults between
           # on-demand and periodic benchmarking.
-          CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l,meta-llama/Llama-3.2-1B,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,allenai/OLMo-1B-hf' || 'Qwen/Qwen3-0.6B' }}
+          CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l,meta-llama/Llama-3.2-1B,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,allenai/OLMo-1B-hf,google/gemma-3-1b-it' || 'Qwen/Qwen3-0.6B' }}
           CRON_DEFAULT_DEVICES: samsung_galaxy_s22
         run: |
           set -eux
@@ -341,10 +341,11 @@ jobs:
         echo "tokenizer.json is downloaded to $DOWNLOADED_PATH"
 
         # Install optimum-executorch
+        OPTIMUM_ET_COMMIT=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
         git clone https://github.com/huggingface/optimum-executorch
         pushd optimum-executorch
         # There is no release yet, for CI stability, always test from the same commit on main
-        git checkout 4c3b18f6cca68c5ccff809131d570062723d7188
+        git checkout $OPTIMUM_ET_COMMIT
         python install_dev.py --skip_override_torch
         pip list
 
@@ -353,21 +354,12 @@ jobs:
           "--task" "text-generation"
           "--recipe" "xnnpack"
           "--use_custom_sdpa"
+          "--use_custom_kv_cache"
           "--qlinear"
           "--qembedding"
           "--output_dir" ".."
         )
 
-        # Add conditional arguments based on model
-        case "${HF_MODEL_REPO}" in
-          *"google/gemma-3-1b-it"*)
-            echo "--use_custom_kv_cache can not be used for HybridCache"
-            ;;
-          *)
-            ARGS+=("--use_custom_kv_cache")
-            ;;
-        esac
-
         optimum-cli export executorch "${ARGS[@]}"
         popd
```

.github/workflows/apple-perf-private-device-experiment.yml

Lines changed: 1 addition & 1 deletion

```diff
@@ -57,6 +57,6 @@ jobs:
       id-token: write
       contents: read
     with:
-      models: ${{ inputs.models || github.event_name == 'schedule' && 'Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf' || 'Qwen/Qwen3-0.6B' }}
+      models: ${{ inputs.models || github.event_name == 'schedule' && 'Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf,google/gemma-3-1b-it' || 'google/gemma-3-1b-it' }}
       devices: apple_iphone_15_private
       benchmark_configs: ${{ inputs.benchmark_configs }}
```

.github/workflows/apple-perf.yml

Lines changed: 4 additions & 12 deletions

```diff
@@ -72,7 +72,7 @@ jobs:
           # Separate default values from the workflow dispatch. To ensure defaults are accessible
           # during scheduled runs and to provide flexibility for different defaults between
           # on-demand and periodic benchmarking.
-          CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf' || 'Qwen/Qwen3-0.6B' }}
+          CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf,google/gemma-3-1b-it' || 'Qwen/Qwen3-0.6B' }}
           CRON_DEFAULT_DEVICES: apple_iphone_15
         run: |
           set -eux
@@ -346,10 +346,11 @@ jobs:
         echo "tokenizer.json is downloaded to $DOWNLOADED_PATH"
 
         # Install optimum-executorch
+        OPTIMUM_ET_COMMIT=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
         git clone https://github.com/huggingface/optimum-executorch
         pushd optimum-executorch
         # There is no release yet, for CI stability, always test from the same commit on main
-        git checkout 4c3b18f6cca68c5ccff809131d570062723d7188
+        git checkout $OPTIMUM_ET_COMMIT
         ${CONDA_RUN} python install_dev.py --skip_override_torch
         pip list
 
@@ -358,21 +359,12 @@ jobs:
           "--task" "text-generation"
           "--recipe" "xnnpack"
           "--use_custom_sdpa"
+          "--use_custom_kv_cache"
           "--qlinear"
           "--qembedding"
           "--output_dir" ".."
         )
 
-        # Add conditional arguments based on model
-        case "${HF_MODEL_REPO}" in
-          *"google/gemma-3-1b-it"*)
-            echo "--use_custom_kv_cache can not be used for HybridCache"
-            ;;
-          *)
-            ARGS+=("--use_custom_kv_cache")
-            ;;
-        esac
-
         ${CONDA_RUN} optimum-cli export executorch "${ARGS[@]}"
         popd
```

.github/workflows/trunk.yml

Lines changed: 7 additions & 15 deletions

```diff
@@ -594,10 +594,11 @@ jobs:
         echo "::group::Set up Hugging Face"
         pip install -U "huggingface_hub[cli]"
         huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
+        OPTIMUM_ET_COMMIT=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
         git clone https://github.com/huggingface/optimum-executorch
         pushd optimum-executorch
         # There is no release yet, for CI stability, always test from the same commit on main
-        git checkout 4c3b18f6cca68c5ccff809131d570062723d7188
+        git checkout $OPTIMUM_ET_COMMIT
         python install_dev.py --skip_override_torch
         popd
         pip list
@@ -614,21 +615,12 @@ jobs:
           "--task" "text-generation"
           "--recipe" "xnnpack"
           "--use_custom_sdpa"
+          "--use_custom_kv_cache"
           "--qlinear"
           "--qembedding"
           "--output_dir" "${OUTPUT_DIR}"
         )
 
-        # Add conditional arguments based on model
-        case "${MODEL_ID}" in
-          *"google/gemma-3-1b-it"*)
-            echo "--use_custom_kv_cache can not be used for HybridCache"
-            ;;
-          *)
-            ARGS+=("--use_custom_kv_cache")
-            ;;
-        esac
-
         optimum-cli export executorch "${ARGS[@]}"
 
         ls -FlAGhp ${OUTPUT_DIR}
@@ -732,18 +724,18 @@ jobs:
       timeout: 90
       script: |
         set -eux
-
+
         # The generic Linux job chooses to use base env, not the one setup by the image
         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
         conda activate "${CONDA_ENV}"
-
+
         # Build and install Executorch
         PYTHON_EXECUTABLE=python \
         CMAKE_ARGS="-DEXECUTORCH_BUILD_NXP_NEUTRON=ON" \
         .ci/scripts/setup-linux.sh --build-tool "cmake"
-
+
         # Install test requirements
         pip install -r backends/nxp/requirements-tests.txt
-
+
         # Run pytest
         PYTHON_EXECUTABLE=python bash backends/nxp/run_unittests.sh
```

The paired blank-line `-`/`+` changes in the last hunk are trailing-whitespace cleanup only.

backends/arm/operators/op_index_tensor.py

Lines changed: 12 additions & 2 deletions

```diff
@@ -189,11 +189,16 @@ def define_node(
             if i == 0:
                 gather_index_name = reshaped_idxs.name
             else:
+                add_idxs = tosa_graph.addIntermediate(
+                    reshaped_idxs.shape,
+                    reshaped_idxs.dtype,
+                )
                 tosa_graph.addOperator(
                     ts.TosaOp.Op().ADD,
                     [gather_index_name, reshaped_idxs.name],
-                    [gather_index_name],
+                    [add_idxs.name],
                 )
+                gather_index_name = add_idxs.name
 
         gather_vals_shape = [N, K, C]
         reshaped_input = tosa_graph.addIntermediate(gather_vals_shape, values.dtype)
@@ -314,11 +319,16 @@ def define_node(
             if i == 0:
                 gather_index_name = reshaped_idxs.name
             else:
+                add_idxs = tosa_graph.addIntermediate(
+                    reshaped_idxs.shape,
+                    reshaped_idxs.dtype,
+                )
                 tosa_graph.addOperator(
                     ts.TosaOp.Op().ADD,
                     [gather_index_name, reshaped_idxs.name],
-                    [gather_index_name],
+                    [add_idxs.name],
                 )
+                gather_index_name = add_idxs.name
 
         gather_vals_shape = [N, K, C]
         reshaped_input = tosa_graph.addIntermediate(gather_vals_shape, values.dtype)
```
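
The bug fixed here: the TOSA ADD wrote its result back into one of its own input tensors, so every iteration after the first reused `gather_index_name` as both operand and output. The fix allocates a fresh intermediate per iteration and rebinds the accumulator to it. A minimal sketch of the corrected pattern, using a hypothetical graph-builder API in place of the real TOSA serializer calls:

```python
# Sketch of the fixed accumulation loop. `graph.add_intermediate` and
# `graph.add_op` are hypothetical stand-ins for the serializer API in the
# diff (tosa_graph.addIntermediate / tosa_graph.addOperator).
def sum_index_tensors(graph, reshaped_index_tensors) -> str:
    gather_index_name = ""
    for i, idxs in enumerate(reshaped_index_tensors):
        if i == 0:
            gather_index_name = idxs.name
        else:
            # Allocate a fresh output tensor instead of writing the ADD result
            # back into its first operand; each tensor is assigned exactly once.
            acc = graph.add_intermediate(idxs.shape, idxs.dtype)
            graph.add_op("ADD", inputs=[gather_index_name, idxs.name],
                         outputs=[acc.name])
            gather_index_name = acc.name
    return gather_index_name
```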

backends/arm/test/models/test_llama.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -22,13 +22,13 @@
     TosaPipelineBI,
     TosaPipelineMI,
 )
-
-from executorch.examples.models.llama.config.llm_config import LlmConfig
 from executorch.examples.models.llama.export_llama_lib import (
     build_args_parser,
     get_llama_model,
 )
 
+from executorch.extension.llm.export.config.llm_config import LlmConfig
+
 input_t = Tuple[torch.Tensor]
 
 # Add project dir to sys path to workaround importlib.import_module() conditions in model_factory.py
```

backends/arm/test/test_model.py

Lines changed: 2 additions & 7 deletions

```diff
@@ -64,7 +64,7 @@ def get_args():
     parser.add_argument(
         "--timeout",
         required=False,
-        default=60 * 10,
+        default=60 * 20,
         help="Timeout in seconds used when running the model",
     )
     args = parser.parse_args()
@@ -165,11 +165,6 @@ def build_ethosu_runtime(
     extra_flags: str,
     elf_build_path: str,
 ):
-
-    extra_build_flag = ""
-    if extra_flags:
-        extra_build_flag = f"--extra_build_flags={extra_flags}"
-
     run_external_cmd(
         [
             "bash",
@@ -182,7 +177,7 @@ def build_ethosu_runtime(
             "--build_type=Release",
             f"--system_config={system_config}",
             f"--memory_mode={memory_mode}",
-            extra_build_flag,
+            f"--extra_build_flags=-DET_DUMP_OUTPUT=OFF {extra_flags}",
             f"--output={elf_build_path}",
         ]
     )
```
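
One behavioral nuance: with the old helper, an empty `extra_flags` left a bare empty-string element in the command list, whereas the new f-string always emits a well-formed `--extra_build_flags` value with `-DET_DUMP_OUTPUT=OFF` folded in unconditionally. A quick illustration (example values; it is assumed the consuming build script tolerates the trailing space):

```python
# Composed argument with extra flags supplied:
extra_flags = "-DFOO=1"  # example value, not from the diff
print(f"--extra_build_flags=-DET_DUMP_OUTPUT=OFF {extra_flags}")
# --extra_build_flags=-DET_DUMP_OUTPUT=OFF -DFOO=1

# And with no extra flags:
extra_flags = ""
print(f"--extra_build_flags=-DET_DUMP_OUTPUT=OFF {extra_flags}")
# --extra_build_flags=-DET_DUMP_OUTPUT=OFF  (plus a trailing space)
```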

backends/cadence/aot/memory_constraints.py

Lines changed: 21 additions & 9 deletions

```diff
@@ -11,7 +11,7 @@
 import typing
 from collections import defaultdict
 from dataclasses import dataclass
-from typing import cast, DefaultDict, Iterable, Optional, Sequence
+from typing import Callable, cast, DefaultDict, Iterable, Optional, Sequence, TypeAlias
 
 import torch
 import torch.fx
@@ -573,23 +573,34 @@ def compute_slice_and_select_loc_constraints(
     graph_module.recompile()
 
 
+ConstraintsGenPass: TypeAlias = Callable[
+    [MemConstraints],
+    Callable[[torch.fx.GraphModule], Optional[PassResult]],
+]
+
+
 # The class to generate all the constraints that will be passed on to the memory
 # planning algorithm.
 class GenerateMemConstraints:
     def __init__(
         self,
         mem_constraints: MemConstraints,
-        additional_constraint_gen_passes: list | None = None,
+        additional_constraint_gen_passes: Sequence[ConstraintsGenPass] | None = None,
     ) -> None:
-        self.mem_constraints = mem_constraints
-        self.additional_constraint_gen_passes = additional_constraint_gen_passes or []
+        self.mem_constraints: MemConstraints = mem_constraints
+        self.additional_constraint_gen_passes: Sequence[ConstraintsGenPass] = (
+            additional_constraint_gen_passes or []
+        )
 
     def __call__(self, graph_module: torch.fx.GraphModule) -> PassResult:
-        constraint_gen_passes: list = [
-            GenerateMemoryViewConstraints,
-            GenerateSliceAndSelectNopConstraints,
-            GenerateCatNopConstraints,
-        ] + self.additional_constraint_gen_passes
+        constraint_gen_passes: Sequence[ConstraintsGenPass] = cast(
+            list[ConstraintsGenPass],
+            [
+                GenerateMemoryViewConstraints,
+                GenerateSliceAndSelectNopConstraints,
+                GenerateCatNopConstraints,
+            ],
+        ) + list(self.additional_constraint_gen_passes)
         # Create a filter using the opt level in mem_constraints, and filter
         # the relevant passes.
         pass_filter = create_cadence_pass_filter(self.mem_constraints.opt_level)
@@ -602,6 +613,7 @@ def __call__(self, graph_module: torch.fx.GraphModule) -> PassResult:
                     typing.Callable[[torch.fx.GraphModule], Optional[PassResult]],
                 ]
             ],
+            # pyre-ignore[6]: Incompatible parameter type.
             list(filter(pass_filter, constraint_gen_passes)),
         )
     ]
```
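
The new `ConstraintsGenPass` alias tightens what was previously an untyped `list`: each entry is a callable that accepts a `MemConstraints` and returns a `GraphModule -> Optional[PassResult]` callable, which is exactly the shape of a class whose `__init__` takes the constraints object. A hypothetical extra pass conforming to the alias (the class name and `PassResult` import are illustrative, not from this commit):

```python
from typing import Optional

import torch.fx
from torch.fx.passes.infra.pass_base import PassResult  # assumed import


class GenerateMyBackendConstraints:
    """Hypothetical pass matching ConstraintsGenPass: constructed with
    MemConstraints, then invoked on a torch.fx.GraphModule."""

    def __init__(self, mem_constraints) -> None:
        self.mem_constraints = mem_constraints

    def __call__(self, graph_module: torch.fx.GraphModule) -> Optional[PassResult]:
        for node in graph_module.graph.nodes:
            # Record placement constraints on self.mem_constraints here.
            pass
        return PassResult(graph_module, False)  # the graph itself is unchanged


# Supplied through the newly typed parameter:
#   GenerateMemConstraints(
#       mem_constraints,
#       additional_constraint_gen_passes=[GenerateMyBackendConstraints],
#   )(graph_module)
```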
