Merged
Changes from all commits
33 commits
ed0ab94
remove ET_LOG from kernel temp allocator
zonglinpeng Sep 12, 2025
1454743
Measure model latency when input list is not provided
limintang Sep 13, 2025
a77c8df
Remove generate_from_pos since there's no user of it (#14277)
larryliu0820 Sep 13, 2025
23acfea
Swap Llava export arg order (#14238)
jackzhxng Sep 13, 2025
2faf5ce
Update Voxtral README.md (#14283)
jackzhxng Sep 13, 2025
888f84d
update pt2e example code (#14284)
cccclai Sep 13, 2025
3ce51ab
[tosa-tools] Remove v0.80 usage from ET (#14291)
pytorchbot Sep 13, 2025
6e0d2c9
[tosa-tools] Add top level targets for tosa, serializer (#14292)
pytorchbot Sep 13, 2025
6789f75
[ethos-u-vela==4.4.0._git_8278f09] Update to vela 4.4.0 (#14293)
pytorchbot Sep 13, 2025
09f5beb
Add support for fusing Conv+ReLU
mcremon-meta Sep 13, 2025
b5523cd
pyre-fix
metascroy Sep 13, 2025
897b0d5
Reapply "[Windows] Run native unit tests in CI (#13923)"
GregoryComer Sep 13, 2025
0b3227f
Add support for conv1d
ethansfng Sep 13, 2025
0b4fe31
Support qwen phi gemma whisper (#14294)
neuropilot-captain Sep 13, 2025
79c8e49
Remove non-per-tensor quantized add and replace with per-tensor variant
DrJessop Sep 14, 2025
e9903b8
Add int8/uint8 specialized variants of quantized_add_per_tensor
DrJessop Sep 14, 2025
eec95d0
Support custom quantized_matmul + variants
DrJessop Sep 14, 2025
59008b5
Arm backend: Fix incorrect tag name (#14298)
mansnils Sep 15, 2025
dfd7f2a
NXP backend: Move optimization in keep_one_empty_buffer.py to Model b…
roman-janik-nxp Sep 15, 2025
957915f
Arm Backend: Add visualization script for Arm models (#14257)
jmahbs Sep 15, 2025
41c2a62
Add -x to test_torchao_huggingface_checkpoints.sh (#14232)
swolchok Sep 15, 2025
365cab3
Clean up test type dispatches (#14228)
ethansfng Sep 15, 2025
76692f5
Arm backend: Add --enable_debug_mode to AOT compiler (Try 2)
mergennachin Sep 15, 2025
7edb278
Support qwen phi gemma whisper (#14296)
neuropilot-captain Sep 15, 2025
eaad1c2
Arm backend: Add INT16 support to rescale operation (#14301)
lucylq Sep 15, 2025
0447ebd
swap arg init in text_llm_runner (#14304)
lucylq Sep 15, 2025
0f066e0
Arm backend: use ArmCompileSpec in backend
Erik-Lundell Sep 15, 2025
750cba7
Revert "Add EXECUTORCH_THREADPOOL_SIZE options, default to u… (#14307)
GregoryComer Sep 15, 2025
30a904b
Arm backend: Add support for ETDump of outputs
zingo Sep 15, 2025
5a1c117
Temporarily disable OpenVINO CI job (#14315)
GregoryComer Sep 15, 2025
ea4f004
Update cache position population and arg order for multimodal runner …
kirklandsign Sep 15, 2025
4a4f5a0
custom fbcode serialize stage to run FVP internally on arm ops tests …
3l1 Sep 15, 2025
e0dda90
[Backend Tester] Skip in-place activation tests due to lack of suppor…
GregoryComer Sep 16, 2025
2 changes: 1 addition & 1 deletion .ci/scripts/setup-windows.ps1
@@ -1,5 +1,5 @@
param (
[string]$editable = $false
[string]$editable = "false"
)

conda create --yes --quiet -n et python=3.12
2 changes: 1 addition & 1 deletion .ci/scripts/test_torchao_huggingface_checkpoints.sh
@@ -1,5 +1,5 @@
#!/usr/bin/env bash
set -euo pipefail
set -euxo pipefail

# -------------------------
# Args / flags
33 changes: 28 additions & 5 deletions .ci/scripts/unittest-windows.ps1
@@ -1,15 +1,38 @@
param (
[string]$editable = $false
[string]$buildMode = "Release"
)

Set-PSDebug -Trace 1
$ErrorActionPreference = 'Stop'
$PSNativeCommandUseErrorActionPreference = $true

# Run pytest with coverage
# pytest -n auto --cov=./ --cov-report=xml
pytest -v --full-trace -c pytest-windows.ini
# Run native unit tests (via ctest)
New-Item -Path "test-build" -ItemType Directory
cd "test-build"

cmake .. --preset windows -B . -DEXECUTORCH_BUILD_TESTS=ON -DCMAKE_BUILD_TYPE=$buildMode
if ($LASTEXITCODE -ne 0) {
Write-Host "Pytest invocation was unsuccessful. Exit code: $LASTEXITCODE."
Write-Host "CMake configuration was unsuccessful. Exit code: $LASTEXITCODE."
exit $LASTEXITCODE
}

cmake --build . -j8 --config $buildMode --verbose
if ($LASTEXITCODE -ne 0) {
Write-Host "CMake build was unsuccessful. Exit code: $LASTEXITCODE."
exit $LASTEXITCODE
}

ctest -j8 . --build-config $buildMode --output-on-failure -E "method_test|tensor_parser_test"
if ($LASTEXITCODE -ne 0) {
Write-Host "CTest run was unsuccessful. Exit code: $LASTEXITCODE."
exit $LASTEXITCODE
}

cd ..

# Run pytest
pytest -v -c pytest-windows.ini
if ($LASTEXITCODE -ne 0) {
Write-Host "Pytest invocation was unsuccessful. Exit code: $LASTEXITCODE."
exit $LASTEXITCODE
}
12 changes: 10 additions & 2 deletions .github/workflows/_unittest.yml
@@ -69,7 +69,15 @@ jobs:
\$ErrorActionPreference = 'Stop'
\$PSNativeCommandUseErrorActionPreference = \$true

.ci/scripts/setup-windows.ps1
.ci/scripts/setup-windows.ps1 -editable "${{ inputs.editable }}"
if (\$LASTEXITCODE -ne 0) {
Write-Host "Setup failed. Exit code: \$LASTEXITCODE."
exit \$LASTEXITCODE
}

powershell .ci/scripts/unittest-windows.ps1 -editable "${{ inputs.editable }}"
.ci/scripts/unittest-windows.ps1 -buildMode "${{ inputs.build-mode }}"
if (\$LASTEXITCODE -ne 0) {
Write-Host "Unit tests failed. Exit code: \$LASTEXITCODE."
exit \$LASTEXITCODE
}
}"
1 change: 1 addition & 0 deletions .github/workflows/pull.yml
@@ -779,6 +779,7 @@ jobs:
contents: read
strategy:
fail-fast: false
if: false # TODO Re-enable after fixing timeouts (#14314)
with:
runner: linux.2xlarge
docker-image: ci-image:executorch-ubuntu-22.04-gcc9
2 changes: 1 addition & 1 deletion .github/workflows/trunk.yml
@@ -1032,5 +1032,5 @@ jobs:

.ci/scripts/setup-windows.ps1

powershell .ci/scripts/test_model.ps1 -modelName ${{ matrix.model }} -backend ${{ matrix.backend }}
.ci/scripts/test_model.ps1 -modelName ${{ matrix.model }} -backend ${{ matrix.backend }}
}"
10 changes: 7 additions & 3 deletions CMakeLists.txt
@@ -143,9 +143,13 @@ endif()

# -ffunction-sections -fdata-sections: breaks function and data into sections so
# they can be properly gc'd. -s: strip symbol.
set(CMAKE_CXX_FLAGS_RELEASE
"-ffunction-sections -fdata-sections ${CMAKE_CXX_FLAGS_RELEASE}"
)
if(WIN32)
set(CMAKE_CXX_FLAGS_RELEASE "/Gy /Gw ${CMAKE_CXX_FLAGS_RELEASE}")
else()
set(CMAKE_CXX_FLAGS_RELEASE
"-ffunction-sections -fdata-sections ${CMAKE_CXX_FLAGS_RELEASE}"
)
endif()
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -s")
endif()
12 changes: 4 additions & 8 deletions backends/arm/TARGETS
@@ -22,8 +22,7 @@ runtime.python_library(
"common/debug.py",
],
deps = [
"fbsource//third-party/tosa_tools/v0.80/serialization_lib/python/serializer:serializer",
"fbsource//third-party/tosa_tools/v1.00/serialization_lib/python/serializer:serializer",
"fbsource//third-party/tosa_tools:serializer",
"//caffe2:torch",
"//executorch/exir:lib",
],
@@ -37,10 +36,8 @@
deps = [
"fbsource//third-party/pypi/flatbuffers:flatbuffers",
"fbsource//third-party/pypi/ml-dtypes:ml-dtypes",
"fbsource//third-party/tosa_tools/v0.80/serialization_lib/python/serializer:serializer",
"fbsource//third-party/tosa_tools/v1.00/serialization_lib/python/serializer:serializer",
"fbsource//third-party/tosa_tools/v0.80/serialization_lib/python/tosa:tosa",
"fbsource//third-party/tosa_tools/v1.00/serialization_lib/python/tosa:tosa",
"fbsource//third-party/tosa_tools:serializer",
"fbsource//third-party/tosa_tools:tosa",
":process_node",
"//executorch/exir/backend:compile_spec_schema",
"//executorch/backends/arm/operators:lib",
@@ -83,8 +80,7 @@ runtime.python_library(
name = "process_node",
srcs = ["process_node.py"],
deps = [
"fbsource//third-party/tosa_tools/v0.80/serialization_lib/python/tosa:tosa",
"fbsource//third-party/tosa_tools/v1.00/serialization_lib/python/tosa:tosa",
"fbsource//third-party/tosa_tools:tosa",
"//executorch/backends/arm/operators:node_visitor",
"//executorch/backends/arm/tosa:mapping",
"//executorch/backends/arm/tosa:quant_utils",
8 changes: 5 additions & 3 deletions backends/arm/arm_vela.py
@@ -25,17 +25,19 @@
# per-io structs to simplify runtime use.
def vela_bin_pack_io(prefix, data):
vela_input_shapes = data[prefix + "_shape"]
# Vela input/output shape is fixed to 6D
vela_io_shape_dims = 6

ios = struct.pack("<i", len(vela_input_shapes))
for i in range(len(vela_input_shapes)):
io_shape = vela_input_shapes[i]
io_elem_size = data[prefix + "_elem_size"][i]
io_offset = data[prefix + "_offset"][i]
io_region = data[prefix + "_region"][i]
assert len(io_shape) <= 4
inp_pad = io_shape.tolist() + [0] * (4 - len(io_shape))
assert len(io_shape) == vela_io_shape_dims
inp_pad = io_shape.tolist()
io_struct = struct.pack(
"<iiiiiii", *inp_pad, io_elem_size, io_offset, io_region
"<iiiiiiiii", *inp_pad, io_elem_size, io_offset, io_region
)
ios += io_struct
return ios
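Note on the hunk above: with the fixed 6-D layout each packed descriptor is nine little-endian int32 values (six shape dims plus elem_size, offset and region) instead of the previous seven. A minimal sketch, not the repository code, assuming hypothetical shape and metadata values:

```python
import struct

# Hypothetical 6-D shape and metadata, just to show the layout.
io_shape = [1, 1, 1, 16, 16, 8]
elem_size, offset, region = 1, 0, 1

assert len(io_shape) == 6  # Vela now reports fixed 6-D shapes
packed = struct.pack("<iiiiiiiii", *io_shape, elem_size, offset, region)
assert len(packed) == 9 * 4  # 36 bytes per descriptor
```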
2 changes: 1 addition & 1 deletion backends/arm/debug/TARGETS
@@ -8,7 +8,7 @@ runtime.python_library(
"schema.py",
],
deps = [
"fbsource//third-party/tosa_tools/v1.00/serialization_lib/python/serializer:serializer",
"fbsource//third-party/tosa_tools:serializer",
"//caffe2:torch",
],
)
13 changes: 6 additions & 7 deletions backends/arm/ethosu/backend.py
@@ -15,6 +15,7 @@
from typing import final, List

from executorch.backends.arm.arm_vela import vela_compile
from executorch.backends.arm.ethosu.compile_spec import EthosUCompileSpec

from executorch.backends.arm.tosa.backend import TOSABackend
from executorch.exir.backend.backend_details import BackendDetails, PreprocessResult
@@ -35,16 +36,13 @@ class EthosUBackend(BackendDetails):

@staticmethod
def _compile_tosa_flatbuffer(
tosa_flatbuffer: bytes, compile_spec: List[CompileSpec]
tosa_flatbuffer: bytes, compile_spec: EthosUCompileSpec
) -> bytes:
"""
Static helper method to do the compilation of the TOSA flatbuffer
representation to a target specific binary stream.
"""
compile_flags = []
for spec in compile_spec:
if spec.key == "compile_flags":
compile_flags.append(spec.value.decode())
compile_flags = compile_spec.compiler_flags

if len(compile_flags) == 0:
# Not testing for compile_flags correctness here, just that they are
@@ -64,10 +62,11 @@ def _compile_tosa_flatbuffer(
@staticmethod
def preprocess(
edge_program: ExportedProgram,
compile_spec: List[CompileSpec],
compile_specs: List[CompileSpec],
) -> PreprocessResult:
logger.info(f"{EthosUBackend.__name__} preprocess")

compile_spec = EthosUCompileSpec.from_list(compile_specs)
# deduce TOSA compile_spec from Ethos-U compile spec. We get a new
# compile spec list, containing only elements relevant for the
# TOSABackend.
@@ -77,7 +76,7 @@ def preprocess(
# ('All backend implementation are final...'), so use composition instead.
# preprocess returns the serialized TOSA flatbuffer in .processed_bytes,
# which can be passed on to next compilation step.
tosa_preprocess = TOSABackend.preprocess(edge_program, tosa_compile_spec)
tosa_preprocess = TOSABackend._preprocess(edge_program, tosa_compile_spec)

binary = EthosUBackend._compile_tosa_flatbuffer(
tosa_preprocess.processed_bytes, compile_spec
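The hunks above replace the per-entry scan of `List[CompileSpec]` with a typed `EthosUCompileSpec` whose `compiler_flags` the backend reads directly. A hedged sketch of the idea follows (the real class lives in backends/arm/ethosu/compile_spec.py; the name below is hypothetical and only illustrates how a `from_list` constructor can fold the old key/value scan into one place):

```python
from dataclasses import dataclass, field
from typing import List


@dataclass
class HypotheticalEthosUCompileSpec:
    compiler_flags: List[str] = field(default_factory=list)

    @classmethod
    def from_list(cls, compile_specs):
        # compile_specs: iterable of CompileSpec-like objects with .key / .value (bytes)
        flags = [s.value.decode() for s in compile_specs if s.key == "compile_flags"]
        return cls(compiler_flags=flags)
```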
3 changes: 1 addition & 2 deletions backends/arm/operators/TARGETS
@@ -20,8 +20,7 @@ runtime.python_library(
name = "ops",
srcs = glob(["op_*.py", "ops_*.py"]),
deps = [
"fbsource//third-party/tosa_tools/v0.80/serialization_lib/python/tosa:tosa",
"fbsource//third-party/tosa_tools/v1.00/serialization_lib/python/tosa:tosa",
"fbsource//third-party/tosa_tools:tosa",
":node_visitor",
":operator_validation_utils",
"//executorch/backends/arm/tosa:mapping",
4 changes: 3 additions & 1 deletion backends/arm/operators/op_abs.py
@@ -73,7 +73,9 @@ def define_node(
abs_output = output

# Do the INT32 Abs
tosa_graph.addOperator(
self._serialize_operator(
node,
tosa_graph,
ts.TosaOp.Op().ABS,
[
rescaled_inputs[0].name,
15 changes: 11 additions & 4 deletions backends/arm/operators/op_rescale.py
@@ -46,13 +46,20 @@ def define_node(
input_zp = cast(int, node.args[3])
output_zp = cast(int, node.args[4])

if input_dtype != map_dtype(torch.int8, self.tosa_spec) and input_zp != 0:
if (
input_dtype
not in [
map_dtype(torch.int8, self.tosa_spec),
map_dtype(torch.int16, self.tosa_spec),
]
and input_zp != 0
):
raise ValueError(
f"If input dtype is not int8, input_zp must be 0. Got input_dtype{input_dtype=}, {input_zp=}"
f"If input dtype is not int8 or int16, input_zp must be 0. Got input_dtype{input_dtype=}, {input_zp=}"
)
if output_dtype != torch.int8 and output_zp != 0:
if output_dtype not in [torch.int8, torch.int16] and output_zp != 0:
raise ValueError(
f"If output dtype is not int8, output_zp must be 0. Got {ts.DTypeNames[output_dtype]}, {output_zp=}"
f"If output dtype is not int8 or int16, output_zp must be 0. Got {ts.DTypeNames[output_dtype]}, {output_zp=}"
)

build_rescale(
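The widened precondition is easier to read in isolation; below is a simplified sketch (using torch dtypes directly rather than the TOSA dtype mapping used in the hunk):

```python
import torch


def check_zero_point(dtype: torch.dtype, zp: int, role: str) -> None:
    # int8 and int16 may carry a non-zero zero-point; all other dtypes must not.
    if dtype not in (torch.int8, torch.int16) and zp != 0:
        raise ValueError(
            f"If {role} dtype is not int8 or int16, {role}_zp must be 0. Got {dtype=}, {zp=}"
        )


check_zero_point(torch.int16, 128, "input")   # now accepted
check_zero_point(torch.int32, 0, "output")    # fine, zero-point is 0
# check_zero_point(torch.int32, 5, "output")  # raises ValueError
```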
8 changes: 6 additions & 2 deletions backends/arm/operators/op_sum.py
@@ -67,7 +67,9 @@ def define_node(
dtype=ts.DType.INT32,
)

tosa_graph.addOperator(
self._serialize_operator(
node,
tosa_graph,
ts.TosaOp.Op().REDUCE_SUM,
[rescaled_inputs[0].name],
[intermediate.name],
@@ -111,7 +113,9 @@ def define_node(
attr = ts.TosaSerializerAttribute()
attr.ReduceSumAttribute(tensor.dim_order.index(dim))

tosa_graph.addOperator(
self._serialize_operator(
node,
tosa_graph,
ts.TosaOp.Op().REDUCE_SUM,
[tensor.name],
[output.name],
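Both the op_abs.py and op_sum.py hunks route serialization through `self._serialize_operator` instead of calling `tosa_graph.addOperator` directly. The wrapper's body is not part of this diff; the sketch below is only a guess at its intent (every name except `addOperator` is hypothetical): keep the call in one place so the originating `torch.fx` node can be recorded alongside the emitted TOSA operator, possibly for the debug/visualization tooling touched elsewhere in this PR.

```python
def _serialize_operator(self, node, tosa_graph, tosa_op, inputs, outputs, attr=None):
    # Assumed debug hook: remember which fx node produced this TOSA operator.
    if getattr(self, "debug_hook", None) is not None:
        self.debug_hook.record(node, tosa_op, outputs)
    # Forward to the serializer exactly as the old call sites did.
    tosa_graph.addOperator(tosa_op, inputs, outputs, attr)
```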
2 changes: 1 addition & 1 deletion backends/arm/requirements-arm-ethos-u.txt
@@ -3,4 +3,4 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

ethos-u-vela @ git+https://gitlab.arm.com/artificial-intelligence/ethos-u/ethos-u-vela@d37febc1715edf0d236c2ff555739a8a9aadcf9a
ethos-u-vela == 4.4.0
2 changes: 2 additions & 0 deletions backends/arm/requirements-arm-tosa.txt
@@ -5,5 +5,7 @@

ml_dtypes == 0.5.1
flatbuffers == 24.3.25
tosa-adapter-model-explorer == 0.0.1
ai-edge-model-explorer >= 0.1.16

tosa-tools @ git+https://git.gitlab.arm.com/tosa/[email protected]
4 changes: 2 additions & 2 deletions backends/arm/runtime/EthosUBackend.cpp
@@ -383,8 +383,8 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface {
*tensor_count = *tensor_count * tensor.size(i);
}

// The VelaIO type has a shape of fixed size 4
for (int i = 0; i < 4; i++) {
// The VelaIO type has a shape of fixed size 6
for (int i = 0; i < shapeDim; i++) {
*io_count = *io_count * io->shape[i];
}
}
4 changes: 3 additions & 1 deletion backends/arm/runtime/VelaBinStream.h
@@ -34,9 +34,11 @@ typedef struct {
char data[]; // block.name specific format data
} VelaBinBlock;

constexpr int shapeDim = 6; // Number of dimensions in VelaIO

// A Vela input or output descriptor in the binary stream
typedef struct {
int shape[4]; // Up to 4D shape of input or output
int shape[shapeDim]; // Shape of input or output
int elem_size; // Element sizeof in bytes
int offset; // Offset in bytes within SRAM working data
int region; // Scratch region this belongs to
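For reference, a Python-side mirror of this layout (a sketch for illustration, not code in the tree) reads the stream written by `vela_bin_pack_io` back as a count followed by nine int32 fields per descriptor, matching `shape[shapeDim]` plus `elem_size`, `offset` and `region`:

```python
import struct


def unpack_vela_ios(blob: bytes):
    # Sketch only: parse <count><9 x int32 per descriptor>, little-endian.
    (count,) = struct.unpack_from("<i", blob, 0)
    ios, pos = [], 4
    for _ in range(count):
        fields = struct.unpack_from("<9i", blob, pos)
        ios.append(
            {
                "shape": list(fields[:6]),
                "elem_size": fields[6],
                "offset": fields[7],
                "region": fields[8],
            }
        )
        pos += 9 * 4
    return ios
```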
4 changes: 2 additions & 2 deletions backends/arm/scripts/build_executor_runner.sh
@@ -44,7 +44,7 @@ help() {
echo " --memory_mode=<CONFIG> Vela memory mode, used for setting the Timing Adapter parameters of the Corstone platforms."
echo " Valid values are Shared_Sram(for Ethos-U55, Ethos-U65, Ethos-85), Sram_Only(for Ethos-U55, Ethos-U65, Ethos-U85) or Dedicated_Sram(for Ethos-U65, Ethos-U85)."
echo " Default: Shared_Sram for the Ethos-U55 and Sram_Only for the Ethos-U85"
echo " --etdump Adds Devtools etdump support to track timing, etdump area will be base64 encoded in the log"
echo " --etdump Adds Devtools etdump support to track timing and output, etdump area will be base64 encoded in the log"
echo " --extra_build_flags=<FLAGS> Extra flags to pass to cmake like -DET_ARM_BAREMETAL_METHOD_ALLOCATOR_POOL_SIZE=60000 Default: none "
echo " --output=<FOLDER> Output folder Default: <MODEL>/<MODEL>_<TARGET INFO>.pte"
echo " --et_build_root=<FOLDER> Build output root folder to use, defaults to ${et_build_root}"
@@ -161,7 +161,7 @@ if [ "$bundleio" = true ] ; then
fi

if [ "$build_with_etdump" = true ] ; then
build_with_etdump_flags=" -DEXECUTORCH_ENABLE_EVENT_TRACER=ON "
build_with_etdump_flags=" -DEXECUTORCH_ENABLE_EVENT_TRACER=ON -DET_DUMP_INTERMEDIATE_OUTPUTS=ON "
fi

echo "Building with BundleIO/etdump/extra flags: ${build_bundleio_flags} ${build_with_etdump_flags} ${extra_build_flags}"
2 changes: 1 addition & 1 deletion backends/arm/scripts/mlsdk_utils.sh
@@ -7,7 +7,7 @@
set -euo pipefail

mlsdk_manifest_url="https://github.com/arm/ai-ml-sdk-manifest.git"
mlsdk_manifest_tag="dev-snapshot-2025-09-12"
mlsdk_manifest_tag="refs/tags/dev-snapshot-2025-09-12"

script_dir=$(cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
