
Commit 7c52981

Update

[ghstack-poisoned]

2 parents 52302df + a21281b

65 files changed: +2,873 −418 lines

Lines changed: 1 addition & 1 deletion

```diff
@@ -1 +1 @@
-a3942627f5ac048e06b4b1d703b0a6a53bf6da5b
+eea657ddbdeb1118943a92fb73c289985c3ee1ba
```

.ci/scripts/setup-emscripten.sh

Lines changed: 8 additions & 0 deletions

```diff
@@ -7,6 +7,13 @@
 
 set -ex
 
+# need version >= 17
+install_node() {
+  curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.3/install.sh | bash
+  source "$HOME/.nvm/nvm.sh"
+  nvm install 22
+}
+
 install_emscripten() {
   git clone https://github.com/emscripten-core/emsdk.git
   pushd emsdk || return
@@ -16,4 +23,5 @@ install_emscripten() {
   popd || return
 }
 
+install_node
 install_emscripten
```
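Because the script only defines and then calls `install_node` and `install_emscripten`, it is meant to be sourced, as the CI job added below does, so the nvm and emsdk environment changes persist in the calling shell. A minimal local sketch, assuming an ExecuTorch checkout:

```bash
# Source rather than execute, so PATH updates from nvm/emsdk survive.
source .ci/scripts/setup-emscripten.sh

node --version   # the script installs Node 22 via nvm (>= 17 required)
emcc --version   # Emscripten toolchain from the activated emsdk
```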

.github/workflows/android-perf.yml

Lines changed: 2 additions & 2 deletions

```diff
@@ -355,8 +355,8 @@ jobs:
   "--recipe" "xnnpack"
   "--use_custom_sdpa"
   "--use_custom_kv_cache"
-  "--qlinear"
-  "--qembedding"
+  "--qlinear" "8da4w"
+  "--qembedding" "8w"
   "--output_dir" ".."
 )
 
```
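With this change `--qlinear` and `--qembedding` take explicit mode values instead of acting as bare switches. Reading the values as the usual torchao shorthand (an interpretation, not stated in the diff): `8da4w` denotes 8-bit dynamic-activation, 4-bit-weight quantization for linear layers, and `8w` denotes 8-bit weight-only quantization for embeddings. A sketch of the argument array the workflow assembles (the command consuming it is outside the excerpt):

```bash
# Export arguments built by the perf workflows after this change.
ARGS=(
  "--recipe" "xnnpack"
  "--use_custom_sdpa"
  "--use_custom_kv_cache"
  "--qlinear" "8da4w"    # assumed: 8-bit dynamic activation, 4-bit weights
  "--qembedding" "8w"    # assumed: 8-bit weight-only embeddings
  "--output_dir" ".."
)
```

The same two-line change is applied to apple-perf.yml and trunk.yml below.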

.github/workflows/apple-perf.yml

Lines changed: 2 additions & 2 deletions

```diff
@@ -360,8 +360,8 @@ jobs:
   "--recipe" "xnnpack"
   "--use_custom_sdpa"
   "--use_custom_kv_cache"
-  "--qlinear"
-  "--qembedding"
+  "--qlinear" "8da4w"
+  "--qembedding" "8w"
   "--output_dir" ".."
 )
 
```

.github/workflows/pull.yml

Lines changed: 35 additions & 0 deletions

```diff
@@ -764,6 +764,41 @@ jobs:
         # Test selective build
         PYTHON_EXECUTABLE=python bash examples/wasm/test_build_wasm.sh
 
+  unittest-wasm-bindings:
+    name: unittest-wasm-bindings
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    strategy:
+      fail-fast: false
+    with:
+      runner: linux.2xlarge
+      docker-image: ci-image:executorch-ubuntu-22.04-clang12
+      submodules: 'recursive'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        BUILD_TOOL="cmake"
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
+
+        # Install Node.js and Emscripten
+        source .ci/scripts/setup-emscripten.sh
+
+        # Build the Wasm tests
+        bash scripts/build_wasm_tests.sh
+
+        # Install Jest
+        cd cmake-out-wasm/extension/wasm/test
+        npm install --save-dev jest
+
+        # Run unit tests
+        npm test
+
   unittest-nxp-neutron:
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     permissions:
```
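The job's script block can be replayed outside CI. A sketch, assuming a Linux checkout with conda already active (the CI-image conda env selection step is skipped):

```bash
# Approximate local replay of the unittest-wasm-bindings job.
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
source .ci/scripts/setup-emscripten.sh   # Node.js 22 + Emscripten
bash scripts/build_wasm_tests.sh         # builds into cmake-out-wasm/

cd cmake-out-wasm/extension/wasm/test
npm install --save-dev jest              # Jest test runner
npm test                                 # runs the Wasm binding unit tests
```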

.github/workflows/trunk.yml

Lines changed: 2 additions & 2 deletions

```diff
@@ -711,8 +711,8 @@ jobs:
   "--recipe" "xnnpack"
   "--use_custom_sdpa"
   "--use_custom_kv_cache"
-  "--qlinear"
-  "--qembedding"
+  "--qlinear" "8da4w"
+  "--qembedding" "8w"
   "--output_dir" "${OUTPUT_DIR}"
 )
 
```

CMakeLists.txt

Lines changed: 84 additions & 9 deletions

```diff
@@ -123,6 +123,8 @@ set(CMAKE_INSTALL_RPATH_USE_LINK_PATH ON)
 # Instead please use `find_package(executorch REQUIRED)` in the example
 # directory and add a new executable in the example `CMakeLists.txt`.
 
+set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR})
+
 if(NOT EXECUTORCH_ENABLE_LOGGING)
   # Avoid pulling in the logging strings, which can be large. Note that this
   # will set the compiler flag for all targets in this directory, and for all
@@ -383,6 +385,12 @@ add_library(executorch_no_prim_ops ALIAS executorch_core)
 # A list of all configured backends.
 set(_executorch_backends "")
 
+# A list of all configured extensions.
+set(_executorch_extensions "")
+
+# A list of all configured kernel libraries.
+set(_executorch_kernels "")
+
 target_link_libraries(executorch_core PRIVATE program_schema)
 if(ANDROID)
   target_link_libraries(executorch_core PUBLIC log)
@@ -579,6 +587,7 @@ endif()
 
 if(EXECUTORCH_BUILD_EXTENSION_APPLE)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/apple)
+  list(APPEND _executorch_extensions apple_extension)
 endif()
 
 if(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER)
@@ -589,6 +598,7 @@ if(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER)
     FILES_MATCHING
     PATTERN "*.h"
   )
+  list(APPEND _executorch_extensions extension_data_loader)
 endif()
 
 if(EXECUTORCH_BUILD_EXTENSION_EVALUE_UTIL)
@@ -603,6 +613,7 @@ endif()
 
 if(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/flat_tensor)
+  list(APPEND _executorch_extensions extension_flat_tensor)
 endif()
 
 if(EXECUTORCH_BUILD_EXTENSION_MODULE)
@@ -613,6 +624,7 @@ if(EXECUTORCH_BUILD_EXTENSION_MODULE)
     FILES_MATCHING
     PATTERN "*.h"
   )
+  list(APPEND _executorch_extensions extension_module_static)
 endif()
 
 if(EXECUTORCH_BUILD_EXTENSION_LLM)
@@ -632,14 +644,17 @@ if(EXECUTORCH_BUILD_EXTENSION_LLM)
       ${ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG}
     )
   endif()
+  list(APPEND _executorch_extensions tokenizers)
 endif()
 
 if(EXECUTORCH_BUILD_EXTENSION_LLM_RUNNER)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/llm/runner)
+  list(APPEND _executorch_extensions extension_llm_runner)
 endif()
 
 if(EXECUTORCH_BUILD_EXTENSION_LLM_APPLE)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/llm/apple)
+  list(APPEND _executorch_extensions extension_llm_apple)
 endif()
 
 if(EXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL)
@@ -650,10 +665,12 @@ if(EXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL)
     FILES_MATCHING
     PATTERN "*.h"
   )
+  list(APPEND _executorch_extensions extension_runner_util)
 endif()
 
 if(EXECUTORCH_BUILD_EXTENSION_TENSOR)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/tensor)
+  list(APPEND _executorch_extensions extension_tensor)
 endif()
 
 if(EXECUTORCH_BUILD_PTHREADPOOL AND EXECUTORCH_BUILD_CPUINFO)
@@ -754,18 +771,25 @@ if(EXECUTORCH_BUILD_PYBIND)
   )
 endif()
 
+if(EXECUTORCH_BUILD_WASM)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/wasm)
+endif()
+
 if(EXECUTORCH_BUILD_EXTENSION_TRAINING)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/training)
+  list(APPEND _executorch_extensions extension_training)
 endif()
 
 if(EXECUTORCH_BUILD_KERNELS_LLM)
   # TODO: move all custom kernels to ${CMAKE_CURRENT_SOURCE_DIR}/kernels/custom
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/llm/custom_ops)
+  list(APPEND _executorch_kernels custom_ops_aot_lib)
 endif()
 
 if(EXECUTORCH_BUILD_KERNELS_QUANTIZED)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/kernels/quantized)
   executorch_target_link_options_shared_lib(quantized_ops_lib)
+  list(APPEND _executorch_kernels quantized_ops_lib)
 endif()
 
 if(EXECUTORCH_BUILD_VULKAN)
@@ -778,25 +802,76 @@ if(EXECUTORCH_BUILD_VGF)
   list(APPEND _executorch_backends vgf_backend)
 endif()
 
-
 # Top-level interface targets.
-add_library(executorch_backends INTERFACE)
-add_library(executorch::backends ALIAS executorch_backends)
 
 # A target containing all configured backends.
+add_library(executorch_backends INTERFACE)
+add_library(executorch::backends ALIAS executorch_backends)
 target_link_libraries(executorch_backends INTERFACE ${_executorch_backends})
 
-install(
-  TARGETS executorch_backends
-  INCLUDES
-  DESTINATION ${_common_include_directories}
+# A target containing all configured extensions.
+add_library(executorch_extensions INTERFACE)
+add_library(executorch::extensions ALIAS executorch_extensions)
+target_link_libraries(executorch_extensions INTERFACE ${_executorch_extensions})
+
+# A target containing all configured kernels, with selective build, if enabled.
+add_library(executorch_kernels INTERFACE)
+add_library(executorch::kernels ALIAS executorch_kernels)
+if(NOT EXECUTORCH_SELECT_OPS_YAML STREQUAL ""
+   OR NOT EXECUTORCH_SELECT_OPS_LIST STREQUAL ""
+   OR NOT EXECUTORCH_SELECT_OPS_MODEL STREQUAL ""
 )
+  gen_selected_ops(
+    LIB_NAME
+    "executorch_selected_kernels"
+    OPS_SCHEMA_YAML
+    "${EXECUTORCH_SELECT_OPS_LIB}"
+    ROOT_OPS
+    "${EXECUTORCH_SELECT_OPS_LIST}"
+    INCLUDE_ALL_OPS
+    FALSE
+    OPS_FROM_MODEL
+    "${EXECUTORCH_SELECT_OPS_MODEL}"
+    DTYPE_SELECTIVE_BUILD
+    "${EXECUTORCH_ENABLE_DTYPE_SELECTIVE_BUILD}"
+  )
+
+  generate_bindings_for_kernels(
+    LIB_NAME
+    "executorch_selected_kernels"
+    FUNCTIONS_YAML
+    ${EXECUTORCH_ROOT}/kernels/portable/functions.yaml
+    CUSTOM_OPS_YAML
+    ""
+    DTYPE_SELECTIVE_BUILD
+    "${EXECUTORCH_ENABLE_DTYPE_SELECTIVE_BUILD}"
+  )
+
+  gen_operators_lib(
+    LIB_NAME
+    "executorch_selected_kernels"
+    KERNEL_LIBS
+    "portable_kernels"
+    DEPS
+    executorch_core
+    DTYPE_SELECTIVE_BUILD
+    "${EXECUTORCH_ENABLE_DTYPE_SELECTIVE_BUILD}"
+  )
+  list(APPEND _executorch_kernels executorch_selected_kernels)
+else()
+  # No selective build - link the full library.
+  if(EXECUTORCH_BUILD_KERNELS_OPTIMIZED)
+    list(APPEND _executorch_kernels optimized_native_cpu_ops_lib)
+  else()
+    list(APPEND _executorch_kernels portable_ops_lib)
+  endif()
+endif()
+target_link_libraries(executorch_kernels INTERFACE ${_executorch_kernels})
 
 if(EXECUTORCH_BUILD_EXECUTOR_RUNNER)
   # Baseline libraries that executor_runner will link against.
   set(_executor_runner_libs executorch extension_evalue_util
-                            extension_runner_util gflags
-                            executorch_backends
+                            extension_runner_util gflags executorch_backends
   )
 
   if(EXECUTORCH_BUILD_KERNELS_OPTIMIZED)
```
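Taken together, the build now exposes three aggregate interface targets, executorch::backends, executorch::extensions, and executorch::kernels, and the kernels target switches to a selectively built operator library whenever any of the EXECUTORCH_SELECT_OPS_* cache variables is set. A configure sketch; the operator list and the ON value are illustrative, only the variable names come from the diff:

```bash
# Selective build: only the listed root ops (and, if dtype selection is
# enabled, only their used dtypes) go into executorch_selected_kernels.
cmake -S . -B cmake-out \
  -DEXECUTORCH_SELECT_OPS_LIST="aten::add.out,aten::mm.out" \
  -DEXECUTORCH_ENABLE_DTYPE_SELECTIVE_BUILD=ON
cmake --build cmake-out -j"$(nproc)"
```

Downstream CMake can then link the umbrella targets instead of enumerating individual backend, extension, and kernel libraries.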

backends/apple/coreml/compiler/torch_ops.py

Lines changed: 7 additions & 0 deletions

```diff
@@ -15,6 +15,7 @@
     _get_inputs,
     NUM_TO_NUMPY_DTYPE,
     NUM_TO_TORCH_DTYPE,
+    split,
     transpose,
     unbind,
 )
@@ -37,6 +38,12 @@ def unbind_copy(context, node):
     unbind(context, node)
 
 
+# https://github.com/apple/coremltools/pull/2563
+@register_torch_op(override=False)
+def split_copy(context, node):
+    split(context, node)
+
+
 # https://github.com/apple/coremltools/pull/2558
 @register_torch_op(
     torch_alias=["torchao::dequantize_affine", "torchao.dequantize_affine"],
```

backends/arm/README.md

Lines changed: 19 additions & 4 deletions

````diff
@@ -104,6 +104,14 @@ Then you can run the tests with
 pytest -c /dev/null -v -n auto backends/arm/test
 ```
 
+### Model test dependencies
+Some model tests in the Arm backend require third-party libraries or packages. To run these tests, install the required dependencies by running the script `examples/arm/setup.sh` with the flag `--setup-test-dependency`.
+
+Please note that installing model test dependencies is a standalone process. When using the `--setup-test-dependency` flag, the script will install only the dependencies needed for model tests, skipping all other setup procedures.
+
+List of models with specific dependencies:
+- Stable Diffusion: [diffusers](https://github.com/huggingface/diffusers/tree/main)
+
 ## Passes
 
 With the default passes in the Arm Ethos-U backend, assuming the model lowers fully to the
@@ -189,7 +197,14 @@ Configuration of the EthosUBackend export flow is controlled by CompileSpec info
 As this is in active development see the EthosUBackend for accurate information on [compilation flags](https://github.com/pytorch/executorch/blob/29f6dc9353e90951ed3fae3c57ae416de0520067/backends/arm/arm_backend.py#L319-L324)
 
 ## Model specific and optional passes
-The current TOSA version does not support int64. For LLMs for example LLama, often aten.emedding is the first operator and it requires int64 indicies.
-In order to lower this to TOSA and int64->int32 cast need to be injected. This pass need to run very early in the lowering process and can be passed in to the to_edge_transform_and_lower() function call as an optional parameter. See example in: backends/arm/test/models/test_llama.py.
-By doing this aten.embedding will be decomposed into to aten.index_select which can handle int32 indices.
-Note that this additional step is only needed for pure float models. With quantization this is automatically handled during annotation before the export stage.
+The current TOSA version does not support int64. However, int64 is commonly used in many models. In order to lower operators with int64 inputs and/or outputs to TOSA, a few passes have been developed to handle the int64-related issues. The main idea behind these passes is to replace the uses of int64 with int32 where feasible.
+- For floating-point models, these passes need to run very early in the lowering process and can be passed to the to_edge_transform_and_lower() function call as an optional parameter.
+- For quantized models, these transformations are handled automatically during annotation, before the export stage.
+
+List of model specific and optional passes:
+- InsertCastForOpsWithInt64InputPass
+  - Functionality:
+    - For LLMs such as Llama, some operators like aten.embedding have int64 inputs. In order to lower these operators to TOSA, this pass will insert a casting node that converts the input from int64 to int32.
+  - Example usage: backends/arm/test/models/test_llama.py
+  - Supported Ops:
+    - aten.embedding.default, aten.slice_copy.Tensor
````
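A sketch of the two commands the new README section describes, with paths taken from the diff:

```bash
# Standalone step: install only the third-party packages the Arm model tests
# need (e.g. diffusers for the Stable Diffusion test); other setup is skipped.
bash examples/arm/setup.sh --setup-test-dependency

# Then run the Arm backend test suite as documented above.
pytest -c /dev/null -v -n auto backends/arm/test
```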
