pytorch
diff --git a/‎.ci/docker/ci_commit_pins/optimum-executorch.txt‎
Lines changed: 1 addition & 1 deletion b/‎.ci/docker/ci_commit_pins/optimum-executorch.txt‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.ci/scripts/setup-emscripten.sh‎
Lines changed: 8 additions & 0 deletions b/‎.ci/scripts/setup-emscripten.sh‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎.github/workflows/android-perf.yml‎
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/android-perf.yml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎.github/workflows/apple-perf.yml‎
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/apple-perf.yml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎.github/workflows/pull.yml‎
Lines changed: 35 additions & 0 deletions b/‎.github/workflows/pull.yml‎
Lines changed: 35 additions & 0 deletions
diff --git a/‎.github/workflows/trunk.yml‎
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/trunk.yml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎CMakeLists.txt‎
Lines changed: 84 additions & 9 deletions b/‎CMakeLists.txt‎
Lines changed: 84 additions & 9 deletions
diff --git a/‎backends/apple/coreml/compiler/torch_ops.py‎
Lines changed: 7 additions & 0 deletions b/‎backends/apple/coreml/compiler/torch_ops.py‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎backends/arm/README.md‎
Lines changed: 19 additions & 4 deletions b/‎backends/arm/README.md‎
Lines changed: 19 additions & 4 deletions
@@ -1 +1 @@
-a3942627f5ac048e06b4b1d703b0a6a53bf6da5b
+eea657ddbdeb1118943a92fb73c289985c3ee1ba
@@ -7,6 +7,13 @@
 
 set -ex
 
+# Install Node.js through nvm; a Node.js version >= 17 is needed.
+install_node() {
+    curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.3/install.sh | bash
+    source "$HOME/.nvm/nvm.sh"
+    nvm install 22
+}
+
 install_emscripten() {
     git clone https://github.com/emscripten-core/emsdk.git
     pushd emsdk || return
@@ -16,4 +23,5 @@ install_emscripten() {
     popd || return
 }
 
+install_node
 install_emscripten
@@ -355,8 +355,8 @@ jobs:
                 "--recipe" "xnnpack"
                 "--use_custom_sdpa"
                 "--use_custom_kv_cache"
-                "--qlinear"
-                "--qembedding"
+                "--qlinear" "8da4w"
+                "--qembedding" "8w"
                 "--output_dir" ".."
               )
 
 
@@ -360,8 +360,8 @@ jobs:
               "--recipe" "xnnpack"
               "--use_custom_sdpa"
               "--use_custom_kv_cache"
-              "--qlinear"
-              "--qembedding"
+              "--qlinear" "8da4w"
+              "--qembedding" "8w"
               "--output_dir" ".."
             )
 
 
@@ -764,6 +764,41 @@ jobs:
         # Test selective build
         PYTHON_EXECUTABLE=python bash examples/wasm/test_build_wasm.sh
 
+  unittest-wasm-bindings:
+    name: unittest-wasm-bindings
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    strategy:
+      fail-fast: false
+    with:
+      runner: linux.2xlarge
+      docker-image: ci-image:executorch-ubuntu-22.04-clang12
+      submodules: 'recursive'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        BUILD_TOOL="cmake"
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
+
+        # Install Node.js and Emscripten
+        source .ci/scripts/setup-emscripten.sh
+
+        # Build the Wasm unit tests
+        bash scripts/build_wasm_tests.sh
+
+        # Install Jest
+        cd cmake-out-wasm/extension/wasm/test
+        npm install --save-dev jest
+
+        # Run unit test
+        npm test
+
   unittest-nxp-neutron:
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     permissions:
 
@@ -711,8 +711,8 @@ jobs:
           "--recipe" "xnnpack"
           "--use_custom_sdpa"
           "--use_custom_kv_cache"
-          "--qlinear"
-          "--qembedding"
+          "--qlinear" "8da4w"
+          "--qembedding" "8w"
           "--output_dir" "${OUTPUT_DIR}"
         )
 
 
@@ -123,6 +123,8 @@ set(CMAKE_INSTALL_RPATH_USE_LINK_PATH ON)
 # Instead please use `find_package(executorch REQUIRED)` in the example
 # directory and add a new executable in the example `CMakeLists.txt`.
 
+set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR})
+
 if(NOT EXECUTORCH_ENABLE_LOGGING)
   # Avoid pulling in the logging strings, which can be large. Note that this
   # will set the compiler flag for all targets in this directory, and for all
@@ -383,6 +385,12 @@ add_library(executorch_no_prim_ops ALIAS executorch_core)
 # A list of all configured backends.
 set(_executorch_backends "")
 
+# A list of all configured extensions.
+set(_executorch_extensions "")
+
+# A list of all configured kernel libraries.
+set(_executorch_kernels "")
+
 target_link_libraries(executorch_core PRIVATE program_schema)
 if(ANDROID)
   target_link_libraries(executorch_core PUBLIC log)
@@ -579,6 +587,7 @@ endif()
 
 if(EXECUTORCH_BUILD_EXTENSION_APPLE)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/apple)
+  list(APPEND _executorch_extensions apple_extension)
 endif()
 
 if(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER)
@@ -589,6 +598,7 @@ if(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER)
     FILES_MATCHING
     PATTERN "*.h"
   )
+  list(APPEND _executorch_extensions extension_data_loader)
 endif()
 
 if(EXECUTORCH_BUILD_EXTENSION_EVALUE_UTIL)
@@ -603,6 +613,7 @@ endif()
 
 if(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/flat_tensor)
+  list(APPEND _executorch_extensions extension_flat_tensor)
 endif()
 
 if(EXECUTORCH_BUILD_EXTENSION_MODULE)
@@ -613,6 +624,7 @@ if(EXECUTORCH_BUILD_EXTENSION_MODULE)
     FILES_MATCHING
     PATTERN "*.h"
   )
+  list(APPEND _executorch_extensions extension_module_static)
 endif()
 
 if(EXECUTORCH_BUILD_EXTENSION_LLM)
@@ -632,14 +644,17 @@ if(EXECUTORCH_BUILD_EXTENSION_LLM)
         ${ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG}
     )
   endif()
+  list(APPEND _executorch_extensions tokenizers)
 endif()
 
 if(EXECUTORCH_BUILD_EXTENSION_LLM_RUNNER)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/llm/runner)
+  list(APPEND _executorch_extensions extension_llm_runner)
 endif()
 
 if(EXECUTORCH_BUILD_EXTENSION_LLM_APPLE)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/llm/apple)
+  list(APPEND _executorch_extensions extension_llm_apple)
 endif()
 
 if(EXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL)
@@ -650,10 +665,12 @@ if(EXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL)
     FILES_MATCHING
     PATTERN "*.h"
   )
+  list(APPEND _executorch_extensions extension_runner_util)
 endif()
 
 if(EXECUTORCH_BUILD_EXTENSION_TENSOR)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/tensor)
+  list(APPEND _executorch_extensions extension_tensor)
 endif()
 
 if(EXECUTORCH_BUILD_PTHREADPOOL AND EXECUTORCH_BUILD_CPUINFO)
@@ -754,18 +771,25 @@ if(EXECUTORCH_BUILD_PYBIND)
   )
 endif()
 
+if(EXECUTORCH_BUILD_WASM)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/wasm)
+endif()
+
 if(EXECUTORCH_BUILD_EXTENSION_TRAINING)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/training)
+  list(APPEND _executorch_extensions extension_training)
 endif()
 
 if(EXECUTORCH_BUILD_KERNELS_LLM)
   # TODO: move all custom kernels to ${CMAKE_CURRENT_SOURCE_DIR}/kernels/custom
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/llm/custom_ops)
+  list(APPEND _executorch_kernels custom_ops_aot_lib)
 endif()
 
 if(EXECUTORCH_BUILD_KERNELS_QUANTIZED)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/kernels/quantized)
   executorch_target_link_options_shared_lib(quantized_ops_lib)
+  list(APPEND _executorch_kernels quantized_ops_lib)
 endif()
 
 if(EXECUTORCH_BUILD_VULKAN)
@@ -778,25 +802,76 @@ if(EXECUTORCH_BUILD_VGF)
   list(APPEND _executorch_backends vgf_backend)
 endif()
 
-
 # Top-level interface targets.
-add_library(executorch_backends INTERFACE)
-add_library(executorch::backends ALIAS executorch_backends)
 
 # A target containing all configured backends.
+add_library(executorch_backends INTERFACE)
+add_library(executorch::backends ALIAS executorch_backends)
 target_link_libraries(executorch_backends INTERFACE ${_executorch_backends})
 
-install(
-  TARGETS executorch_backends
-  INCLUDES
-  DESTINATION ${_common_include_directories}
+# A target containing all configured extensions.
+add_library(executorch_extensions INTERFACE)
+add_library(executorch::extensions ALIAS executorch_extensions)
+target_link_libraries(executorch_extensions INTERFACE ${_executorch_extensions})
+
+# A target containing all configured kernels; selective build is used when any EXECUTORCH_SELECT_OPS_* variable is non-empty.
+add_library(executorch_kernels INTERFACE)
+add_library(executorch::kernels ALIAS executorch_kernels)
+if(NOT "${EXECUTORCH_SELECT_OPS_YAML}" STREQUAL ""
+   OR NOT "${EXECUTORCH_SELECT_OPS_LIST}" STREQUAL ""
+   OR NOT "${EXECUTORCH_SELECT_OPS_MODEL}" STREQUAL ""
 )
+  gen_selected_ops(
+    LIB_NAME
+    "executorch_selected_kernels"
+    OPS_SCHEMA_YAML
+    "${EXECUTORCH_SELECT_OPS_YAML}"
+    ROOT_OPS
+    "${EXECUTORCH_SELECT_OPS_LIST}"
+    INCLUDE_ALL_OPS
+    FALSE
+    OPS_FROM_MODEL
+    "${EXECUTORCH_SELECT_OPS_MODEL}"
+    DTYPE_SELECTIVE_BUILD
+    "${EXECUTORCH_ENABLE_DTYPE_SELECTIVE_BUILD}"
+  )
+
+  generate_bindings_for_kernels(
+    LIB_NAME
+    "executorch_selected_kernels"
+    FUNCTIONS_YAML
+    ${EXECUTORCH_ROOT}/kernels/portable/functions.yaml
+    CUSTOM_OPS_YAML
+    ""
+    DTYPE_SELECTIVE_BUILD
+    "${EXECUTORCH_ENABLE_DTYPE_SELECTIVE_BUILD}"
+  )
+
+  gen_operators_lib(
+    LIB_NAME
+    "executorch_selected_kernels"
+    KERNEL_LIBS
+    "portable_kernels"
+    DEPS
+    executorch_core
+    DTYPE_SELECTIVE_BUILD
+    "${EXECUTORCH_ENABLE_DTYPE_SELECTIVE_BUILD}"
+  )
+  list(APPEND _executorch_kernels executorch_selected_kernels)
+else()
+  # No selective build requested (all selectors empty or unset) - link the full optimized or portable ops library.
+  if(EXECUTORCH_BUILD_KERNELS_OPTIMIZED)
+    list(APPEND _executorch_kernels optimized_native_cpu_ops_lib)
+  else()
+    list(APPEND _executorch_kernels portable_ops_lib)
+  endif()
+endif()
+target_link_libraries(executorch_kernels INTERFACE ${_executorch_kernels})
 
 if(EXECUTORCH_BUILD_EXECUTOR_RUNNER)
   # Baseline libraries that executor_runner will link against.
   set(_executor_runner_libs executorch extension_evalue_util
-                            extension_runner_util gflags
-                            executorch_backends
+                            extension_runner_util gflags executorch_backends
   )
 
   if(EXECUTORCH_BUILD_KERNELS_OPTIMIZED)
 
@@ -15,6 +15,7 @@
     _get_inputs,
     NUM_TO_NUMPY_DTYPE,
     NUM_TO_TORCH_DTYPE,
+    split,
     transpose,
     unbind,
 )
@@ -37,6 +38,12 @@ def unbind_copy(context, node):
     unbind(context, node)
 
 
+# https://github.com/apple/coremltools/pull/2563
+@register_torch_op(override=False)
+def split_copy(context, node):
+    split(context, node)
+
+
 # https://github.com/apple/coremltools/pull/2558
 @register_torch_op(
     torch_alias=["torchao::dequantize_affine", "torchao.dequantize_affine"],
 
@@ -104,6 +104,14 @@ The you can run the tests with
 pytest -c /dev/null -v -n auto backends/arm/test
 ```
 
+### Model test dependencies
+Some model tests in the Arm backend require third-party libraries or packages. To run these tests, install the required dependencies by running the script `examples/arm/setup.sh` with the flag `--setup-test-dependency`.
+
+Please note that installing model test dependencies is a standalone process. When using the `--setup-test-dependency` flag, the script will install only the necessary dependencies for model tests, skipping all other setup procedures.
+
+List of models with specific dependencies:
+- Stable Diffusion: [diffusers](https://github.com/huggingface/diffusers/tree/main)
+
 ## Passes
 
 With the default passes in the Arm Ethos-U backend, assuming the model lowers fully to the
@@ -189,7 +197,14 @@ Configuration of the EthosUBackend export flow is controlled by CompileSpec info
 As this is in active development see the EthosUBackend for accurate information on [compilation flags](https://github.com/pytorch/executorch/blob/29f6dc9353e90951ed3fae3c57ae416de0520067/backends/arm/arm_backend.py#L319-L324)
 
 ## Model specific and optional passes
-The current TOSA version does not support int64. For LLMs for example LLama, often aten.emedding is the first operator and it requires int64 indicies.
-In order to lower this to TOSA and int64->int32 cast need to be injected. This pass need to run very early in the lowering process and can be passed in to the to_edge_transform_and_lower() function call as an optional parameter. See example in: backends/arm/test/models/test_llama.py.
-By doing this aten.embedding will be decomposed into to aten.index_select which can handle int32 indices.
-Note that this additional step is only needed for pure float models. With quantization this is automatically handled during annotation before the export stage.
+The current TOSA version does not support int64. However, int64 is commonly used in many models. In order to lower the operators with int64 inputs and/or outputs to TOSA, a few passes have been developed to handle the int64-related issues. The main idea behind these passes is to replace the uses of int64 with int32 where feasible.
+- For floating-point models, these passes need to run very early in the lowering process and can be passed in to the to_edge_transform_and_lower() function call as an optional parameter.
+- For quantized models, these transformations will be automatically handled during annotation before the export stage.
+
+List of model specific and optional passes:
+- InsertCastForOpsWithInt64InputPass
+    - Functionality:
+        - For LLMs such as Llama, some operators like aten.embedding have int64 input. In order to lower these operators to TOSA, this pass will insert a casting node that converts the input from int64 to int32.
+        - Example usage: backends/arm/test/models/test_llama.py
+    - Supported Ops:
+        - aten.embedding.default, aten.slice_copy.Tensor
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-a3942627f5ac048e06b4b1d703b0a6a53bf6da5b`
	`1`	`+eea657ddbdeb1118943a92fb73c289985c3ee1ba`