pytorch
diff --git a/‎backends/arm/test/common.py‎
Lines changed: 30 additions & 1 deletion b/‎backends/arm/test/common.py‎
Lines changed: 30 additions & 1 deletion
diff --git a/‎backends/arm/test/misc/test_debug_feats.py‎
Lines changed: 37 additions & 0 deletions b/‎backends/arm/test/misc/test_debug_feats.py‎
Lines changed: 37 additions & 0 deletions
diff --git a/‎backends/cadence/CMakeLists.txt‎
Lines changed: 0 additions & 49 deletions b/‎backends/cadence/CMakeLists.txt‎
Lines changed: 0 additions & 49 deletions
diff --git a/‎backends/cadence/build_cadence_xtensa.sh‎
Lines changed: 4 additions & 5 deletions b/‎backends/cadence/build_cadence_xtensa.sh‎
Lines changed: 4 additions & 5 deletions
diff --git a/‎backends/cadence/cadence_runner/CMakeLists.txt‎
Lines changed: 72 additions & 0 deletions b/‎backends/cadence/cadence_runner/CMakeLists.txt‎
Lines changed: 72 additions & 0 deletions
diff --git a/‎backends/cadence/build_cadence_runner.sh‎ renamed to ‎backends/cadence/cadence_runner/build_cadence_runner.sh‎
Lines changed: 5 additions & 4 deletions b/‎backends/cadence/build_cadence_runner.sh‎ renamed to ‎backends/cadence/cadence_runner/build_cadence_runner.sh‎
Lines changed: 5 additions & 4 deletions
diff --git a/‎backends/cadence/hifi/kernels/CMakeLists.txt‎
Lines changed: 3 additions & 0 deletions b/‎backends/cadence/hifi/kernels/CMakeLists.txt‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎backends/cadence/hifi/operators/CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions b/‎backends/cadence/hifi/operators/CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎backends/cadence/reference/kernels/CMakeLists.txt‎
Lines changed: 6 additions & 1 deletion b/‎backends/cadence/reference/kernels/CMakeLists.txt‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎backends/cadence/reference/kernels/kernels.cpp‎
Lines changed: 7 additions & 10 deletions b/‎backends/cadence/reference/kernels/kernels.cpp‎
Lines changed: 7 additions & 10 deletions
@@ -86,6 +86,29 @@ def is_option_enabled(option: str, fail_if_not_enabled: bool = False) -> bool:
             return False
 
 
+def maybe_get_tosa_collate_path() -> str | None:
+    """
+    Checks the environment variable TOSA_TESTCASES_BASE_PATH and returns the
+    path where the artifacts of the current test should be stored.
+
+    The path has the form <base>/<flavour>/<test class>/<test name>, where
+    <flavour> is "tosa-bi" if the test name contains "BI", "tosa-mi" if it
+    contains "MI", and "other" otherwise. The test class and test name are
+    taken from the environment variable PYTEST_CURRENT_TEST.
+
+    Returns None if TOSA_TESTCASES_BASE_PATH is unset or empty, or if the
+    current test cannot be identified (PYTEST_CURRENT_TEST is unset, empty,
+    or contains no "::" separator), so that callers can fall back to a
+    default location.
+    """
+    tosa_test_base = os.environ.get("TOSA_TESTCASES_BASE_PATH")
+    if not tosa_test_base:
+        return None
+
+    # Example value:
+    # 'backends/arm/test/ops/test_mean_dim.py::TestMeanDim::test_meandim_tosa_BI_0_zeros (call)'
+    current_test = os.environ.get("PYTEST_CURRENT_TEST")
+    if not current_test:
+        # No test id is available, so there is nothing to build a path from.
+        return None
+
+    parts = current_test.split("::")
+    if len(parts) < 2:
+        return None
+
+    test_class = parts[1]
+    # The last component ends with a stage marker such as " (call)"; drop it.
+    test_name = parts[-1].split(" ")[0]
+
+    if "BI" in test_name:
+        flavour = "tosa-bi"
+    elif "MI" in test_name:
+        flavour = "tosa-mi"
+    else:
+        flavour = "other"
+
+    return os.path.join(tosa_test_base, flavour, test_class, test_name)
+
+
+
+
 def get_tosa_compile_spec(
     permute_memory_to_nhwc=True, custom_path=None
 ) -> list[CompileSpec]:
@@ -101,7 +124,13 @@ def get_tosa_compile_spec_unbuilt(
     """Get the ArmCompileSpecBuilder for the default TOSA tests, to modify
     the compile spec before calling .build() to finalize it.
     """
-    intermediate_path = custom_path or tempfile.mkdtemp(prefix="arm_tosa_")
+    if not custom_path:
+        intermediate_path = maybe_get_tosa_collate_path() or tempfile.mkdtemp(
+            prefix="arm_tosa_"
+        )
+    else:
+        intermediate_path = custom_path
+
     if not os.path.exists(intermediate_path):
         os.makedirs(intermediate_path, exist_ok=True)
     compile_spec_builder = (
 
@@ -6,6 +6,7 @@
 
 import logging
 import os
+import shutil
 import tempfile
 import unittest
 
@@ -149,3 +150,39 @@ def test_dump_ops_and_dtypes(self):
             .dump_operator_distribution()
         )
         # Just test that there are no execeptions.
+
+
+class TestCollateTosaTests(unittest.TestCase):
+    """Tests the collation of TOSA tests through setting the environment variable TOSA_TESTCASES_BASE_PATH."""
+
+    def test_collate_tosa_BI_tests(self):
+        env_var = "TOSA_TESTCASES_BASE_PATH"
+        base_dir = "test_collate_tosa_tests"
+        previous_value = os.environ.get(env_var)
+
+        def restore_environment():
+            # Put the variable back exactly as it was found, so a value set
+            # for the whole test run is not lost after this test.
+            if previous_value is None:
+                os.environ.pop(env_var, None)
+            else:
+                os.environ[env_var] = previous_value
+
+        # Cleanups run in reverse order of registration and also run when the
+        # test fails: the environment is restored first, then the directory
+        # is removed.
+        self.addCleanup(shutil.rmtree, base_dir, ignore_errors=True)
+        self.addCleanup(restore_environment)
+
+        # Clear out the directory so files left by an earlier run cannot
+        # satisfy the existence checks below.
+        shutil.rmtree(base_dir, ignore_errors=True)
+        # Set the environment variable to trigger the collation of TOSA tests
+        os.environ[env_var] = base_dir
+
+        model = Linear(20, 30)
+        (
+            ArmTester(
+                model,
+                example_inputs=model.get_inputs(),
+                compile_spec=common.get_tosa_compile_spec(),
+            )
+            .quantize()
+            .export()
+            .to_edge()
+            .partition()
+            .to_executorch()
+        )
+        # test that the output directory is created and contains the expected files
+        collate_dir = os.path.join(
+            base_dir,
+            "tosa-bi",
+            "TestCollateTosaTests",
+            "test_collate_tosa_BI_tests",
+        )
+        self.assertTrue(os.path.exists(collate_dir))
+        for file_name in ("output_tag8.tosa", "desc_tag8.json"):
+            self.assertTrue(os.path.exists(os.path.join(collate_dir, file_name)))
@@ -20,7 +20,6 @@ if(NOT EXECUTORCH_ROOT)
 endif()
 
 include(${EXECUTORCH_ROOT}/build/Utils.cmake)
-include(${EXECUTORCH_ROOT}/build/Codegen.cmake)
 
 # Let files say "include <executorch/path/to/header.h>".
 set(_common_include_directories ${EXECUTORCH_ROOT}/..)
@@ -30,54 +29,6 @@ if(EXECUTORCH_NNLIB_OPT)
   set(TARGET_DIR hifi)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/third-party/nnlib)
 endif()
-set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
-
-# Source root directory for executorch.
-if(NOT EXECUTORCH_ROOT)
-  set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..)
-endif()
-
-if(NOT PYTHON_EXECUTABLE)
-  resolve_python_executable()
-endif()
-
-set(_common_compile_options -Wno-deprecated-declarations -fPIC)
-
-# Find prebuilt libraries. executorch package should contain portable_ops_lib,
-# etdump, bundled_program.
-find_package(executorch CONFIG REQUIRED)
-target_link_options_shared_lib(executorch)
-target_link_options_shared_lib(portable_ops_lib)
-
-target_include_directories(executorch INTERFACE ${_common_include_directories})
-
-find_package(
-  gflags REQUIRED PATHS ${CMAKE_CURRENT_BINARY_DIR}/../../third-party
-)
-
-add_executable(cadence_runner cadence_runner/cadence_runner.cpp)
-target_compile_options(executorch INTERFACE -DET_EVENT_TRACER_ENABLED)
 
 add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/operators)
 add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/kernels)
-
-target_include_directories(
-  etdump INTERFACE ${CMAKE_CURRENT_BINARY_DIR}/../../sdk/include
-                   ${EXECUTORCH_ROOT}/third-party/flatcc/include
-)
-
-target_include_directories(
-  cadence_runner PUBLIC ${ROOT_DIR}/.. ${CMAKE_BINARY_DIR}
-                        ${_common_include_directories}
-)
-
-target_link_libraries(
-  cadence_runner
-  executorch
-  gflags
-  etdump
-  extension_data_loader
-  bundled_program
-  cadence_ops_lib
-  flatccrt
-)
@@ -65,20 +65,19 @@ else
         -DEXECUTORCH_BUILD_HOST_TARGETS=ON \
         -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=ON \
         -DEXECUTORCH_BUILD_PTHREADPOOL=OFF \
-        -DEXECUTORCH_BUILD_CADENCE=OFF \
+        -DEXECUTORCH_BUILD_CPUINFO=OFF \
+        -DEXECUTORCH_BUILD_FLATC=OFF \
+        -DEXECUTORCH_BUILD_CADENCE=ON \
         -DFLATC_EXECUTABLE="$(which flatc)" \
+        -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \
         -DEXECUTORCH_ENABLE_LOGGING=ON \
         -DEXECUTORCH_ENABLE_PROGRAM_VERIFICATION=ON \
         -DEXECUTORCH_USE_DL=OFF \
         -DBUILD_EXECUTORCH_PORTABLE_OPS=ON \
         -DEXECUTORCH_BUILD_KERNELS_CUSTOM=OFF \
         -DPYTHON_EXECUTABLE=python3 \
         -DEXECUTORCH_NNLIB_OPT=ON \
-        -DEXECUTORCH_BUILD_GFLAGS=ON \
         -DHAVE_FNMATCH_H=OFF \
-        -DEXECUTORCH_ENABLE_EVENT_TRACER=OFF \
-        -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \
-        -DEXECUTORCH_BUILD_CPUINFO=OFF \
         -Bcmake-out
     cmake --build cmake-out --target install --config Release -j16
 fi
 
@@ -0,0 +1,72 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Builds the cadence_runner executable and links it against the prebuilt
+# executorch package plus gflags, etdump, extension_data_loader,
+# bundled_program, cadence_ops_lib and flatccrt.
+
+# Set the minimum required version of CMake for this project.
+cmake_minimum_required(VERSION 3.10)
+
+if(NOT CMAKE_CXX_STANDARD)
+  set(CMAKE_CXX_STANDARD 17)
+endif()
+
+# Set the project name.
+project(cadence_backend)
+
+# Source root directory for executorch.
+if(NOT EXECUTORCH_ROOT)
+  set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../..)
+endif()
+
+include(${EXECUTORCH_ROOT}/build/Utils.cmake)
+include(${EXECUTORCH_ROOT}/build/Codegen.cmake)
+
+# Let files say "include <executorch/path/to/header.h>".
+set(_common_include_directories ${EXECUTORCH_ROOT}/..)
+# Name of the subdirectory holding the operator and kernel sources to build.
+set(TARGET_DIR reference)
+
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+
+if(NOT PYTHON_EXECUTABLE)
+  resolve_python_executable()
+endif()
+
+# Find prebuilt libraries. executorch package should contain portable_ops_lib,
+# etdump, bundled_program.
+find_package(executorch CONFIG REQUIRED)
+target_link_options_shared_lib(executorch)
+target_link_options_shared_lib(portable_ops_lib)
+
+target_include_directories(executorch INTERFACE ${_common_include_directories})
+
+find_package(
+  gflags REQUIRED PATHS ${CMAKE_CURRENT_BINARY_DIR}/../../../third-party
+)
+
+add_executable(cadence_runner cadence_runner.cpp)
+# The define is attached to the executorch target as an INTERFACE option, so it
+# applies to targets that link executorch.
+target_compile_options(executorch INTERFACE -DET_EVENT_TRACER_ENABLED)
+
+# NOTE(review): these paths are relative to this file's own directory. Confirm
+# that ${TARGET_DIR}/operators and ${TARGET_DIR}/kernels exist beneath it; the
+# same two lines previously lived in backends/cadence/CMakeLists.txt, one
+# directory level up.
+add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/operators)
+add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/kernels)
+
+target_include_directories(
+  etdump INTERFACE ${CMAKE_CURRENT_BINARY_DIR}/../../../devtools/include
+                   ${EXECUTORCH_ROOT}/third-party/flatcc/include
+)
+
+# NOTE(review): ROOT_DIR is not set anywhere in this file - confirm that an
+# included module or the invoking command defines it.
+target_include_directories(
+  cadence_runner PUBLIC ${ROOT_DIR}/../.. ${CMAKE_BINARY_DIR}
+                        ${_common_include_directories}
+)
+
+target_link_libraries(
+  cadence_runner
+  executorch
+  gflags
+  etdump
+  extension_data_loader
+  bundled_program
+  cadence_ops_lib
+  flatccrt
+)
@@ -12,7 +12,7 @@ set -euo pipefail
 SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
 readonly SCRIPT_DIR
 
-readonly EXECUTORCH_ROOT="${SCRIPT_DIR}/../.."
+readonly EXECUTORCH_ROOT="${SCRIPT_DIR}/../../.."
 
 # Allow overriding the number of build jobs. Default to 9.
 export CMAKE_BUILD_PARALLEL_LEVEL="${CMAKE_BUILD_PARALLEL_LEVEL:-9}"
@@ -32,8 +32,9 @@ main() {
     -DEXECUTORCH_BUILD_PTHREADPOOL=OFF \
     -DEXECUTORCH_BUILD_CPUINFO=OFF \
     -DEXECUTORCH_ENABLE_LOGGING=ON \
-    -Bcmake-out .
-  cmake --build cmake-out --target install --config Release
+    -DEXECUTORCH_NNLIB_OPT=OFF \
+    -Bcmake-out
+  cmake --build cmake-out --target install --config Release -j16
 
   local example_dir=backends/cadence
   local build_dir="cmake-out/${example_dir}"
@@ -43,7 +44,7 @@ main() {
     -DCMAKE_BUILD_TYPE=Release \
     -B"${build_dir}" \
     "${example_dir}"
-  cmake --build "${build_dir}" --config Release
+  cmake --build "${build_dir}" --config Release -j16
 
   local runner="${PWD}/${build_dir}/cadence_runner"
   if [[ ! -f "${runner}" ]]; then
 
@@ -10,6 +10,8 @@ add_library(
   kernels.cpp
   ${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/matmul_asym8uxasym8u_asym8u.cpp
 )
+# Let files say "include <executorch/path/to/header.h>".
+set(_common_include_directories ${EXECUTORCH_ROOT}/..)
 
 target_include_directories(
   cadence_kernels
@@ -19,6 +21,7 @@ target_include_directories(
     ${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/nnlib-hifi4/xa_nnlib/include/nnlib
     ${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/nnlib-hifi4/xa_nnlib/include
     ${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/nnlib-hifi4/xa_nnlib/algo/ndsp/hifi4/include/
+  ${_common_include_directories}
 )
 
 target_link_libraries(cadence_kernels PRIVATE xa_nnlib)
@@ -28,6 +28,7 @@ set(_aten_ops__srcs
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/matmul_ops_util.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/reduce_util.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/repeat_util.cpp"
+    "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/slice_util.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_add.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_bmm.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_cat.cpp"
 
@@ -7,4 +7,9 @@
 # lint_cmake: -linelength
 add_library(cadence_kernels kernels.cpp)
 
-target_include_directories(cadence_kernels PUBLIC .)
+# Let files say "include <executorch/path/to/header.h>".
+set(_common_include_directories ${EXECUTORCH_ROOT}/..)
+
+target_include_directories(cadence_kernels PUBLIC .
+                    ${_common_include_directories}
+)
@@ -9,6 +9,7 @@
 #include <math.h>
 #include <algorithm>
 #include <cstring>
+#include <limits>
 #include <numeric>
 
 namespace impl {
@@ -17,8 +18,7 @@ namespace kernels {
 
 // Quantize a fp32 value to an int8_t/uint8_t value
 template <typename T>
-__attribute__((always_inline)) T
-quantize(const float x, float scale, int32_t zero_point) {
+T quantize(const float x, float scale, int32_t zero_point) {
   constexpr float min_val = std::numeric_limits<T>::min();
   constexpr float max_val = std::numeric_limits<T>::max();
   float tmp = roundf(x * scale + zero_point);
@@ -40,8 +40,7 @@ void quantize(
 
 // Dequantize an int8_t/uint8_t value to an fp32 value
 template <typename T>
-__attribute__((always_inline)) float
-dequantize(const T x, float scale, int32_t zero_point) {
+float dequantize(const T x, float scale, int32_t zero_point) {
   return scale * (x - zero_point);
 }
 
@@ -60,9 +59,8 @@ void dequantize(
 
 // explicit template instantiation
 
-#define typed_quantize_val(dtype)                         \
-  template __attribute__((always_inline)) dtype quantize( \
-      const float x, float inv_scale, int32_t zero_point);
+#define typed_quantize_val(dtype) \
+  template dtype quantize(const float x, float inv_scale, int32_t zero_point);
 typed_quantize_val(int8_t);
 typed_quantize_val(uint8_t);
 typed_quantize_val(int16_t);
@@ -82,9 +80,8 @@ typed_quantize_vec(int16_t);
 typed_quantize_vec(int32_t);
 #undef typed_quantize_vec
 
-#define typed_dequantize_val(dtype)                         \
-  template __attribute__((always_inline)) float dequantize( \
-      const dtype x, float scale, int32_t zero_point);
+#define typed_dequantize_val(dtype) \
+  template float dequantize(const dtype x, float scale, int32_t zero_point);
 typed_dequantize_val(int8_t);
 typed_dequantize_val(uint8_t);
 typed_dequantize_val(int16_t);
Original file line number	Diff line number	Diff line change
`@@ -10,6 +10,8 @@ add_library(`
`10`	`10`	`kernels.cpp`
`11`	`11`	`${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/matmul_asym8uxasym8u_asym8u.cpp`
`12`	`12`	`)`
	`13`	`+# Let files say "include <executorch/path/to/header.h>".`
	`14`	`+set(_common_include_directories ${EXECUTORCH_ROOT}/..)`
`13`	`15`
`14`	`16`	`target_include_directories(`
`15`	`17`	`cadence_kernels`
`@@ -19,6 +21,7 @@ target_include_directories(`
`19`	`21`	`${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/nnlib-hifi4/xa_nnlib/include/nnlib`
`20`	`22`	`${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/nnlib-hifi4/xa_nnlib/include`
`21`	`23`	`${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/nnlib-hifi4/xa_nnlib/algo/ndsp/hifi4/include/`
	`24`	`+ ${_common_include_directories}`
`22`	`25`	`)`
`23`	`26`
`24`	`27`	`target_link_libraries(cadence_kernels PRIVATE xa_nnlib)`