Update

GregoryComer · GregoryComer · commit fdd8e12dfa50 · 2025-09-02T14:08:49.000-07:00
[ghstack-poisoned]
diff --git a/.ci/scripts/test_llama.sh b/.ci/scripts/test_llama.sh
@@ -154,6 +154,7 @@ cmake_install_executorch_libraries() {
     echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
     rm -rf cmake-out
     retry cmake --preset llm \
+        -DEXECUTORCH_BUILD_TESTS=ON \
         -DBUILD_TESTING=OFF \
         -DCMAKE_INSTALL_PREFIX=cmake-out \
         -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
@@ -170,6 +171,7 @@ cmake_build_llama_runner() {
     popd
     dir="examples/models/llama"
     retry cmake \
+        -DEXECUTORCH_BUILD_TESTS=ON \
         -DBUILD_TESTING=OFF \
         -DCMAKE_INSTALL_PREFIX=cmake-out \
         -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
diff --git a/.ci/scripts/unittest-windows.ps1 b/.ci/scripts/unittest-windows.ps1
@@ -29,7 +29,7 @@ if ($LASTEXITCODE -ne 0) {
 
 # Run pytest with coverage
 # pytest -n auto --cov=./ --cov-report=xml
-pytest --continue-on-collection-errors -v --full-trace -c pytest-windows.ini -n auto
+pytest -v --full-trace -c pytest-windows.ini -n auto
 if ($LASTEXITCODE -ne 0) {
     Write-Host "Pytest invocation was unsuccessful. Exit code: $LASTEXITCODE."
     exit $LASTEXITCODE
diff --git a/backends/cadence/hifi/kernels/kernels.cpp b/backends/cadence/hifi/kernels/kernels.cpp
@@ -127,60 +127,6 @@ void dequantize(
   }
 }
 
-// Requantize the int8_t/uint8_t in value to a uint8_t/int8_t out value.
-// The scale and zero_point for requantization are in the args.
-template <typename IT, typename OT>
-__attribute__((always_inline)) OT requantize(
-    const IT in,
-    float in_scale,
-    int32_t in_zero_point,
-    float inv_out_scale,
-    int32_t out_zero_point) {
-  float dequant = dequantize<IT>(in, in_scale, in_zero_point);
-  return quantize<OT>(dequant, inv_out_scale, out_zero_point);
-}
-
-// Requantize the int8_t/uint8_t in array to a uint8_t/int8_t out array.
-// The scale and zero_point for requantization are in the args.
-template <typename IT, typename OT>
-void requantize(
-    OT* __restrict__ out,
-    const IT* __restrict__ in,
-    float in_scale,
-    int32_t in_zero_point,
-    float inv_out_scale,
-    int32_t out_zero_point,
-    size_t size) {
-  xtfloatx2 in_scale_vec = (xtfloatx2)in_scale;
-  xtfloatx2 in_zero_vec = XT_FLOAT_SX2(in_zero_point, 0);
-  xtfloatx2 inv_out_scale_vec = (xtfloatx2)inv_out_scale;
-  xtfloatx2 out_zero_vec = XT_FLOAT_SX2(out_zero_point, 0);
-
-  float min_val = std::numeric_limits<OT>::min();
-  float max_val = std::numeric_limits<OT>::max();
-
-  size_t i = 0;
-  // Vectorize by 2
-  for (; i < (size & ~1); i += 2) {
-    xtfloatx2 in_vec = {(float)in[i], (float)in[i + 1]};
-    xtfloatx2 t0 = XT_SUB_SX2(in_vec, in_zero_vec);
-    xtfloatx2 t1 = XT_MUL_SX2(t0, in_scale_vec);
-
-    xtfloatx2 acc = out_zero_vec;
-    XT_MADD_SX2(acc, inv_out_scale_vec, t1);
-    xtfloatx2 t2 = XT_FIROUND_SX2(acc);
-    ae_int32x2 t3 =
-        XT_UTRUNC_SX2(XT_MAX_SX2(XT_MIN_SX2(t2, max_val), min_val), 0);
-    out[i] = AE_MOVAD32_H(t3);
-    out[i + 1] = AE_MOVAD32_L(t3);
-  }
-  // Handle residual iteration
-  if (i < size) {
-    out[i] = requantize<IT, OT>(
-        in[i], in_scale, in_zero_point, inv_out_scale, out_zero_point);
-  }
-}
-
 // explicit template instantiation
 
 #define typed_quantize_val(dtype)                         \
@@ -229,34 +175,6 @@ typed_dequantize_vec(uint16_t);
 typed_dequantize_vec(int32_t);
 #undef typed_dequantize_vec
 
-#define typed_requantize_val(itype, otype)                  \
-  template __attribute__((always_inline)) otype requantize( \
-      const itype in,                                       \
-      float in_scale,                                       \
-      int32_t in_zero_point,                                \
-      float inv_out_scale,                                  \
-      int32_t out_zero_point);
-typed_requantize_val(int8_t, int8_t);
-typed_requantize_val(uint8_t, uint8_t);
-typed_requantize_val(int8_t, uint8_t);
-typed_requantize_val(uint8_t, int8_t);
-#undef typed_requantize_val
-
-#define typed_requantize_vec(itype, otype) \
-  template void requantize(                \
-      otype* __restrict__ out,             \
-      const itype* __restrict__ in,        \
-      float in_scale,                      \
-      int32_t in_zero_point,               \
-      float inv_out_scale,                 \
-      int32_t out_zero_point,              \
-      size_t size);
-typed_requantize_vec(int8_t, int8_t);
-typed_requantize_vec(uint8_t, uint8_t);
-typed_requantize_vec(int8_t, uint8_t);
-typed_requantize_vec(uint8_t, int8_t);
-#undef typed_requantize_vec
-
 }; // namespace kernels
 }; // namespace HiFi
 }; // namespace impl
diff --git a/examples/models/llama/CMakeLists.txt b/examples/models/llama/CMakeLists.txt
@@ -220,7 +220,8 @@ target_include_directories(llama_main PUBLIC ${_common_include_directories})
 target_link_libraries(llama_main PUBLIC llama_runner ${link_libraries})
 target_compile_options(llama_main PUBLIC ${_common_compile_options})
 if(APPLE)
-  target_link_options(llama_main PRIVATE -Wl,-rpath,@executable_path)
+  target_link_options(llama_main PRIVATE -Wl,-rpath,@loader_path)
 elseif(UNIX)
   set_target_properties(llama_main PROPERTIES LINK_FLAGS "-Wl,-rpath='$ORIGIN'")
 endif()
+# Windows has no rpath mechanism; DLLs are located via the standard Windows DLL search order.
diff --git a/runtime/executor/test/method_test.cpp b/runtime/executor/test/method_test.cpp
@@ -8,6 +8,7 @@
 
 #include <cstdlib>
 #include <filesystem>
+#include <unordered_map>
 
 #include <executorch/extension/data_loader/file_data_loader.h>
 #include <executorch/extension/flat_tensor/flat_tensor_data_map.h>