5 changes: 4 additions & 1 deletion kernels/optimized/CMakeLists.txt
@@ -21,7 +21,10 @@ if(NOT EXECUTORCH_ROOT)
   set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..)
 endif()
 
-set(_common_compile_options -Wno-deprecated-declarations)
+set(_common_compile_options
+  $<$<CXX_COMPILER_ID:MSVC>:/wd4996>
+  $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-Wno-deprecated-declarations -fPIC>
+)
 
 # Note for apple platform we can rely on Accelerate framework Will come back to
 # this
5 changes: 2 additions & 3 deletions kernels/optimized/cpu/op_bmm.cpp
@@ -150,15 +150,14 @@ Tensor& opt_bmm_out(
   ET_KERNEL_CHECK(
       ctx, check_bmm_out_args(self, mat2, out), InvalidArgument, out);
 
-  constexpr auto name = "bmm.out";
   auto self_type = self.scalar_type();
 
   if (executorch::runtime::isComplexType(self_type)) {
-    ET_SWITCH_COMPLEXH_TYPES(self_type, ctx, name, CTYPE, [&]() {
+    ET_SWITCH_COMPLEXH_TYPES(self_type, ctx, "bmm.out", CTYPE, [&]() {
       bmm_kernel<CTYPE>(self, mat2, out);
     });
   } else {
-    ET_SWITCH_REALHBF16_TYPES(self_type, ctx, name, CTYPE, [&]() {
+    ET_SWITCH_REALHBF16_TYPES(self_type, ctx, "bmm.out", CTYPE, [&]() {
       bmm_kernel<CTYPE>(self, mat2, out);
     });
   }
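Note on the ET_SWITCH_* change above (the same pattern repeats in op_masked_scatter.cpp and op_view_as_real_copy.cpp below): the constexpr name local is dropped and the string literal is passed at each call site. Since the rest of this PR is about building with MSVC, a plausible motivation is that these macros expand to a lambda, and some MSVC versions are picky about referring to an uncaptured constexpr local from inside a lambda; passing the literal sidesteps that entirely. A minimal sketch of the before/after shape, using a hypothetical stand-in rather than the real macros:

#include <cstdio>

// Hypothetical stand-in for an ET_SWITCH_*-style expansion: the real macros
// produce a lambda that uses the op name (e.g. for error messages).
void before_style() {
  constexpr auto name = "bmm.out";  // constexpr local shared by several uses
  auto body = [&] { std::printf("running %s\n", name); };  // lambda reads the local
  body();
}

void after_style() {
  // Each call site now passes the literal itself, so the lambda produced by
  // the macro never needs to reach a local in the enclosing scope.
  auto body = [] { std::printf("running %s\n", "bmm.out"); };
  body();
}

int main() {
  before_style();
  after_style();
  return 0;
}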
2 changes: 1 addition & 1 deletion kernels/portable/CMakeLists.txt
@@ -23,7 +23,7 @@ endif()
 
 set(_common_compile_options
   $<$<CXX_COMPILER_ID:MSVC>:/wd4996>
-  $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-Wno-deprecated-declarations>
+  $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-Wno-deprecated-declarations -fPIC>
 )
 
 include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)
4 changes: 1 addition & 3 deletions kernels/portable/cpu/op_masked_scatter.cpp
@@ -41,13 +41,11 @@ Tensor& masked_scatter_out(
       InvalidArgument,
       out);
 
-  constexpr auto op_name = "masked_scatter.out";
-
   int64_t idx = 0;
   int64_t src_numel = src.numel();
   bool src_numel_check = true;
 
-  ET_SWITCH_REALHBBF16_TYPES(in_type, ctx, op_name, CTYPE, [&]() {
+  ET_SWITCH_REALHBBF16_TYPES(in_type, ctx, "masked_scatter.out", CTYPE, [&]() {
     const CTYPE* const src_data = src.const_data_ptr<CTYPE>();
     apply_binary_elementwise_fn<CTYPE, bool, CTYPE>(
         [src_data, &idx, &src_numel, &src_numel_check](
7 changes: 5 additions & 2 deletions kernels/portable/cpu/op_topk.cpp
@@ -8,6 +8,7 @@
 
 #include <c10/util/irange.h>
 #include <cmath>
+#include <functional>
 #include <tuple>
 
 #include <executorch/kernels/portable/cpu/util/math_util.h>
@@ -118,10 +119,12 @@ void perform_topk(
   }
 
   // Perform topk on the queue
-  const auto elem_greater = [](const elem_t& x, const elem_t& y) -> bool {
+  const std::function<bool(const elem_t&, const elem_t&)> elem_greater =
+      [](const elem_t& x, const elem_t& y) -> bool {
     return float_less_than(y.first, x.first);
   };
-  const auto elem_less = [](const elem_t& x, const elem_t& y) -> bool {
+  const std::function<bool(const elem_t&, const elem_t&)> elem_less =
+      [](const elem_t& x, const elem_t& y) -> bool {
     return float_less_than(x.first, y.first);
   };
   const auto cmp = largest ? elem_greater : elem_less;
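Note on the comparator change above: each lambda has its own unique closure type, so the original `largest ? elem_greater : elem_less` relied on both operands converting to a common type (for captureless lambdas, a function pointer). Wrapping both comparators in std::function spells that common type out, which is presumably friendlier to the toolchains this PR targets; that motivation is an inference, the diff itself does not say. A self-contained sketch of the pattern:

#include <algorithm>
#include <functional>
#include <iostream>
#include <vector>

int main() {
  // Both comparators share one explicit type, so the ternary below has an
  // unambiguous result type.
  using cmp_t = std::function<bool(int, int)>;
  const cmp_t greater = [](int x, int y) { return y < x; };
  const cmp_t less = [](int x, int y) { return x < y; };

  const bool largest = true;
  const auto cmp = largest ? greater : less;  // well-formed: both sides are cmp_t

  std::vector<int> v{3, 1, 4, 1, 5};
  std::sort(v.begin(), v.end(), cmp);
  for (int x : v) {
    std::cout << x << ' ';  // prints: 5 4 3 1 1
  }
  std::cout << '\n';
  return 0;
}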
14 changes: 7 additions & 7 deletions kernels/portable/cpu/op_view_as_real_copy.cpp
@@ -64,13 +64,13 @@ Tensor& view_as_real_copy_out(
   ET_KERNEL_CHECK(
       ctx, tensors_have_same_dim_order(self, out), InvalidArgument, out);
 
-  constexpr auto op_name = "view_as_real_copy.out";
-
-  ET_SWITCH_COMPLEXH_TYPES(self.scalar_type(), ctx, op_name, CTYPE_IN, [&] {
-    ET_SWITCH_FLOATH_TYPES(out.scalar_type(), ctx, op_name, CTYPE_OUT, [&] {
-      _to_impl<CTYPE_IN, CTYPE_OUT>(self, out);
-    });
-  });
+  ET_SWITCH_COMPLEX_TYPES(
+      self.scalar_type(), ctx, "view_as_real_copy.out", CTYPE_IN, [&] {
+        ET_SWITCH_FLOATH_TYPES(
+            out.scalar_type(), ctx, "view_as_real_copy.out", CTYPE_OUT, [&] {
+              _to_impl<CTYPE_IN, CTYPE_OUT>(self, out);
+            });
+      });
 
   return out;
 }

Review comment from the Contributor Author on the removed `constexpr auto op_name` line: "oh missed one."
5 changes: 4 additions & 1 deletion kernels/portable/cpu/util/CMakeLists.txt
@@ -21,7 +21,10 @@ endif()
 
 list(TRANSFORM _kernels_util_all_deps__srcs PREPEND "${EXECUTORCH_ROOT}/")
 
-set(_common_compile_options -Wno-deprecated-declarations)
+set(_common_compile_options
+  $<$<CXX_COMPILER_ID:MSVC>:/wd4996>
+  $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-Wno-deprecated-declarations -fPIC>
+)
 
 add_library(kernels_util_all_deps ${_kernels_util_all_deps__srcs})
 target_link_libraries(kernels_util_all_deps PRIVATE executorch_core)
48 changes: 23 additions & 25 deletions kernels/portable/cpu/util/elementwise_util.h
@@ -85,7 +85,6 @@ inline void dtype_specialized_elementwise_fn_impl(
   static_assert(
       (std::is_same_v<Args, std::pair<const Tensor*, SupportedTensorDtypes>> &&
        ...));
-  constexpr auto kNumInputs = sizeof...(inputs);
   // All inputs must be of type CTYPE_COMPUTE.
   ET_DCHECK(
       ((inputs.first->scalar_type() ==
@@ -105,8 +104,9 @@
         out.numel(),
         ::executorch::extension::internal::GRAIN_SIZE,
         [&](const auto begin, const auto end) {
-          std::array<const CTYPE_COMPUTE*, kNumInputs> inputs_data_ptrs = {
-              inputs.first->template const_data_ptr<CTYPE_COMPUTE>()...};
+          std::array<const CTYPE_COMPUTE*, sizeof...(inputs)>
+              inputs_data_ptrs = {
+                  inputs.first->template const_data_ptr<CTYPE_COMPUTE>()...};
 
           CTYPE_OUT* const data_out = out.mutable_data_ptr<CTYPE_OUT>();
 
@@ -119,11 +119,11 @@
           // small-sized tests will test whether using Vectorized broke our
           // lambda.
 #ifndef NDEBUG
-          std::array<Vec, kNumInputs> loaded_inputs{};
+          std::array<Vec, sizeof...(inputs)> loaded_inputs{};
 #else // NDEBUG
-          std::array<CTYPE_COMPUTE, kNumInputs> loaded_inputs{};
+          std::array<CTYPE_COMPUTE, sizeof...(inputs)> loaded_inputs{};
 #endif // NDEBUG
-          for (const auto input_idx : c10::irange(kNumInputs)) {
+          for (const auto input_idx : c10::irange(sizeof...(inputs))) {
             loaded_inputs[input_idx] = inputs_data_ptrs[input_idx][idx];
           }
 #ifndef NDEBUG
@@ -136,8 +136,8 @@
           // Main vectorized loop.
           for (auto idx = vectorized_begin; idx < vectorized_end;
                idx += Vec::size()) {
-            std::array<Vec, kNumInputs> loaded_vec_inputs{};
-            for (const auto input_idx : c10::irange(kNumInputs)) {
+            std::array<Vec, sizeof...(inputs)> loaded_vec_inputs{};
+            for (const auto input_idx : c10::irange(sizeof...(inputs))) {
               loaded_vec_inputs[input_idx] =
                   Vec::loadu(&inputs_data_ptrs[input_idx][idx]);
             }
@@ -148,11 +148,11 @@
           // Scalar epilogue.
           for (const auto idx : c10::irange(vectorized_end, end)) {
 #ifndef NDEBUG
-            std::array<Vec, kNumInputs> loaded_inputs{};
+            std::array<Vec, sizeof...(inputs)> loaded_inputs{};
 #else // NDEBUG
-            std::array<CTYPE_COMPUTE, kNumInputs> loaded_inputs{};
+            std::array<CTYPE_COMPUTE, sizeof...(inputs)> loaded_inputs{};
 #endif // NDEBUG
-            for (const auto input_idx : c10::irange(kNumInputs)) {
+            for (const auto input_idx : c10::irange(sizeof...(inputs))) {
               loaded_inputs[input_idx] = inputs_data_ptrs[input_idx][idx];
             }
 #ifndef NDEBUG
@@ -172,20 +172,20 @@
         out.numel(),
         ::executorch::extension::internal::GRAIN_SIZE,
         [&](const auto begin, const auto end) {
-          std::array<const CTYPE_COMPUTE*, kNumInputs> inputs_data_ptrs = {
+          std::array<const CTYPE_COMPUTE*, sizeof...(inputs)> inputs_data_ptrs = {
              inputs.first->template const_data_ptr<CTYPE_COMPUTE>()...};
 
           CTYPE_OUT* const data_out = out.mutable_data_ptr<CTYPE_OUT>();
 
-          const auto range =
-              BroadcastIndexesRange<kNumInputs, support_noncontiguous_tensors>(
-                  out, (*inputs.first)...);
+          const auto range = BroadcastIndexesRange<
+              sizeof...(inputs),
+              support_noncontiguous_tensors>(out, (*inputs.first)...);
           auto begin_it = range.begin();
           begin_it += begin;
           for (; (*begin_it)[0] < end; ++begin_it) {
             const auto& indexes = *begin_it;
-            std::array<CTYPE_COMPUTE, kNumInputs> loaded_inputs{};
-            for (const auto idx : c10::irange(kNumInputs)) {
+            std::array<CTYPE_COMPUTE, sizeof...(inputs)> loaded_inputs{};
+            for (const auto idx : c10::irange(sizeof...(inputs))) {
               loaded_inputs[idx] = inputs_data_ptrs[idx][indexes[idx + 1]];
             }
             data_out[indexes[0]] = std::apply(compute_fun, loaded_inputs);
@@ -229,14 +229,12 @@ inline void apply_elementwise_fn_generic_impl(
     const Tensor& out,
     SupportedTensorDtypes out_dtypes,
     Args... inputs) {
-  constexpr auto kNumInputs = sizeof...(inputs);
-
   struct InputInfo {
     load_to_compute_fn<CTYPE_COMPUTE> load_to_compute;
     const char* data_ptr;
     ssize_t element_size;
   };
-  std::array<InputInfo, kNumInputs> inputs_info = {(InputInfo{
+  std::array<InputInfo, sizeof...(inputs)> inputs_info = {(InputInfo{
       internal::get_load_to_compute_fn<CTYPE_COMPUTE, op_name>(
           ctx, *inputs.first, inputs.second),
       reinterpret_cast<const char*>(inputs.first->const_data_ptr()),
@@ -254,15 +252,15 @@
         out.numel(),
         ::executorch::extension::internal::GRAIN_SIZE,
         [&](const auto begin, const auto end) {
-          const auto range =
-              BroadcastIndexesRange<kNumInputs, support_noncontiguous_tensors>(
-                  out, (*inputs.first)...);
+          const auto range = BroadcastIndexesRange<
+              sizeof...(inputs),
+              support_noncontiguous_tensors>(out, (*inputs.first)...);
           auto begin_it = range.begin();
           begin_it += begin;
           for (; (*begin_it)[0] < end; ++begin_it) {
             const auto& indexes = *begin_it;
-            std::array<CTYPE_COMPUTE, kNumInputs> loaded_inputs{};
-            for (const auto idx : c10::irange(kNumInputs)) {
+            std::array<CTYPE_COMPUTE, sizeof...(inputs)> loaded_inputs{};
+            for (const auto idx : c10::irange(sizeof...(inputs))) {
               const auto& input_info = inputs_info[idx];
               loaded_inputs[idx] = input_info.load_to_compute(
                   &input_info
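Note on the elementwise_util.h changes above: every use of the kNumInputs constexpr local is replaced by sizeof...(inputs) evaluated where it is needed. A plausible motivation (the diff does not state one) is that these uses sit inside lambdas handed to parallel_for, and some MSVC versions reject using an uncaptured constexpr local from the enclosing function as a template argument inside a lambda, whereas sizeof... on the enclosing pack is always a constant expression. A hypothetical, self-contained sketch of the pattern, not the ExecuTorch code:

#include <array>
#include <cstddef>
#include <cstdio>

// The pack size is used as a std::array bound inside a lambda by writing
// sizeof...(args) directly instead of a constexpr local defined outside it.
template <typename... Args>
void print_sizes(Args... args) {
  auto body = [&]() {
    std::array<std::size_t, sizeof...(args)> sizes = {sizeof(Args)...};
    for (std::size_t s : sizes) {
      std::printf("%zu ", s);
    }
    std::printf("\n");
  };
  body();
}

int main() {
  print_sizes(1, 2.0, 'c');  // prints the byte sizes of int, double, char
  return 0;
}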
40 changes: 20 additions & 20 deletions runtime/core/exec_aten/util/tensor_util.h
@@ -66,7 +66,7 @@
  * dimension of all the tensors as the upper bound for the for loop.
  */
 #define ET_CHECK_SAME_SHAPE2(a__, b__) \
-  ({ \
+  do { \
     const size_t a_numel__ = (a__).numel(); \
     const size_t b_numel__ = (b__).numel(); \
     const size_t a_dim__ = (a__).dim(); \
@@ -89,10 +89,10 @@
         a_size__, \
         b_size__); \
     } \
-  })
+  } while (0)
 
 #define ET_CHECK_SAME_SHAPE3(a__, b__, c__) \
-  ({ \
+  do { \
     const size_t a_numel__ = (a__).numel(); \
     const size_t b_numel__ = (b__).numel(); \
     const size_t c_numel__ = (c__).numel(); \
@@ -124,22 +124,22 @@
         b_size__, \
         c_size__); \
     } \
-  })
+  } while (0)
 
 /// Asserts that all tensors have the same dtype.
 #define ET_CHECK_SAME_DTYPE2(a__, b__) \
-  ({ \
+  do { \
     const ::executorch::aten::ScalarType a_type__ = (a__).scalar_type(); \
     const ::executorch::aten::ScalarType b_type__ = (b__).scalar_type(); \
     ET_CHECK_MSG( \
         a_type__ == b_type__, \
         ET_TENSOR_CHECK_PREFIX__ ": dtype={%" PRId8 ", %" PRId8 "}", \
         static_cast<int8_t>(a_type__), \
         static_cast<int8_t>(b_type__)); \
-  })
+  } while (0)
 
 #define ET_CHECK_SAME_DTYPE3(a__, b__, c__) \
-  ({ \
+  do { \
     const ::executorch::aten::ScalarType a_type__ = (a__).scalar_type(); \
     const ::executorch::aten::ScalarType b_type__ = (b__).scalar_type(); \
     const ::executorch::aten::ScalarType c_type__ = (c__).scalar_type(); \
@@ -150,7 +150,7 @@
         static_cast<int8_t>(a_type__), \
         static_cast<int8_t>(b_type__), \
         static_cast<int8_t>(c_type__)); \
-  })
+  } while (0)
 
 /**
  * Asserts that all tensors have the same shape and dtype.
@@ -159,7 +159,7 @@
  * macros independently, because it only calls ET_CHECK_MSG once.
  */
 #define ET_CHECK_SAME_SHAPE_AND_DTYPE2(a__, b__) \
-  ({ \
+  do { \
     const size_t a_numel__ = (a__).numel(); \
     const size_t b_numel__ = (b__).numel(); \
     const size_t a_dim__ = (a__).dim(); \
@@ -189,10 +189,10 @@
         a_size__, \
         b_size__); \
     } \
-  })
+  } while (0)
 
 #define ET_CHECK_SAME_SHAPE_AND_DTYPE3(a__, b__, c__) \
-  ({ \
+  do { \
     const size_t a_numel__ = (a__).numel(); \
     const size_t b_numel__ = (b__).numel(); \
     const size_t c_numel__ = (c__).numel(); \
@@ -233,13 +233,13 @@
         b_size__, \
         c_size__); \
     } \
-  })
+  } while (0)
 
 /**
  * Assert that the input tensor is contiguous tensor.
  */
 #define ET_CHECK_CONTIGUOUS(a__) \
-  ({ \
+  do { \
     const ::executorch::aten::ArrayRef<executorch::aten::StridesType> \
         strides = a__.strides(); \
     const ::executorch::aten::ArrayRef<executorch::aten::StridesType> sizes = \
@@ -260,15 +260,15 @@
         strides[i - 1], \
         strides[i] * sizes[i]); \
     } \
-  })
+  } while (0)
 
 /**
  * Assert the input two tensors share same strides.
 * Noted that this function does not make any check or promise on the contiguity
 * of any input tensors.
 */
 #define ET_CHECK_SAME_STRIDES2(a__, b__) \
-  ({ \
+  do { \
     ET_CHECK_MSG( \
         a__.dim() == b__.dim(), \
         "Two tensors shall have same number of strides, but not %zu and %zu.", \
@@ -288,15 +288,15 @@
         (int32_t)a_strides[i], \
         (int32_t)b_strides[i]); \
     } \
-  })
+  } while (0)
 
 /**
  * Assert the input three tensors share same strides.
 * Noted that this function does not make any check or promise on the contiguity
 * of any input tensors.
 */
 #define ET_CHECK_SAME_STRIDES3(a__, b__, c__) \
-  ({ \
+  do { \
     ET_CHECK_MSG( \
         a__.dim() == b__.dim() && b__.dim() == c__.dim(), \
         "Three tensors shall have same number of strides, " \
@@ -322,17 +322,17 @@
         (int32_t)b_strides[i], \
         (int32_t)c_strides[i]); \
     } \
-  })
+  } while (0)
 
 #define ET_CHECK_DEFAULT_OR_CHANNELSLAST_DIMORDER(t__) \
-  ({ \
+  do { \
     ET_CHECK_MSG( \
         is_contiguous_dim_order( \
             t__.dim_order().data(), t__.dim_order().size()) || \
             is_channels_last_dim_order( \
                 t__.dim_order().data(), t__.dim_order().size()), \
         "Tensor must have default or channels last dim order"); \
-  })
+  } while (0)
 
 /**
  * DEPRECATED: Please use ET_CHECK_OR_RETURN_FALSE instead and provide
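Note on the tensor_util.h changes above: the `({ ... })` wrappers these macros used are GNU statement expressions, an extension MSVC does not implement, while `do { ... } while (0)` is standard C++ and still makes a multi-statement macro behave as a single statement. Since these checks are used as statements rather than for a value, the swap should be behavior-preserving. A small sketch of the idiom with a hypothetical macro, not the ExecuTorch one:

#include <cstdio>
#include <cstdlib>

// Multi-statement check macro wrapped in do { ... } while (0) so that the
// macro plus its trailing semicolon expand to exactly one statement.
#define MY_CHECK_MSG(cond, msg)                          \
  do {                                                   \
    if (!(cond)) {                                       \
      std::fprintf(stderr, "check failed: %s\n", (msg)); \
      std::abort();                                      \
    }                                                    \
  } while (0)

int main(int argc, char**) {
  // Because the expansion is a single statement, this brace-less if/else
  // parses the way it reads; a bare { ... } block would break on the `else`.
  if (argc == 1)
    MY_CHECK_MSG(argc > 0, "argc should be positive");
  else
    std::puts("called with extra arguments");
  return 0;
}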