Rename generic operator sources to op_<name>.cpp and add per-operator headers.

hsharma35 · web-flow · commit dc5092b5528d · 2025-12-02T05:48:06.000Z
Differential Revision: D88079745 Pull Request resolved: pytorch#16022
diff --git a/backends/cadence/generic/operators/op_dequantize_per_tensor.cpp b/backends/cadence/generic/operators/op_dequantize_per_tensor.cpp
@@ -6,6 +6,8 @@
  * LICENSE file in the root directory of this source tree.
  */
 
+#include <executorch/backends/cadence/generic/operators/op_dequantize_per_tensor.h>
+
 #include <executorch/backends/cadence/generic/kernels/kernels.h>
 #include <executorch/runtime/kernel/kernel_includes.h>
 
diff --git a/backends/cadence/generic/operators/op_dequantize_per_tensor.h b/backends/cadence/generic/operators/op_dequantize_per_tensor.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <executorch/runtime/core/exec_aten/exec_aten.h>
+#include <executorch/runtime/kernel/kernel_runtime_context.h>
+
+namespace impl {
+namespace generic {
+namespace native {
+// Declared for op_dequantize_per_tensor.cpp, which includes this header.
+::executorch::aten::Tensor& dequantize_per_tensor_out(
+    ::executorch::runtime::KernelRuntimeContext& context,
+    const ::executorch::aten::Tensor& input,
+    double scale,
+    int64_t zero_point,
+    int64_t quant_min,
+    int64_t quant_max,
+    ::executorch::aten::ScalarType dtype,
+    ::executorch::aten::Tensor& out);
+
+} // namespace native
+} // namespace generic
+} // namespace impl
diff --git a/backends/cadence/generic/operators/op_im2row.cpp b/backends/cadence/generic/operators/op_im2row.cpp
@@ -6,10 +6,18 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-#include <executorch/backends/cadence/generic/operators/operators.h>
+#include <executorch/backends/cadence/generic/operators/op_im2row.h>
 
 #include <algorithm>
 
+#include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>
+
+#ifndef DISABLE_ALWAYS_INLINE // default: force inlining via the attribute
+#define ALWAYS_INLINE __attribute__((always_inline))
+#else // DISABLE_ALWAYS_INLINE is defined: fall back to a plain `inline`
+#define ALWAYS_INLINE inline
+#endif // DISABLE_ALWAYS_INLINE
+
 namespace impl {
 namespace generic {
 namespace native {
@@ -20,7 +28,7 @@ using ::executorch::aten::Tensor;
 using ::executorch::runtime::KernelRuntimeContext;
 
 template <typename T>
-__attribute__((always_inline)) void im2row_(
+ALWAYS_INLINE void im2row_(
     const T* __restrict__ data_im,
     const int32_t in_zero_point,
     /* input parameters*/
@@ -76,7 +84,7 @@ __attribute__((always_inline)) void im2row_(
             // 'channels' contiguous values. Otherwise we will fill the output
             // with 0's.
             if (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) {
-              std::memcpy(slice_col, slice_im, channels * sizeof(T));
+              memcpy(slice_col, slice_im, channels * sizeof(T));
             } else {
               std::fill_n(slice_col, channels, T(in_zero_point));
             }
@@ -115,8 +123,8 @@ __attribute__((always_inline)) void im2row_(
   }
 }
 
-void im2row_out(
-    __ET_UNUSED KernelRuntimeContext& ctx,
+Tensor& im2row_out(
+    ET_UNUSED KernelRuntimeContext& ctx,
     const Tensor& input,
     IntArrayRef kernel_size,
     IntArrayRef dilation,
@@ -170,7 +178,7 @@ void im2row_out(
         in_zero_point.const_data_ptr<int32_t>();                       \
     int32_t in_plane = in_c * in_h * in_w;                             \
     int32_t out_plane = kernel_h * kernel_w * in_c * out_h * out_w;    \
-    for (size_t n = 0; n < batch_size; ++n) {                          \
+    for (int32_t n = 0; n < batch_size; ++n) {                         \
       im2row_<ctype>(                                                  \
           &in_data[n * in_plane],                                      \
           per_tensor_quantized ? zero_point[0] : zero_point[n],        \
@@ -205,10 +213,12 @@ void im2row_out(
           torch::executor::toString(dtype));
   }
 #undef typed_im2row
+
+  return out;
 }
 
-void im2row_per_tensor_out(
-    __ET_UNUSED KernelRuntimeContext& ctx,
+Tensor& im2row_per_tensor_out(
+    ET_UNUSED KernelRuntimeContext& ctx,
     const Tensor& input,
     IntArrayRef kernel_size,
     IntArrayRef dilation,
@@ -291,6 +301,7 @@ void im2row_per_tensor_out(
           torch::executor::toString(dtype));
   }
 #undef typed_im2row_per_tensor
+  return out;
 }
 
 } // namespace native
diff --git a/backends/cadence/generic/operators/op_im2row.h b/backends/cadence/generic/operators/op_im2row.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <executorch/runtime/core/exec_aten/exec_aten.h>
+#include <executorch/runtime/kernel/kernel_runtime_context.h>
+
+namespace impl {
+namespace generic {
+namespace native {
+// im2row kernels: in_zero_point is a Tensor in one and an int64_t in the other.
+::executorch::aten::Tensor& im2row_out(
+    ET_UNUSED ::executorch::runtime::KernelRuntimeContext& ctx,
+    const ::executorch::aten::Tensor& input,
+    ::executorch::aten::IntArrayRef kernel_size,
+    ::executorch::aten::IntArrayRef dilation,
+    ::executorch::aten::IntArrayRef padding,
+    ::executorch::aten::IntArrayRef stride,
+    const ::executorch::aten::Tensor& in_zero_point,
+    bool channel_last,
+    ::executorch::aten::Tensor& out);
+
+::executorch::aten::Tensor& im2row_per_tensor_out(
+    ET_UNUSED ::executorch::runtime::KernelRuntimeContext& ctx,
+    const ::executorch::aten::Tensor& input,
+    ::executorch::aten::IntArrayRef kernel_size,
+    ::executorch::aten::IntArrayRef dilation,
+    ::executorch::aten::IntArrayRef padding,
+    ::executorch::aten::IntArrayRef stride,
+    int64_t in_zero_point,
+    bool channel_last,
+    ::executorch::aten::Tensor& out);
+
+} // namespace native
+} // namespace generic
+} // namespace impl
diff --git a/backends/cadence/generic/operators/op_quantize_per_tensor.cpp b/backends/cadence/generic/operators/op_quantize_per_tensor.cpp
@@ -6,6 +6,8 @@
  * LICENSE file in the root directory of this source tree.
  */
 
+#include <executorch/backends/cadence/generic/operators/op_quantize_per_tensor.h>
+
 #include <executorch/backends/cadence/generic/kernels/kernels.h>
 #include <executorch/runtime/kernel/kernel_includes.h>
 
diff --git a/backends/cadence/generic/operators/op_quantize_per_tensor.h b/backends/cadence/generic/operators/op_quantize_per_tensor.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <executorch/runtime/core/exec_aten/exec_aten.h>
+#include <executorch/runtime/kernel/kernel_runtime_context.h>
+
+namespace impl {
+namespace generic {
+namespace native {
+// Declared for op_quantize_per_tensor.cpp, which includes this header.
+::executorch::aten::Tensor& quantize_per_tensor_out(
+    ::executorch::runtime::KernelRuntimeContext& context,
+    const ::executorch::aten::Tensor& input,
+    double scale,
+    int64_t zero_point,
+    int64_t quant_min,
+    int64_t quant_max,
+    ::executorch::aten::ScalarType dtype,
+    ::executorch::aten::Tensor& out);
+} // namespace native
+} // namespace generic
+} // namespace impl
diff --git a/backends/cadence/generic/operators/targets.bzl b/backends/cadence/generic/operators/targets.bzl
@@ -6,8 +6,8 @@ def define_common_targets():
 
     runtime.cxx_library(
         name = "im2row_out",
-        srcs = ["im2row_out.cpp"],
-        exported_headers = ["operators.h"],
+        srcs = ["op_im2row.cpp"],
+        exported_headers = ["op_im2row.h"],
         platforms = CXX,
         deps = [
             "//executorch/runtime/kernel:kernel_includes",
@@ -32,11 +32,10 @@ def define_common_targets():
         ],
     )
 
-    # Quantized operators that need cadence kernels for quantize/dequantize
     runtime.cxx_library(
         name = "dequantize_per_tensor",
-        srcs = ["dequantize_per_tensor.cpp"],
-        exported_headers = ["quantized_ops.h"],
+        srcs = ["op_dequantize_per_tensor.cpp"],
+        exported_headers = ["op_dequantize_per_tensor.h"],
         platforms = CXX,
         deps = [
             "//executorch/runtime/kernel:kernel_includes",
@@ -50,8 +49,8 @@ def define_common_targets():
 
     runtime.cxx_library(
         name = "quantize_per_tensor",
-        srcs = ["quantize_per_tensor.cpp"],
-        exported_headers = ["quantized_ops.h"],
+        srcs = ["op_quantize_per_tensor.cpp"],
+        exported_headers = ["op_quantize_per_tensor.h"],
         platforms = CXX,
         deps = [
             "//executorch/runtime/kernel:kernel_includes",