From 9ecec7f4f150572556556db1e71df4b29678a5c2 Mon Sep 17 00:00:00 2001
From: Digant Desai
Date: Thu, 17 Apr 2025 10:28:19 -0700
Subject: [PATCH] [cortex-m] Add scalar C++ op for dequantize_per_tensor

Only buck build for now, CMake is next.
No MVE, scalar only.
Strictly the dtypes we care about; update arg_meta to reflect that.

Differential Revision: [D73164576](https://our.internmc.facebook.com/intern/diff/D73164576/)

[ghstack-poisoned]
---
 .../cortex_m/ops/op_dequantize_per_tensor.cpp | 147 ++++++++++++++++++
 backends/cortex_m/ops/operators.yaml          |   6 +
 backends/cortex_m/ops/targets.bzl             |   1 +
 .../test/op_dequantize_per_tensor_test.cpp    |  48 ++++++
 backends/cortex_m/test/targets.bzl            |   1 +
 5 files changed, 203 insertions(+)
 create mode 100644 backends/cortex_m/ops/op_dequantize_per_tensor.cpp
 create mode 100644 backends/cortex_m/test/op_dequantize_per_tensor_test.cpp

diff --git a/backends/cortex_m/ops/op_dequantize_per_tensor.cpp b/backends/cortex_m/ops/op_dequantize_per_tensor.cpp
new file mode 100644
index 00000000000..6d60d5de3b9
--- /dev/null
+++ b/backends/cortex_m/ops/op_dequantize_per_tensor.cpp
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/runtime/kernel/kernel_includes.h>
+#include <cinttypes>
+
+// Check for Helium/MVE support
+#if defined(__ARM_FEATURE_MVE) && (__ARM_FEATURE_MVE & 1)
+#include <arm_mve.h>
+#define HAS_HELIUM_SIMD 1
+#endif
+
+namespace cortex_m {
+namespace native {
+
+using Tensor = executorch::aten::Tensor;
+using ScalarType = executorch::aten::ScalarType;
+using KernelRuntimeContext = torch::executor::KernelRuntimeContext;
+
+namespace {
+
+/**
+ * Asserts that the parameters are valid for int8 to float dequantization.
+ */
+void check_dequantize_args(
+    const Tensor& input,
+    int64_t quant_min,
+    int64_t quant_max,
+    ScalarType dtype,
+    Tensor& out) {
+  // Ensure input is char type
+  ET_CHECK_MSG(
+      input.scalar_type() == ScalarType::Char,
+      "input.scalar_type() %" PRId8 " is not char type",
+      static_cast<int8_t>(input.scalar_type()));
+
+  // Check output dtype is float
+  ET_CHECK_MSG(
+      out.scalar_type() == ScalarType::Float,
+      "out.scalar_type() %" PRId8 " is not float",
+      static_cast<int8_t>(out.scalar_type()));
+
+  // Check dtype is int8 (Char)
+  ET_CHECK_MSG(
+      dtype == ScalarType::Char,
+      "dtype %" PRId8 " is not int8 (Char)",
+      static_cast<int8_t>(dtype));
+
+  // Validate quant_min and quant_max for int8
+  int32_t quant_min_lower_bound = std::numeric_limits<int8_t>::min();
+  int32_t quant_max_upper_bound = std::numeric_limits<int8_t>::max();
+
+  ET_CHECK_MSG(
+      quant_min >= quant_min_lower_bound,
+      "quant_min out of bound for int8, expected quant_min_lower_bound: %" PRId32
+      " actual quant_min: %" PRId64,
+      quant_min_lower_bound,
+      quant_min);
+
+  ET_CHECK_MSG(
+      quant_max <= quant_max_upper_bound,
+      "quant_max out of bound for int8, expected quant_max_upper_bound: %" PRId32
+      " actual quant_max: %" PRId64,
+      quant_max_upper_bound,
+      quant_max);
+}
+
+/**
+ * Scalar implementation of dequantization for a single value.
+ */
+template <typename T, typename K>
+T dequantize_val(
+    float scale,
+    int32_t zero_point,
+    K value,
+    int64_t quant_min,
+    int64_t quant_max) {
+  (void)quant_min;
+  (void)quant_max;
+  return static_cast<T>((static_cast<int32_t>(value) - zero_point) * scale);
+}
+
+} // namespace
+
+Tensor& dequantize_per_tensor_out(
+    KernelRuntimeContext& context,
+    const Tensor& input,
+    double scale,
+    int64_t zero_point,
+    int64_t quant_min,
+    int64_t quant_max,
+    ScalarType dtype,
+    Tensor& out) {
+  // Ignore context for now
+  (void)context;
+
+  // Resize output tensor to match input dimensions
+  torch::executor::Error err = resize_tensor(out, input.sizes());
+  ET_CHECK_MSG(
+      err == torch::executor::Error::Ok,
+      "Failed to resize out Tensor in dequantize_per_tensor_out");
+
+  // Validate input parameters
+  check_dequantize_args(input, quant_min, quant_max, dtype, out);
+
+  // Cast quantization parameters to 32-bit types for the scalar loop
+  int32_t zp = static_cast<int32_t>(zero_point);
+  int32_t qmin = static_cast<int32_t>(quant_min);
+  int32_t qmax = static_cast<int32_t>(quant_max);
+
+  // Get pointers to input and output data
+  const int8_t* input_data = input.const_data_ptr<int8_t>();
+  float* out_data = out.mutable_data_ptr<float>();
+  const size_t numel = input.numel();
+
+#if defined(HAS_HELIUM_SIMD)
+  // Helium MVE implementation for int8 to float32 dequantization
+  #error "Implement MVE version!"
+#else
+  // Scalar implementation for int8 to float32 dequantization
+  for (size_t i = 0; i < numel; i++) {
+    out_data[i] = dequantize_val<float, int8_t>(scale, zp, input_data[i], qmin, qmax);
+  }
+#endif
+
+  return out;
+}
+
+Tensor& dequantize_per_tensor_out(
+    const Tensor& input,
+    double scale,
+    int64_t zero_point,
+    int64_t quant_min,
+    int64_t quant_max,
+    ScalarType dtype,
+    Tensor& out) {
+  KernelRuntimeContext context;
+  return dequantize_per_tensor_out(
+      context, input, scale, zero_point, quant_min, quant_max, dtype, out);
+}
+
+} // namespace native
+} // namespace cortex_m
diff --git a/backends/cortex_m/ops/operators.yaml b/backends/cortex_m/ops/operators.yaml
index e4c28fc678a..0cc248effaa 100644
--- a/backends/cortex_m/ops/operators.yaml
+++ b/backends/cortex_m/ops/operators.yaml
@@ -9,3 +9,9 @@
   kernels:
     - arg_meta: null
       kernel_name: cortex_m::quantize_per_tensor_out
+
+- func: cortex_m::dequantize_per_tensor.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  kernels:
+    - arg_meta: null
+      kernel_name: cortex_m::dequantize_per_tensor_out
diff --git a/backends/cortex_m/ops/targets.bzl b/backends/cortex_m/ops/targets.bzl
index 70c81b227c5..3368051de67 100644
--- a/backends/cortex_m/ops/targets.bzl
+++ b/backends/cortex_m/ops/targets.bzl
@@ -24,6 +24,7 @@ def define_operator_target(name: str):
 
 OPERATORS = [
     "quantize_per_tensor",
+    "dequantize_per_tensor",
 ]
 
 def define_common_targets():
diff --git a/backends/cortex_m/test/op_dequantize_per_tensor_test.cpp b/backends/cortex_m/test/op_dequantize_per_tensor_test.cpp
new file mode 100644
index 00000000000..6cd890024ed
--- /dev/null
+++ b/backends/cortex_m/test/op_dequantize_per_tensor_test.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/backends/cortex_m/ops/NativeFunctions.h> // Declares the operator
+#include <executorch/runtime/core/exec_aten/exec_aten.h>
+#include <executorch/runtime/core/exec_aten/testing_util/tensor_factory.h>
+#include <executorch/runtime/core/exec_aten/testing_util/tensor_util.h>
+#include <executorch/runtime/platform/runtime.h>
+#include <gtest/gtest.h>
+
+using executorch::aten::ScalarType;
+using executorch::aten::Tensor;
+using executorch::runtime::KernelRuntimeContext;
+using torch::executor::testing::TensorFactory;
+
+// Test op
+using cortex_m::native::dequantize_per_tensor_out;
+
+void test_dtype() {
+  TensorFactory<ScalarType::Char> tf;
+
+  Tensor input = tf.full({3, 5}, 4);
+  double scale = 0.5;
+
+  int64_t zero_point = 108;
+  int64_t quant_min = -128;
+  int64_t quant_max = 127;
+
+  TensorFactory<ScalarType::Float> tfo;
+  Tensor out = tfo.zeros({3, 5});
+  // (4 - 108) * 0.5 = -52
+  Tensor expected = tfo.full({3, 5}, -52.0);
+
+  KernelRuntimeContext ctx;
+  dequantize_per_tensor_out(
+      ctx, input, scale, zero_point, quant_min, quant_max, ScalarType::Char, out);
+
+  EXPECT_TENSOR_EQ(out, expected);
+}
+
+TEST(OpDequantizeOutTest, AllDtypesSupported) {
+  test_dtype();
+}
diff --git a/backends/cortex_m/test/targets.bzl b/backends/cortex_m/test/targets.bzl
index 2b8cc604043..1263f64eb41 100644
--- a/backends/cortex_m/test/targets.bzl
+++ b/backends/cortex_m/test/targets.bzl
@@ -8,6 +8,7 @@ load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
 
 OPERATORS = [
     "quantize_per_tensor",
+    "dequantize_per_tensor",
 ]
 
 def define_operator_test_target(op):
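
For reference, a minimal standalone sketch (not part of the patch) of the affine dequantization the kernel and test above rely on. It is plain C++ with no ExecuTorch dependencies, and the helper name is illustrative only:

    #include <cstdint>
    #include <cstdio>

    // Affine dequantization used by the kernel: real = (q - zero_point) * scale.
    static float dequantize_one(int8_t q, int32_t zero_point, float scale) {
      return (static_cast<int32_t>(q) - zero_point) * scale;
    }

    int main() {
      // Mirrors the unit test expectation: (4 - 108) * 0.5 = -52.0
      std::printf("%f\n", dequantize_one(4, 108, 0.5f));
      return 0;
    }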