diff --git a/backends/cadence/hifi/operators/op_quantized_linear_out.cpp b/backends/cadence/hifi/operators/op_quantized_linear_out.cpp
index 84aff1c2f41..d9f4e41bc39 100644
--- a/backends/cadence/hifi/operators/op_quantized_linear_out.cpp
+++ b/backends/cadence/hifi/operators/op_quantized_linear_out.cpp
@@ -9,6 +9,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -207,7 +208,7 @@ void inline _quantized_linear_per_tensor_asym8s(
 }
 
 void quantized_linear_out(
-    __ET_UNUSED KernelRuntimeContext& ctx,
+    KernelRuntimeContext& ctx,
     const Tensor& in,
     const Tensor& weight,
     const Tensor& bias,
@@ -216,9 +217,26 @@
     const Tensor& out_multiplier,
     const Tensor& out_shift,
     int64_t out_zero_point,
-    __ET_UNUSED const optional<Tensor>& offset,
+    const optional<Tensor>& offset,
     Tensor& out) {
-  if (out.scalar_type() == executorch::aten::ScalarType::Byte) {
+  if (out.scalar_type() == ::executorch::aten::ScalarType::Short &&
+      in.scalar_type() == ::executorch::aten::ScalarType::Short &&
+      weight.scalar_type() == ::executorch::aten::ScalarType::Char) {
+    ::impl::generic::native::quantized_linear_out(
+        ctx,
+        in,
+        weight,
+        bias,
+        in_zero_point,
+        weight_zero_point,
+        out_multiplier,
+        out_shift,
+        out_zero_point,
+        offset,
+        out);
+  }
+
+  else if (out.scalar_type() == executorch::aten::ScalarType::Byte) {
     _quantized_linear_asym8u(
         in,
         weight,
@@ -260,7 +278,24 @@ void quantized_linear_per_tensor_out(
     int64_t out_zero_point,
     __ET_UNUSED const optional<Tensor>& offset,
     Tensor& out) {
-  if (out.scalar_type() == executorch::aten::ScalarType::Byte) {
+  if (out.scalar_type() == ::executorch::aten::ScalarType::Short &&
+      in.scalar_type() == ::executorch::aten::ScalarType::Short &&
+      weight.scalar_type() == ::executorch::aten::ScalarType::Char) {
+    ::impl::generic::native::quantized_linear_per_tensor_out(
+        ctx,
+        in,
+        weight,
+        bias,
+        in_zero_point,
+        weight_zero_point,
+        out_multiplier,
+        out_shift,
+        out_zero_point,
+        offset,
+        out);
+  }
+
+  else if (out.scalar_type() == executorch::aten::ScalarType::Byte) {
     _quantized_linear_per_tensor_asym8u(
         in,
         weight,
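Note on the new branches above: int16 (Short) activations with int8 (Char) weights are routed to a generic, non-HiFi-optimized implementation. The following is a hedged sketch of the declarations the added #include must make visible, reconstructed from the call sites rather than taken from the actual Jarvis min_runtime header:

// Hypothetical forward declarations inferred from the call sites above; the
// real header ships with the min_runtime targets added in targets.bzl below.
namespace impl {
namespace generic {
namespace native {

void quantized_linear_out(
    ::executorch::runtime::KernelRuntimeContext& ctx,
    const ::executorch::aten::Tensor& in,
    const ::executorch::aten::Tensor& weight,
    const ::executorch::aten::Tensor& bias,
    int64_t in_zero_point,
    const ::executorch::aten::Tensor& weight_zero_point,
    const ::executorch::aten::Tensor& out_multiplier,
    const ::executorch::aten::Tensor& out_shift,
    int64_t out_zero_point,
    const std::optional<::executorch::aten::Tensor>& offset,
    ::executorch::aten::Tensor& out);

void quantized_linear_per_tensor_out(
    ::executorch::runtime::KernelRuntimeContext& ctx,
    const ::executorch::aten::Tensor& in,
    const ::executorch::aten::Tensor& weight,
    const ::executorch::aten::Tensor& bias,
    int64_t in_zero_point,
    int64_t weight_zero_point,
    int64_t out_multiplier,
    int64_t out_shift,
    int64_t out_zero_point,
    const std::optional<::executorch::aten::Tensor>& offset,
    ::executorch::aten::Tensor& out);

} // namespace native
} // namespace generic
} // namespace impl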
diff --git a/backends/cadence/hifi/operators/targets.bzl b/backends/cadence/hifi/operators/targets.bzl
index a25dfd1bcbc..5d135e320bf 100644
--- a/backends/cadence/hifi/operators/targets.bzl
+++ b/backends/cadence/hifi/operators/targets.bzl
@@ -87,7 +87,6 @@ OPERATORS = [
     "quantized_fully_connected_asym8sxasym8s_asym8s_per_tensor_out",
     "quantized_fully_connected_asym8uxasym8u_asym8u_per_tensor_out",
     "quantized_layer_norm",
-    "quantized_linear_out",
     "quantized_linear_asym8sxasym8s_asym8s_per_tensor_out",
     "quantized_linear_asym8uxasym8u_asym8u_per_tensor_out",
     "quantized_matmul_out",
@@ -122,3 +121,7 @@ def define_common_targets():
     # Define build targets for all operators registered in the tables above.
     for op in OPERATORS:
         define_operator(op)
+
+    # quantized_linear_out and quantized_linear_per_tensor_out need an additional dependency for int16 support.
+    define_operator("quantized_linear_out", deps=["fbcode//on_device_ai/Assistant/Jarvis/min_runtime/operators:quantize_linear_out", "fbcode//on_device_ai/Assistant/Jarvis/min_runtime/operators:headers",])
+    define_operator("quantized_linear_per_tensor_out", deps=["fbcode//on_device_ai/Assistant/Jarvis/min_runtime/operators:quantize_linear_out", "fbcode//on_device_ai/Assistant/Jarvis/min_runtime/operators:headers",])
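For intuition about what the delegated kernel computes, here is a minimal scalar sketch of a per-tensor quantized linear layer with int16 activations and int8 weights. It assumes the conventional interpretation of out_multiplier as a Q31 fixed-point value and of a negative out_shift as a right shift; rounding is omitted, and the production kernel behind the new dependency may differ:

#include <algorithm>
#include <cstdint>

// Reference sketch, not the shipped kernel: per-tensor quantized linear with
// int16 activations [m, k], int8 weights [n, k], int32 bias [n], int16 out [m, n].
void quantized_linear_int16_reference(
    const int16_t* in,
    const int8_t* weight,
    const int32_t* bias,
    int64_t m,
    int64_t n,
    int64_t k,
    int32_t in_zero_point,
    int32_t weight_zero_point,
    int32_t out_multiplier,
    int32_t out_shift,
    int32_t out_zero_point,
    int16_t* out) {
  for (int64_t i = 0; i < m; ++i) {
    for (int64_t j = 0; j < n; ++j) {
      // Accumulate in int64 to avoid overflow with int16 activations.
      int64_t acc = bias[j];
      for (int64_t p = 0; p < k; ++p) {
        acc += static_cast<int64_t>(in[i * k + p] - in_zero_point) *
            static_cast<int64_t>(weight[j * k + p] - weight_zero_point);
      }
      // Requantize: scale by the Q31 multiplier, then apply out_shift
      // (negative means shift right). Rounding is omitted for brevity.
      int64_t scaled = (acc * out_multiplier) >> 31;
      scaled = out_shift >= 0 ? scaled << out_shift : scaled >> -out_shift;
      // Add the output zero point and clamp to the int16 range.
      scaled += out_zero_point;
      out[i * n + j] = static_cast<int16_t>(
          std::min<int64_t>(std::max<int64_t>(scaled, INT16_MIN), INT16_MAX));
    }
  }
}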
diff --git a/backends/cadence/hifi/operators/tests/test_op_quantized_linear_out.cpp b/backends/cadence/hifi/operators/tests/test_op_quantized_linear_out.cpp
new file mode 100644
index 00000000000..fddf373290f
--- /dev/null
+++ b/backends/cadence/hifi/operators/tests/test_op_quantized_linear_out.cpp
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+namespace impl {
+namespace HiFi {
+namespace native {
+namespace {
+
+using ::executorch::aten::Scalar;
+using ::executorch::aten::ScalarType;
+using ::executorch::aten::Tensor;
+using ::executorch::aten::TensorImpl;
+using ::executorch::runtime::Error;
+using ::executorch::runtime::KernelRuntimeContext;
+using ::executorch::runtime::runtime_init;
+using ::executorch::runtime::testing::TensorFactory;
+using std::optional;
+using std::string_view;
+
+class HiFiQuantizedLinearTest : public OperatorTest {
+ public:
+ protected:
+  void quantized_linear_out(
+      const Tensor& input,
+      const Tensor& weight,
+      const Tensor& bias,
+      int64_t in_zero_point,
+      const Tensor& weight_zero_point,
+      const Tensor& out_multiplier,
+      const Tensor& out_shift,
+      int64_t out_zero_point,
+      const optional<Tensor>& offset,
+      Tensor& output) {
+    return ::impl::HiFi::native::quantized_linear_out(
+        context_,
+        input,
+        weight,
+        bias,
+        in_zero_point,
+        weight_zero_point,
+        out_multiplier,
+        out_shift,
+        out_zero_point,
+        offset,
+        output);
+  }
+
+  void quantized_linear_per_tensor_out(
+      const Tensor& input,
+      const Tensor& weight,
+      const Tensor& bias,
+      int64_t in_zero_point,
+      int64_t weight_zero_point,
+      int64_t out_multiplier,
+      int64_t out_shift,
+      int64_t out_zero_point,
+      const optional<Tensor>& offset,
+      Tensor& output) {
+    return ::impl::HiFi::native::quantized_linear_per_tensor_out(
+        context_,
+        input,
+        weight,
+        bias,
+        in_zero_point,
+        weight_zero_point,
+        out_multiplier,
+        out_shift,
+        out_zero_point,
+        offset,
+        output);
+  }
+};
+
+// Test quantized_linear_out with int16 activations and int8 (asym8s) weights.
+TEST_F(HiFiQuantizedLinearTest, QuantizedLinearInt16Test) {
+  TensorFactory<ScalarType::Short> tf_int16;
+  TensorFactory<ScalarType::Int> tf_int32;
+  TensorFactory<ScalarType::Char> tf_int8;
+
+  // Simple 2D case: input [2, 3] x weight [4, 3] = output [2, 4]
+  // Values captured from e2e test with
+  // CadenceWith16BitLinearActivationsQuantizer
+  Tensor input =
+      tf_int16.make({2, 3}, {-28170, -26389, -32768, -31474, -32266, -29076});
+  Tensor weight = tf_int8.make(
+      {4, 3}, {1, 87, -128, -114, -59, 44, -1, 127, -12, 44, -46, -29});
+  Tensor bias = tf_int32.zeros({4});
+  Tensor output = tf_int16.zeros({2, 4});
+
+  int64_t in_zero_point = -29822;
+  Tensor weight_zero_point = tf_int32.make({1}, {2});
+  Tensor out_multiplier = tf_int32.make({1}, {2011373824});
+  Tensor out_shift = tf_int32.make({1}, {-8});
+  int64_t out_zero_point = -30847;
+  quantized_linear_out(
+      input,
+      weight,
+      bias,
+      in_zero_point,
+      weight_zero_point,
+      out_multiplier,
+      out_shift,
+      out_zero_point,
+      std::nullopt,
+      output);
+  // Expected output from e2e test
+  Tensor expected_output = tf_int16.make(
+      {2, 4}, {-28384, -32767, -29144, -30862, -31956, -29486, -31985, -30756});
+  EXPECT_TENSOR_CLOSE(output, expected_output);
+}
+
+} // namespace
+} // namespace native
+} // namespace HiFi
+} // namespace impl
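As a rough sanity check on the test's requantization parameters, under the same conventional interpretation as the sketch earlier (an assumption, not something the kernel documents), the effective rescale factor applied to the int32 accumulator is out_multiplier / 2^31 * 2^out_shift:

#include <cstdio>

int main() {
  const double multiplier = 2011373824.0; // out_multiplier from the test
  const int shift = -8; // out_shift from the test
  const double effective_scale = multiplier / 2147483648.0 *
      (shift >= 0 ? double(1 << shift) : 1.0 / double(1 << -shift));
  // Prints ~0.003659, i.e. the combined in_scale * weight_scale / out_scale.
  std::printf("effective rescale factor: %f\n", effective_scale);
  return 0;
}

A factor of roughly 0.00366 is plausible for mapping products of int16 activations and int8 weights back into the int16 output range, which is consistent with the expected values the test asserts.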