@@ -16,6 +16,8 @@ namespace native {
 
 using ::executorch::aten::Tensor;
 using ::executorch::runtime::KernelRuntimeContext;
+using ::impl::reference::kernels::dequantize;
+using ::impl::reference::kernels::quantize;
 
 void quantized_add_asym8sxasym8s_asym8s_per_tensor_out(
     KernelRuntimeContext& ctx,
@@ -61,25 +63,19 @@ void quantized_add_asym8sxasym8s_asym8s_per_tensor_out(
     }
   } /* if Y is a scalar Tensor */
   else if (Y_numel == 1) {
-    float y =
-        kernels::dequantize<int8_t>(Y_data[0], Y_scale_f, Y_zero_point_i32);
+    float y = dequantize<int8_t>(Y_data[0], Y_scale_f, Y_zero_point_i32);
     for (size_t i = 0; i < X_numel; ++i) {
-      float x =
-          kernels::dequantize<int8_t>(X_data[i], X_scale_f, X_zero_point_i32);
+      float x = dequantize<int8_t>(X_data[i], X_scale_f, X_zero_point_i32);
       float z = x + y;
-      out_data[i] =
-          kernels::quantize<int8_t>(z, inv_out_scale, out_zero_point_i32);
+      out_data[i] = quantize<int8_t>(z, inv_out_scale, out_zero_point_i32);
     }
   } /* if X is a scalar Tensor */
   else if (X_numel == 1) {
-    float x =
-        kernels::dequantize<int8_t>(X_data[0], X_scale_f, X_zero_point_i32);
+    float x = dequantize<int8_t>(X_data[0], X_scale_f, X_zero_point_i32);
     for (size_t i = 0; i < Y_numel; ++i) {
-      float y =
-          kernels::dequantize<int8_t>(Y_data[i], Y_scale_f, Y_zero_point_i32);
+      float y = dequantize<int8_t>(Y_data[i], Y_scale_f, Y_zero_point_i32);
       float z = x + y;
-      out_data[i] =
-          kernels::quantize<int8_t>(z, inv_out_scale, out_zero_point_i32);
+      out_data[i] = quantize<int8_t>(z, inv_out_scale, out_zero_point_i32);
     }
   } /* other broadcasting cases */
   else {
@@ -162,13 +158,10 @@ void quantized_add_asym8sxasym8s_asym8s_per_tensor_out(
       }
 
       /* Apply the operation */
-      float x = kernels::dequantize<int8_t>(
-          X_data[X_idx], X_scale_f, X_zero_point_i32);
-      float y = kernels::dequantize<int8_t>(
-          Y_data[Y_idx], Y_scale_f, Y_zero_point_i32);
+      float x = dequantize<int8_t>(X_data[X_idx], X_scale_f, X_zero_point_i32);
+      float y = dequantize<int8_t>(Y_data[Y_idx], Y_scale_f, Y_zero_point_i32);
       float z = x + y;
-      out_data[i] =
-          kernels::quantize<int8_t>(z, inv_out_scale, out_zero_point_i32);
+      out_data[i] = quantize<int8_t>(z, inv_out_scale, out_zero_point_i32);
     }
   }
 }
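Reviewer note: every hunk in this PR rewrites calls to the backend's quantize/dequantize helpers, so it is worth recalling their semantics. Below is a minimal sketch of the affine per-tensor behavior these call sites rely on; dequantize_ref and quantize_ref are illustrative stand-ins, not the backend's actual API, and the real Cadence kernels may round or saturate differently.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>

// Illustrative stand-in: real_value = (quantized - zero_point) * scale.
template <typename T>
float dequantize_ref(T q, float scale, int32_t zero_point) {
  return (static_cast<float>(q) - static_cast<float>(zero_point)) * scale;
}

// Illustrative stand-in: quantized = round(value / scale) + zero_point,
// saturated to T's range. The call sites above pass 1/scale
// (inv_out_scale), so the division becomes a multiplication.
template <typename T>
T quantize_ref(float val, float inv_scale, int32_t zero_point) {
  float q = std::nearbyint(val * inv_scale) + static_cast<float>(zero_point);
  q = std::max(q, static_cast<float>(std::numeric_limits<T>::min()));
  q = std::min(q, static_cast<float>(std::numeric_limits<T>::max()));
  return static_cast<T>(q);
}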
@@ -16,6 +16,8 @@ namespace native {
 
 using ::executorch::aten::Tensor;
 using ::executorch::runtime::KernelRuntimeContext;
+using ::impl::reference::kernels::dequantize;
+using ::impl::reference::kernels::quantize;
 
 void quantized_add_asym8uxasym8u_asym8u_per_tensor_out(
     KernelRuntimeContext& ctx,
@@ -61,25 +63,19 @@ void quantized_add_asym8uxasym8u_asym8u_per_tensor_out(
     }
   } /* if Y is a scalar Tensor */
   else if (Y_numel == 1) {
-    float y =
-        kernels::dequantize<uint8_t>(Y_data[0], Y_scale_f, Y_zero_point_i32);
+    float y = dequantize<uint8_t>(Y_data[0], Y_scale_f, Y_zero_point_i32);
     for (size_t i = 0; i < X_numel; ++i) {
-      float x =
-          kernels::dequantize<uint8_t>(X_data[i], X_scale_f, X_zero_point_i32);
+      float x = dequantize<uint8_t>(X_data[i], X_scale_f, X_zero_point_i32);
       float z = x + y;
-      out_data[i] =
-          kernels::quantize<uint8_t>(z, inv_out_scale, out_zero_point_i32);
+      out_data[i] = quantize<uint8_t>(z, inv_out_scale, out_zero_point_i32);
     }
   } /* if X is a scalar Tensor */
   else if (X_numel == 1) {
-    float x =
-        kernels::dequantize<uint8_t>(X_data[0], X_scale_f, X_zero_point_i32);
+    float x = dequantize<uint8_t>(X_data[0], X_scale_f, X_zero_point_i32);
     for (size_t i = 0; i < Y_numel; ++i) {
-      float y =
-          kernels::dequantize<uint8_t>(Y_data[i], Y_scale_f, Y_zero_point_i32);
+      float y = dequantize<uint8_t>(Y_data[i], Y_scale_f, Y_zero_point_i32);
       float z = x + y;
-      out_data[i] =
-          kernels::quantize<uint8_t>(z, inv_out_scale, out_zero_point_i32);
+      out_data[i] = quantize<uint8_t>(z, inv_out_scale, out_zero_point_i32);
     }
   } /* other broadcasting cases */
   else {
@@ -162,13 +158,10 @@ void quantized_add_asym8uxasym8u_asym8u_per_tensor_out(
      }
 
       /* Apply the operation */
-      float x = kernels::dequantize<uint8_t>(
-          X_data[X_idx], X_scale_f, X_zero_point_i32);
-      float y = kernels::dequantize<uint8_t>(
-          Y_data[Y_idx], Y_scale_f, Y_zero_point_i32);
+      float x = dequantize<uint8_t>(X_data[X_idx], X_scale_f, X_zero_point_i32);
+      float y = dequantize<uint8_t>(Y_data[Y_idx], Y_scale_f, Y_zero_point_i32);
       float z = x + y;
-      out_data[i] =
-          kernels::quantize<uint8_t>(z, inv_out_scale, out_zero_point_i32);
+      out_data[i] = quantize<uint8_t>(z, inv_out_scale, out_zero_point_i32);
     }
   }
 }
8 changes: 4 additions & 4 deletions backends/cadence/hifi/operators/op_quantized_layer_norm.cpp
@@ -13,6 +13,8 @@
 #include <cmath>
 #include <tuple>
 
+using ::cadence::impl::HiFi::kernels::dequantize;
+using ::cadence::impl::HiFi::kernels::quantize;
 using ::executorch::aten::IntArrayRef;
 using ::executorch::aten::ScalarType;
 using ::executorch::aten::Tensor;
@@ -80,11 +82,9 @@ void quantized_layer_norm_per_tensor_(
     for (size_t j = 0; j < last_dim; ++j) {
       // Since X is quantized, we dequantize it, compute fp32 result, and
       // quantize the result to an int8/uint8 value.
-      float val = ::cadence::impl::HiFi::kernels::dequantize<T>(
-          x[j], input_scale, input_zero_point);
+      float val = dequantize<T>(x[j], input_scale, input_zero_point);
       val = (val - mean) * inv_std * weight_data[j] + bias_data[j];
-      y[j] = ::cadence::impl::HiFi::kernels::quantize<T>(
-          val, output_inv_scale, output_zero_point);
+      y[j] = quantize<T>(val, output_inv_scale, output_zero_point);
     }
   }
 }
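Reviewer note: this hunk only shortens the qualified names; the per-row math is unchanged. For reference, a self-contained sketch of that inner loop under the affine assumptions above (mean and inv_std are computed per row before this point; names are illustrative, not the backend's API):

#include <cmath>
#include <cstddef>
#include <cstdint>

// One row of per-tensor quantized layer norm: dequantize, normalize and
// affine-transform in fp32, then requantize with saturation to int8.
void layer_norm_row_sketch(
    const int8_t* x,
    int8_t* y,
    size_t last_dim,
    float mean,
    float inv_std,
    const float* weight,
    const float* bias,
    float in_scale,
    int32_t in_zp,
    float out_inv_scale,
    int32_t out_zp) {
  for (size_t j = 0; j < last_dim; ++j) {
    float val = (static_cast<float>(x[j]) - in_zp) * in_scale; // dequantize
    val = (val - mean) * inv_std * weight[j] + bias[j]; // normalize
    float q = std::nearbyint(val * out_inv_scale) + out_zp; // requantize
    y[j] = static_cast<int8_t>(std::fmin(std::fmax(q, -128.0f), 127.0f));
  }
}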
19 changes: 8 additions & 11 deletions backends/cadence/reference/operators/dequantize_per_tensor.cpp
@@ -13,9 +13,10 @@ namespace impl {
 namespace reference {
 namespace native {
 
-using executorch::aten::ScalarType;
-using executorch::aten::Tensor;
-using executorch::runtime::KernelRuntimeContext;
+using ::executorch::aten::ScalarType;
+using ::executorch::aten::Tensor;
+using ::executorch::runtime::KernelRuntimeContext;
+using ::impl::reference::kernels::dequantize;
 
 void dequantize_per_tensor_out(
     KernelRuntimeContext& context,
@@ -31,22 +32,18 @@ void dequantize_per_tensor_out(
 
   if (input.scalar_type() == ScalarType::Byte) {
     const uint8_t* input_data = input.const_data_ptr<uint8_t>();
-    impl::reference::kernels::dequantize<uint8_t>(
-        out_data, input_data, scale, zero_point, numel);
+    dequantize<uint8_t>(out_data, input_data, scale, zero_point, numel);
   } else if (input.scalar_type() == ScalarType::Char) {
     const int8_t* input_data = input.const_data_ptr<int8_t>();
-    impl::reference::kernels::dequantize<int8_t>(
-        out_data, input_data, scale, zero_point, numel);
+    dequantize<int8_t>(out_data, input_data, scale, zero_point, numel);
   } else if (
       input.scalar_type() == ScalarType::Bits16 ||
       input.scalar_type() == ScalarType::UInt16) {
     const uint16_t* input_data = input.const_data_ptr<uint16_t>();
-    impl::reference::kernels::dequantize<uint16_t>(
-        out_data, input_data, scale, zero_point, numel);
+    dequantize<uint16_t>(out_data, input_data, scale, zero_point, numel);
   } else if (input.scalar_type() == ScalarType::Short) {
     const int16_t* input_data = input.const_data_ptr<int16_t>();
-    impl::reference::kernels::dequantize<int16_t>(
-        out_data, input_data, scale, zero_point, numel);
+    dequantize<int16_t>(out_data, input_data, scale, zero_point, numel);
   } else {
     ET_CHECK_MSG(
         false,
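Reviewer note: unlike the scalar helper used in the add and layer-norm kernels, this file calls a buffer overload of dequantize. Its exact signature lives in the Cadence kernel headers; the shape these call sites imply is roughly the sketch below (the name and parameter types are assumptions for illustration only):

#include <cstddef>
#include <cstdint>

// Assumed buffer overload: dequantize numel values of T into a float
// buffer using affine per-tensor parameters.
template <typename T>
void dequantize_buf_sketch(
    float* out,
    const T* in,
    float scale,
    int32_t zero_point,
    size_t numel) {
  for (size_t i = 0; i < numel; ++i) {
    out[i] = (static_cast<float>(in[i]) - zero_point) * scale;
  }
}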
1 change: 0 additions & 1 deletion backends/cadence/reference/operators/op_requantize_out.cpp
@@ -95,7 +95,6 @@ Tensor& requantize_out(
     out_data[i] = \
         kernels::quantize<dtype>(dequant, 1 / out_scale, out_zero_point); \
   };
-
 #define typed_requantize_in(ctype) \
   switch (out_dtype) { \
     case ScalarType::Byte: { \
32 changes: 16 additions & 16 deletions backends/cadence/reference/operators/quantized_add_out.cpp
@@ -14,8 +14,10 @@ namespace impl {
 namespace reference {
 namespace native {
 
-using executorch::aten::Tensor;
-using executorch::runtime::KernelRuntimeContext;
+using ::executorch::aten::Tensor;
+using ::executorch::runtime::KernelRuntimeContext;
+using ::impl::reference::kernels::dequantize;
+using ::impl::reference::kernels::quantize;
 
 template <typename T>
 void quantized_add_per_tensor_impl(
@@ -48,28 +50,28 @@ void quantized_add_per_tensor_impl(
   // Simple case: tensors have the same shape, no broadcasting
   if (X_numel == Y_numel && Y_numel == out_numel) {
     for (size_t i = 0; i < X_numel; ++i) {
-      float x = kernels::dequantize<T>(X_data[i], X_scale_f, X_zero_point_i32);
-      float y = kernels::dequantize<T>(Y_data[i], Y_scale_f, Y_zero_point_i32);
+      float x = dequantize<T>(X_data[i], X_scale_f, X_zero_point_i32);
+      float y = dequantize<T>(Y_data[i], Y_scale_f, Y_zero_point_i32);
       float z = x + y;
-      out_data[i] = kernels::quantize<T>(z, inv_out_scale, out_zero_point_i32);
+      out_data[i] = quantize<T>(z, inv_out_scale, out_zero_point_i32);
     }
   }
   // Y is a scalar tensor
   else if (Y_numel == 1) {
-    float y = kernels::dequantize<T>(Y_data[0], Y_scale_f, Y_zero_point_i32);
+    float y = dequantize<T>(Y_data[0], Y_scale_f, Y_zero_point_i32);
     for (size_t i = 0; i < X_numel; ++i) {
-      float x = kernels::dequantize<T>(X_data[i], X_scale_f, X_zero_point_i32);
+      float x = dequantize<T>(X_data[i], X_scale_f, X_zero_point_i32);
       float z = x + y;
-      out_data[i] = kernels::quantize<T>(z, inv_out_scale, out_zero_point_i32);
+      out_data[i] = quantize<T>(z, inv_out_scale, out_zero_point_i32);
     }
   }
   // X is a scalar tensor
   else if (X_numel == 1) {
-    float x = kernels::dequantize<T>(X_data[0], X_scale_f, X_zero_point_i32);
+    float x = dequantize<T>(X_data[0], X_scale_f, X_zero_point_i32);
     for (size_t i = 0; i < Y_numel; ++i) {
-      float y = kernels::dequantize<T>(Y_data[i], Y_scale_f, Y_zero_point_i32);
+      float y = dequantize<T>(Y_data[i], Y_scale_f, Y_zero_point_i32);
       float z = x + y;
-      out_data[i] = kernels::quantize<T>(z, inv_out_scale, out_zero_point_i32);
+      out_data[i] = quantize<T>(z, inv_out_scale, out_zero_point_i32);
     }
   }
   // General broadcasting case - simplified implementation
@@ -79,12 +81,10 @@ void quantized_add_per_tensor_impl(
       size_t x_idx = (X_numel == 1) ? 0 : i % X_numel;
       size_t y_idx = (Y_numel == 1) ? 0 : i % Y_numel;
 
-      float x =
-          kernels::dequantize<T>(X_data[x_idx], X_scale_f, X_zero_point_i32);
-      float y =
-          kernels::dequantize<T>(Y_data[y_idx], Y_scale_f, Y_zero_point_i32);
+      float x = dequantize<T>(X_data[x_idx], X_scale_f, X_zero_point_i32);
+      float y = dequantize<T>(Y_data[y_idx], Y_scale_f, Y_zero_point_i32);
       float z = x + y;
-      out_data[i] = kernels::quantize<T>(z, inv_out_scale, out_zero_point_i32);
+      out_data[i] = quantize<T>(z, inv_out_scale, out_zero_point_i32);
     }
   }
 }
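Reviewer note: the "general broadcasting case" above keeps its simplified flat indexing. The i % numel pattern repeats the smaller tensor cyclically over the flattened output, which matches true shape-aware broadcasting only in simple layouts (scalars, or a smaller tensor whose shape equals the output's trailing dimensions). A tiny standalone demo of the index pattern, under that assumption:

#include <cstddef>
#include <cstdio>

int main() {
  // X: [2, 3] broadcast with Y: [3] -> out has 6 elements and y_idx
  // cycles 0,1,2,0,1,2, matching trailing-dimension broadcasting.
  const size_t out_numel = 6;
  const size_t Y_numel = 3;
  for (size_t i = 0; i < out_numel; ++i) {
    size_t y_idx = (Y_numel == 1) ? 0 : i % Y_numel;
    std::printf("out[%zu] uses Y[%zu]\n", i, y_idx);
  }
  return 0;
}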
6 changes: 4 additions & 2 deletions backends/cadence/reference/operators/quantized_layer_norm.cpp
@@ -16,6 +16,8 @@ using ::executorch::aten::ScalarType;
 using ::executorch::aten::Tensor;
 using ::executorch::runtime::getLeadingDims;
 using ::executorch::runtime::KernelRuntimeContext;
+using ::impl::reference::kernels::dequantize;
+using ::impl::reference::kernels::quantize;
 
 namespace impl {
 namespace reference {
@@ -74,10 +76,10 @@ void quantized_layer_norm_per_tensor_(
       // y[j] = (x[j] - mean) / std * kGamma + kBeta;
       // Since X is quantized, we dequantize it, compute fp32 result, and
       // quantize the result to an int8/uint8 value.
-      float val = kernels::dequantize<T>(x[j], input_scale, input_zero_point);
+      float val = dequantize<T>(x[j], input_scale, input_zero_point);
 
       val = (val - mean) * inv_std * weight_data[j] + bias_data[j];
-      y[j] = kernels::quantize<T>(val, output_inv_scale, output_zero_point);
+      y[j] = quantize<T>(val, output_inv_scale, output_zero_point);
     }
   }
 }