Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion backends/cadence/aot/ops_registrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ def quantized_relu_meta(
out_multiplier: torch.Tensor,
out_shift: torch.Tensor,
) -> torch.Tensor:
return X.new_empty(X.size(), dtype=torch.uint8)
return X.new_empty(X.size(), dtype=X.dtype)


@register_fake("cadence::quantized_matmul")
Expand Down
5 changes: 4 additions & 1 deletion backends/cadence/hifi/operators/dequantize_per_tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,10 @@ void dequantize_per_tensor_out(
const int32_t* input_data = input.const_data_ptr<int32_t>();
dequantize<int32_t>(out_data, input_data, scale, zero_point, numel);
} else {
ET_CHECK_MSG(false, "Unhandled input dtype %hhd", input.scalar_type());
ET_CHECK_MSG(
false,
"Unhandled input dtype %hhd",
static_cast<int8_t>(input.scalar_type()));
}
}

Expand Down
5 changes: 4 additions & 1 deletion backends/cadence/hifi/operators/quantize_per_tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,10 @@ void quantize_per_tensor_out(
cadence::impl::HiFi::kernels::quantize<int32_t>(
out_data, input_data, 1. / scale, zero_point, numel);
} else {
ET_CHECK_MSG(false, "Unhandled input dtype %hhd", out.scalar_type());
ET_CHECK_MSG(
false,
"Unhandled output dtype %hhd",
static_cast<int8_t>(out.scalar_type()));
}
}

Expand Down
5 changes: 5 additions & 0 deletions backends/cadence/reference/operators/quantized_conv_out.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,11 @@ void quantized_conv_out(
output_scale,
(int8_t)output_zero_point,
per_tensor_quantized);
} else {
ET_CHECK_MSG(
false,
"Unhandled input dtype %hhd",
static_cast<int8_t>(input.scalar_type()));
}
}

Expand Down
57 changes: 48 additions & 9 deletions backends/cadence/reference/operators/quantized_linear_out.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ using executorch::aten::Tensor;
using executorch::runtime::getLeadingDims;
using executorch::runtime::KernelRuntimeContext;

void quantized_linear_out(
KernelRuntimeContext& ctx,
template <typename T>
void inline _typed_quantized_linear(
const Tensor& src,
const Tensor& weight,
const Tensor& bias,
Expand All @@ -27,14 +27,11 @@ void quantized_linear_out(
const Tensor& out_multiplier,
const Tensor& out_shift,
int64_t out_zero_point,
const executorch::aten::optional<Tensor>& offset,
Tensor& out) {
// Assuming uint8_t for now, but needs to be updated for other quantization
// types
const uint8_t* __restrict__ src_data = src.const_data_ptr<uint8_t>();
const uint8_t* __restrict__ weight_data = weight.const_data_ptr<uint8_t>();
const T* __restrict__ src_data = src.const_data_ptr<T>();
const T* __restrict__ weight_data = weight.const_data_ptr<T>();
const int32_t* __restrict__ bias_data = bias.const_data_ptr<int32_t>();
uint8_t* __restrict__ out_data = out.mutable_data_ptr<uint8_t>();
T* __restrict__ out_data = out.mutable_data_ptr<T>();

int32_t weight_zero_point = weight_zero_point_t.const_data_ptr<int32_t>()[0];

Expand Down Expand Up @@ -71,11 +68,53 @@ void quantized_linear_out(
(weight_data[j * N + k] - weight_zero_point);
}
out_data[i * M + j] =
kernels::quantize<uint8_t>(sum, out_scale, out_zero_point);
kernels::quantize<T>(sum, out_scale, out_zero_point);
}
}
}

/**
 * Entry point for the reference quantized linear operator.
 *
 * Picks the element type from the dtype of `out` and forwards the tensor and
 * zero-point arguments to _typed_quantized_linear<T>:
 *   - ScalarType::Byte -> uint8_t
 *   - ScalarType::Char -> int8_t
 * Any other output dtype fails the ET_CHECK_MSG check below.
 *
 * `ctx` and `offset` are marked __ET_UNUSED and are not read here.
 *
 * NOTE(review): only the dtype of `out` is inspected; the typed helper is
 * instantiated with that single element type, so `src` and `weight` are
 * presumably expected to share it -- confirm against callers.
 */
void quantized_linear_out(
    __ET_UNUSED KernelRuntimeContext& ctx,
    const Tensor& src,
    const Tensor& weight,
    const Tensor& bias,
    int64_t src_zero_point,
    const Tensor& weight_zero_point_t,
    const Tensor& out_multiplier,
    const Tensor& out_shift,
    int64_t out_zero_point,
    __ET_UNUSED const executorch::aten::optional<Tensor>& offset,
    Tensor& out) {
  // The dispatch key is the output dtype; keep it in one place so the error
  // path reports exactly the value that was tested.
  const auto out_dtype = out.scalar_type();

  if (out_dtype == executorch::aten::ScalarType::Byte) {
    _typed_quantized_linear<uint8_t>(
        src,
        weight,
        bias,
        src_zero_point,
        weight_zero_point_t,
        out_multiplier,
        out_shift,
        out_zero_point,
        out);
  } else if (out_dtype == executorch::aten::ScalarType::Char) {
    _typed_quantized_linear<int8_t>(
        src,
        weight,
        bias,
        src_zero_point,
        weight_zero_point_t,
        out_multiplier,
        out_shift,
        out_zero_point,
        out);
  } else {
    // Report the dtype that actually drove the dispatch (out), not src:
    // printing src's dtype here would name a possibly-supported type and
    // point the reader at the wrong tensor.
    ET_CHECK_MSG(
        false,
        "Unhandled output dtype %hhd",
        static_cast<int8_t>(out_dtype));
  }
}

}; // namespace native
}; // namespace reference
}; // namespace impl
5 changes: 5 additions & 0 deletions backends/cadence/reference/operators/quantized_matmul_out.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,11 @@ void quantized_matmul_out(
out_zero_point,
transposed,
out);
} else {
ET_CHECK_MSG(
false,
"Unhandled input dtype %hhd",
static_cast<int8_t>(X.scalar_type()));
}
}

Expand Down
Loading