12 changes: 12 additions & 0 deletions backends/cadence/aot/functions.yaml
@@ -208,6 +208,12 @@
- arg_meta: null
kernel_name: impl::generic::quantize_per_tensor_asym16u_out

- func: cadence::quantize_per_tensor_asym32s.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
variants: function
kernels:
- arg_meta: null
kernel_name: impl::generic::quantize_per_tensor_asym32s_out

- func: cadence::dequantize_per_tensor.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
variants: function
kernels:
@@ -238,6 +244,12 @@
- arg_meta: null
kernel_name: impl::generic::dequantize_per_tensor_asym16u_out

- func: cadence::dequantize_per_tensor_asym32s.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
variants: function
kernels:
- arg_meta: null
kernel_name: impl::generic::dequantize_per_tensor_asym32s_out

- func: cadence::quantized_conv2d_nchw.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!)
kernels:
- arg_meta: null
11 changes: 11 additions & 0 deletions backends/cadence/aot/functions_hifi.yaml
@@ -308,6 +308,11 @@
- arg_meta: null
kernel_name: impl::HiFi::quantize_per_tensor_asym16s_out

- func: cadence::quantize_per_tensor_asym32s.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
variants: function
kernels:
- arg_meta: null
kernel_name: impl::HiFi::quantize_per_tensor_asym32s_out

- func: cadence::dequantize_per_tensor.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
variants: function
@@ -339,6 +344,12 @@
- arg_meta: null
kernel_name: impl::HiFi::dequantize_per_tensor_asym16u_out

- func: cadence::dequantize_per_tensor_asym32s.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
variants: function
kernels:
- arg_meta: null
kernel_name: impl::HiFi::dequantize_per_tensor_asym32s_out

- func: cadence::quantized_conv2d_nchw.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!)
kernels:
- arg_meta: null
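Both registration files declare the same out-variant schema for the new int32 ops; only the kernel namespace differs (impl::generic vs. impl::HiFi). As a rough sketch, a call matching this schema from Python could look like the following, assuming the cadence op library has been loaded (for example by importing backends/cadence/aot/ops_registrations.py) and a matching kernel is linked into the build; the tensor values are illustrative:

import torch

# Hypothetical call of the out variant registered above; assumes the
# cadence library and a backing kernel are available in this build.
x = torch.randn(4, 8)
out = torch.empty(4, 8, dtype=torch.int32)
torch.ops.cadence.quantize_per_tensor_asym32s.out(
    x,
    0.05,         # scale
    0,            # zero_point
    -(2**31),     # quant_min for signed 32-bit
    2**31 - 1,    # quant_max for signed 32-bit
    torch.int32,  # dtype
    out=out,
)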
38 changes: 38 additions & 0 deletions backends/cadence/aot/ops_registrations.py
@@ -56,6 +56,13 @@
"quantize_per_tensor_asym16u.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)"
)

lib.define(
"quantize_per_tensor_asym32s(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype) -> (Tensor Z)"
)
lib.define(
"quantize_per_tensor_asym32s.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)"
)

lib.define(
"dequantize_per_tensor(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype) -> (Tensor Z)"
)
@@ -87,6 +94,13 @@
"dequantize_per_tensor_asym16u.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)"
)

lib.define(
"dequantize_per_tensor_asym32s(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype) -> (Tensor Z)"
)
lib.define(
"dequantize_per_tensor_asym32s.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)"
)

lib.define(
"quantized_layer_norm(Tensor X, Tensor X_scale, Tensor X_zero_point, int[] normalized_shape, Tensor weight, Tensor bias, float eps, float output_scale, int output_zero_point) -> (Tensor Y)"
)
@@ -641,6 +655,18 @@ def quantize_per_tensor_asym16u_meta(
return input.new_empty(input.size(), dtype=dtype)


@register_fake("cadence::quantize_per_tensor_asym32s")
def quantize_per_tensor_asym32s_meta(
input: torch.Tensor,
scale: float,
zero_point: int,
quant_min: int,
quant_max: int,
dtype: torch.dtype,
) -> torch.Tensor:
return input.new_empty(input.size(), dtype=dtype)


@register_fake("cadence::dequantize_per_tensor")
def dequantize_per_tensor_meta(
input: torch.Tensor,
@@ -701,6 +727,18 @@ def dequantize_per_tensor_asym16u_meta(
return input.new_empty(input.size(), dtype=torch.float)


@register_fake("cadence::dequantize_per_tensor_asym32s")
def dequantize_per_tensor_asym32s_meta(
input: torch.Tensor,
scale: float,
zero_point: int,
quant_min: int,
quant_max: int,
dtype: torch.dtype,
) -> torch.Tensor:
return input.new_empty(input.size(), dtype=torch.float)


@register_fake("cadence::quantized_add")
def quantized_add_meta(
X: torch.Tensor,
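The @register_fake entries give the export pipeline meta kernels that only propagate shape and dtype: the asym32s quantize returns an int32 tensor of the input's size, and the matching dequantize returns float. A minimal sanity check under FakeTensorMode, assuming this module has been imported so the registrations are in effect:

import torch
from torch._subclasses.fake_tensor import FakeTensorMode

with FakeTensorMode():
    x = torch.randn(2, 16)
    q = torch.ops.cadence.quantize_per_tensor_asym32s(
        x, 0.05, 0, -(2**31), 2**31 - 1, torch.int32
    )
    # The fake kernel only allocates an empty tensor of the same shape.
    assert q.dtype == torch.int32 and q.shape == x.shape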
2 changes: 2 additions & 0 deletions backends/cadence/aot/type_dispatch.py
@@ -108,6 +108,7 @@ class CompileTimeTypeDispatchPass(ExportPass):
(torch.uint8,): "asym8u",
(torch.int16,): "asym16s",
(torch.uint16,): "asym16s",
(torch.int32,): "asym32s",
},
variant="default",
is_quant_op=True,
@@ -119,6 +120,7 @@ class CompileTimeTypeDispatchPass(ExportPass):
(torch.uint8,): "asym8u",
(torch.int16,): "asym16s",
(torch.uint16,): "asym16s",
(torch.int32,): "asym32s",
},
variant="default",
),
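These two table entries let CompileTimeTypeDispatchPass pick the asym32s overloads when the quantized dtype is torch.int32. A simplified sketch of the dtype-to-suffix lookup; the table and helper below are illustrative, not the pass's actual internals:

import torch

# Dtype -> variant-suffix table mirroring the entries above.
_SUFFIXES = {
    (torch.uint8,): "asym8u",
    (torch.int16,): "asym16s",
    (torch.uint16,): "asym16s",
    (torch.int32,): "asym32s",  # added by this change
}

def typed_overload(base: str, dtype: torch.dtype) -> str:
    # e.g. quantize_per_tensor -> quantize_per_tensor_asym32s for int32
    return f"{base}_{_SUFFIXES[(dtype,)]}"

assert typed_overload("quantize_per_tensor", torch.int32) == "quantize_per_tensor_asym32s"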
4 changes: 4 additions & 0 deletions backends/cadence/generic/kernels/kernels.cpp
@@ -73,6 +73,7 @@ typed_quantize_val(int8_t);
typed_quantize_val(uint8_t);
typed_quantize_val(int16_t);
typed_quantize_val(uint16_t);
typed_quantize_val(int32_t);
#undef typed_quantize_val

#define typed_quantize_vec(dtype) \
@@ -86,6 +87,7 @@ typed_quantize_vec(int8_t);
typed_quantize_vec(uint8_t);
typed_quantize_vec(int16_t);
typed_quantize_vec(uint16_t);
typed_quantize_vec(int32_t);
#undef typed_quantize_vec

#define typed_dequantize_val(dtype) \
@@ -94,6 +96,7 @@ typed_dequantize_val(int8_t);
typed_dequantize_val(uint8_t);
typed_dequantize_val(int16_t);
typed_dequantize_val(uint16_t);
typed_dequantize_val(int32_t);
#undef typed_dequantize_val

#define typed_dequantize_vec(dtype) \
@@ -107,6 +110,7 @@ typed_dequantize_vec(int8_t);
typed_dequantize_vec(uint8_t);
typed_dequantize_vec(int16_t);
typed_dequantize_vec(uint16_t);
typed_dequantize_vec(int32_t);
#undef typed_dequantize_vec

} // namespace kernels
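The macros above explicitly instantiate the templated scalar and vector quantize/dequantize helpers for int32_t. Judging from the call sites, which pass 1. / scale, the helpers implement the usual asymmetric affine mapping with an inverse scale; a plain-Python sketch under that assumption:

def quantize_val(x: float, inv_scale: float, zero_point: int,
                 qmin: int = -(2**31), qmax: int = 2**31 - 1) -> int:
    # Round to nearest, shift by the zero point, saturate to the int32 range.
    q = round(x * inv_scale) + zero_point
    return max(qmin, min(qmax, q))

def dequantize_val(q: int, scale: float, zero_point: int) -> float:
    # Inverse affine map back to float.
    return (q - zero_point) * scale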
19 changes: 19 additions & 0 deletions backends/cadence/generic/operators/dequantize_per_tensor.cpp
@@ -44,6 +44,9 @@ Tensor& dequantize_per_tensor_out(
} else if (input.scalar_type() == ScalarType::Short) {
const int16_t* input_data = input.const_data_ptr<int16_t>();
dequantize<int16_t>(out_data, input_data, scale, zero_point, numel);
} else if (input.scalar_type() == ScalarType::Int) {
const int32_t* input_data = input.const_data_ptr<int32_t>();
dequantize<int32_t>(out_data, input_data, scale, zero_point, numel);
} else {
ET_CHECK_MSG(
false,
@@ -117,6 +120,22 @@ Tensor& dequantize_per_tensor_asym16u_out(
return out;
}

Tensor& dequantize_per_tensor_asym32s_out(
KernelRuntimeContext& context,
const Tensor& input,
double scale,
int64_t zero_point,
int64_t quant_min,
int64_t quant_max,
ScalarType dtype,
Tensor& out) {
float* out_data = out.mutable_data_ptr<float>();
size_t numel = out.numel();
const int32_t* input_data = input.const_data_ptr<int32_t>();
dequantize<int32_t>(out_data, input_data, scale, zero_point, numel);
return out;
}

} // namespace native
} // namespace generic
} // namespace impl
19 changes: 19 additions & 0 deletions backends/cadence/generic/operators/quantize_per_tensor.cpp
@@ -46,6 +46,9 @@ Tensor& quantize_per_tensor_out(
} else if (out.scalar_type() == ScalarType::Short) {
int16_t* out_data = out.mutable_data_ptr<int16_t>();
quantize<int16_t>(out_data, input_data, 1. / scale, zero_point, numel);
} else if (out.scalar_type() == ScalarType::Int) {
int32_t* out_data = out.mutable_data_ptr<int32_t>();
quantize<int32_t>(out_data, input_data, 1. / scale, zero_point, numel);
} else {
ET_CHECK_MSG(
false,
@@ -119,6 +122,22 @@ Tensor& quantize_per_tensor_asym16u_out(
return out;
}

Tensor& quantize_per_tensor_asym32s_out(
KernelRuntimeContext& context,
const Tensor& input,
double scale,
int64_t zero_point,
int64_t quant_min,
int64_t quant_max,
ScalarType dtype,
Tensor& out) {
const float* input_data = input.const_data_ptr<float>();
size_t numel = out.numel();
int32_t* out_data = out.mutable_data_ptr<int32_t>();
quantize<int32_t>(out_data, input_data, 1. / scale, zero_point, numel);
return out;
}

}; // namespace native
}; // namespace generic
}; // namespace impl
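Taken together, the two generic operators form a round trip that should recover the input to within half a quantization step. A self-contained illustration of that property in plain Python, mirroring the 1. / scale convention the operators use:

scale, zero_point = 0.05, 0
x = 3.14159
q = max(-(2**31), min(2**31 - 1, round(x * (1.0 / scale)) + zero_point))
x_hat = (q - zero_point) * scale
assert abs(x_hat - x) <= scale / 2  # error bounded by half a step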
2 changes: 2 additions & 0 deletions backends/cadence/hifi/kernels/kernels.cpp
@@ -127,6 +127,7 @@ typed_quantize_val(int8_t);
typed_quantize_val(uint8_t);
typed_quantize_val(int16_t);
typed_quantize_val(uint16_t);
typed_quantize_val(int32_t);
#undef typed_quantize_val

#define typed_quantize_vec(dtype) \
@@ -150,6 +151,7 @@ typed_dequantize_val(int8_t);
typed_dequantize_val(uint8_t);
typed_dequantize_val(int16_t);
typed_dequantize_val(uint16_t);
typed_dequantize_val(int32_t);
#undef typed_dequantize_val

#define typed_dequantize_vec(dtype) \
18 changes: 18 additions & 0 deletions backends/cadence/hifi/operators/op_dequantize_per_tensor.cpp
@@ -45,6 +45,9 @@ void dequantize_per_tensor_out(
input.scalar_type() == ScalarType::UInt16) {
const uint16_t* input_data = input.const_data_ptr<uint16_t>();
dequantize<uint16_t>(out_data, input_data, scale, zero_point, numel);
} else if (input.scalar_type() == ScalarType::Int) {
const int32_t* input_data = input.const_data_ptr<int32_t>();
dequantize<int32_t>(out_data, input_data, scale, zero_point, numel);
} else {
ET_CHECK_MSG(
false,
@@ -98,6 +101,21 @@ void dequantize_per_tensor_asym16u_out(
dequantize<uint16_t>(out_data, input_data, scale, zero_point, numel);
}

void dequantize_per_tensor_asym32s_out(
KernelRuntimeContext& context,
const Tensor& input,
double scale,
int64_t zero_point,
int64_t quant_min,
int64_t quant_max,
ScalarType dtype,
Tensor& out) {
float* out_data = out.mutable_data_ptr<float>();
size_t numel = out.numel();
const int32_t* input_data = input.const_data_ptr<int32_t>();
dequantize<int32_t>(out_data, input_data, scale, zero_point, numel);
}

} // namespace native
} // namespace HiFi
} // namespace impl
18 changes: 18 additions & 0 deletions backends/cadence/hifi/operators/op_quantize_per_tensor.cpp
@@ -108,6 +108,9 @@ void quantize_per_tensor_out(
out.scalar_type() == ScalarType::UInt16) {
uint16_t* out_data = out.mutable_data_ptr<uint16_t>();
quantize<uint16_t>(out_data, input_data, 1. / scale, zero_point, numel);
} else if (out.scalar_type() == ScalarType::Int) {
int32_t* out_data = out.mutable_data_ptr<int32_t>();
quantize<int32_t>(out_data, input_data, 1. / scale, zero_point, numel);
} else {
ET_KERNEL_CHECK_MSG(
ctx,
@@ -164,6 +167,21 @@ void quantize_per_tensor_asym16u_out(
quantize<uint16_t>(out_data, input_data, 1. / scale, zero_point, numel);
}

void quantize_per_tensor_asym32s_out(
KernelRuntimeContext& context,
const Tensor& input,
double scale,
int64_t zero_point,
int64_t quant_min,
int64_t quant_max,
ScalarType dtype,
Tensor& out) {
const float* input_data = input.const_data_ptr<float>();
size_t numel = out.numel();
int32_t* out_data = out.mutable_data_ptr<int32_t>();
quantize<int32_t>(out_data, input_data, 1. / scale, zero_point, numel);
}

}; // namespace native
}; // namespace HiFi
}; // namespace impl