diff --git a/backends/cadence/aot/functions_vision.yaml b/backends/cadence/aot/functions_vision.yaml
index 8d9cdd16105..d7c25dd750c 100644
--- a/backends/cadence/aot/functions_vision.yaml
+++ b/backends/cadence/aot/functions_vision.yaml
@@ -195,6 +195,16 @@
     - arg_meta: null
       kernel_name: impl::vision::native::quantized_conv_out
 
+- func: cadence::quantized_conv2d_nchw.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: impl::vision::native::quantized_conv2d_nchw_out
+
+- func: cadence::quantized_conv2d_nhwc.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: impl::vision::native::quantized_conv2d_nhwc_out
+
 - func: cadence::quantized_layer_norm.out(Tensor input, Tensor in_scale, Tensor in_zero_point, int[] normalized_shape, Tensor weight, Tensor bias, float eps, float output_scale, int output_zero_point, *, Tensor(a!) out) -> Tensor(a!)
   kernels:
     - arg_meta: null
diff --git a/backends/cadence/vision/operators/op_quantized_conv_out.cpp b/backends/cadence/vision/operators/op_quantized_conv_out.cpp
index 1e1e6c8cdc7..b632f0931c2 100644
--- a/backends/cadence/vision/operators/op_quantized_conv_out.cpp
+++ b/backends/cadence/vision/operators/op_quantized_conv_out.cpp
@@ -603,6 +603,80 @@ void quantized_conv2d_nhwc_per_tensor_out(
       out);
 }
 
+void quantized_conv2d_nchw_out(
+    KernelRuntimeContext& ctx,
+    const Tensor& input,
+    const Tensor& weight,
+    const Tensor& bias,
+    IntArrayRef stride,
+    IntArrayRef padding,
+    IntArrayRef dilation,
+    int64_t groups,
+    int64_t in_zero_point,
+    const Tensor& weight_zero_point,
+    const Tensor& bias_scale,
+    double output_scale,
+    int64_t output_zero_point,
+    const Tensor& out_multiplier,
+    const Tensor& out_shift,
+    Tensor& out) {
+  quantized_conv_out(
+      ctx,
+      input,
+      weight,
+      bias,
+      stride,
+      padding,
+      dilation,
+      groups,
+      in_zero_point,
+      weight_zero_point,
+      bias_scale,
+      output_scale,
+      output_zero_point,
+      out_multiplier,
+      out_shift,
+      false, // channel_last = false for NCHW
+      out);
+}
+
+void quantized_conv2d_nhwc_out(
+    KernelRuntimeContext& ctx,
+    const Tensor& input,
+    const Tensor& weight,
+    const Tensor& bias,
+    IntArrayRef stride,
+    IntArrayRef padding,
+    IntArrayRef dilation,
+    int64_t groups,
+    int64_t in_zero_point,
+    const Tensor& weight_zero_point,
+    const Tensor& bias_scale,
+    double output_scale,
+    int64_t output_zero_point,
+    const Tensor& out_multiplier,
+    const Tensor& out_shift,
+    Tensor& out) {
+  quantized_conv_out(
+      ctx,
+      input,
+      weight,
+      bias,
+      stride,
+      padding,
+      dilation,
+      groups,
+      in_zero_point,
+      weight_zero_point,
+      bias_scale,
+      output_scale,
+      output_zero_point,
+      out_multiplier,
+      out_shift,
+      true, // channel_last = true for NHWC
+      out);
+}
+
 } // namespace native
 } // namespace vision
 } // namespace impl
diff --git a/backends/cadence/vision/operators/operators.h b/backends/cadence/vision/operators/operators.h
index 36c4486bf85..6842fad41fd 100644
--- a/backends/cadence/vision/operators/operators.h
+++ b/backends/cadence/vision/operators/operators.h
@@ -55,6 +55,42 @@ inline __attribute__((always_inline)) void linear_(
   }
 }
 
+void quantized_conv2d_nchw_out(
+    ::executorch::runtime::KernelRuntimeContext& ctx,
+    const ::executorch::aten::Tensor& input,
+    const ::executorch::aten::Tensor& weight,
+    const ::executorch::aten::Tensor& bias,
+    ::executorch::aten::IntArrayRef stride,
+    ::executorch::aten::IntArrayRef padding,
+    ::executorch::aten::IntArrayRef dilation,
+    int64_t groups,
+    int64_t in_zero_point,
+    const ::executorch::aten::Tensor& weight_zero_point,
+    const ::executorch::aten::Tensor& bias_scale,
+    double output_scale,
+    int64_t output_zero_point,
+    const ::executorch::aten::Tensor& out_multiplier,
+    const ::executorch::aten::Tensor& out_shift,
+    ::executorch::aten::Tensor& out);
+
+void quantized_conv2d_nhwc_out(
+    ::executorch::runtime::KernelRuntimeContext& ctx,
+    const ::executorch::aten::Tensor& input,
+    const ::executorch::aten::Tensor& weight,
+    const ::executorch::aten::Tensor& bias,
+    ::executorch::aten::IntArrayRef stride,
+    ::executorch::aten::IntArrayRef padding,
+    ::executorch::aten::IntArrayRef dilation,
+    int64_t groups,
+    int64_t in_zero_point,
+    const ::executorch::aten::Tensor& weight_zero_point,
+    const ::executorch::aten::Tensor& bias_scale,
+    double output_scale,
+    int64_t output_zero_point,
+    const ::executorch::aten::Tensor& out_multiplier,
+    const ::executorch::aten::Tensor& out_shift,
+    ::executorch::aten::Tensor& out);
+
 } // namespace native
 } // namespace vision
 } // namespace impl
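
Note: the two new operators differ only in the layout flag they forward to the shared quantized_conv_out implementation. The standalone C++ sketch below illustrates that dispatch pattern; the Tensor type and the conv body are hypothetical stand-ins, not the ExecuTorch API, and only the wrapper/flag structure mirrors the real operators added in this diff.

// Minimal sketch of the layout-dispatch pattern (hypothetical stand-in types).
#include <cstdio>

namespace sketch {

struct Tensor {};  // stand-in for ::executorch::aten::Tensor

// Stand-in for the shared implementation that takes a trailing channel_last flag.
void quantized_conv_impl(const Tensor& /*input*/, bool channel_last, Tensor& /*out*/) {
  std::printf("quantized conv, channel_last=%s\n",
              channel_last ? "true (NHWC)" : "false (NCHW)");
}

// Layout-specific wrappers, mirroring quantized_conv2d_nchw_out / quantized_conv2d_nhwc_out.
void quantized_conv2d_nchw_out(const Tensor& input, Tensor& out) {
  quantized_conv_impl(input, /*channel_last=*/false, out);
}

void quantized_conv2d_nhwc_out(const Tensor& input, Tensor& out) {
  quantized_conv_impl(input, /*channel_last=*/true, out);
}

}  // namespace sketch

int main() {
  sketch::Tensor input, out;
  sketch::quantized_conv2d_nchw_out(input, out);  // takes the NCHW path
  sketch::quantized_conv2d_nhwc_out(input, out);  // takes the NHWC path
  return 0;
}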