Skip to content

Commit 96832de

Browse files
zonglinpeng authored and facebook-github-bot committed
link new vision kernel internally
Summary: titled

Reviewed By: hsharma35

Differential Revision: D83810321
1 parent b021fd0 commit 96832de

File tree

12 files changed

+184
-73
lines changed

12 files changed

+184
-73
lines changed

backends/cadence/vision/operators/op_dequantize_per_tensor.cpp

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,25 +31,24 @@ void dequantize_per_tensor_out(
3131

3232
if (input.scalar_type() == ScalarType::Byte) {
3333
const uint8_t* input_data = input.const_data_ptr<uint8_t>();
34-
impl::vision::native::kernels::dequantize<uint8_t>(
34+
kernels::dequantize<uint8_t>(
3535
out_data, input_data, scale, zero_point, numel);
3636
} else if (input.scalar_type() == ScalarType::Char) {
3737
const int8_t* input_data = input.const_data_ptr<int8_t>();
38-
impl::vision::native::kernels::dequantize<int8_t>(
39-
out_data, input_data, scale, zero_point, numel);
38+
kernels::dequantize<int8_t>(out_data, input_data, scale, zero_point, numel);
4039
} else if (
4140
input.scalar_type() == ScalarType::Bits16 ||
4241
input.scalar_type() == ScalarType::UInt16) {
4342
const uint16_t* input_data = input.const_data_ptr<uint16_t>();
44-
impl::vision::native::kernels::dequantize<uint16_t>(
43+
kernels::dequantize<uint16_t>(
4544
out_data, input_data, scale, zero_point, numel);
4645
} else if (input.scalar_type() == ScalarType::Short) {
4746
const int16_t* input_data = input.const_data_ptr<int16_t>();
48-
impl::vision::native::kernels::dequantize<int16_t>(
47+
kernels::dequantize<int16_t>(
4948
out_data, input_data, scale, zero_point, numel);
5049
} else if (input.scalar_type() == ScalarType::Int) {
5150
const int32_t* input_data = input.const_data_ptr<int32_t>();
52-
impl::vision::native::kernels::dequantize<int32_t>(
51+
kernels::dequantize<int32_t>(
5352
out_data, input_data, scale, zero_point, numel);
5453
} else {
5554
ET_CHECK_MSG(

backends/cadence/vision/operators/op_quantize_per_tensor.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,25 +33,25 @@ void quantize_per_tensor_out(
3333

3434
if (out.scalar_type() == ScalarType::Byte) {
3535
uint8_t* out_data = out.mutable_data_ptr<uint8_t>();
36-
impl::vision::native::kernels::quantize<uint8_t>(
36+
kernels::quantize<uint8_t>(
3737
out_data, input_data, 1. / scale, zero_point, numel);
3838
} else if (out.scalar_type() == ScalarType::Char) {
3939
int8_t* out_data = out.mutable_data_ptr<int8_t>();
40-
impl::vision::native::kernels::quantize<int8_t>(
40+
kernels::quantize<int8_t>(
4141
out_data, input_data, 1. / scale, zero_point, numel);
4242
} else if (
4343
out.scalar_type() == ScalarType::Bits16 ||
4444
out.scalar_type() == ScalarType::UInt16) {
4545
uint16_t* out_data = out.mutable_data_ptr<uint16_t>();
46-
impl::vision::native::kernels::quantize<uint16_t>(
46+
kernels::quantize<uint16_t>(
4747
out_data, input_data, 1. / scale, zero_point, numel);
4848
} else if (out.scalar_type() == ScalarType::Short) {
4949
int16_t* out_data = out.mutable_data_ptr<int16_t>();
50-
impl::vision::native::kernels::quantize<int16_t>(
50+
kernels::quantize<int16_t>(
5151
out_data, input_data, 1. / scale, zero_point, numel);
5252
} else if (out.scalar_type() == ScalarType::Int) {
5353
int32_t* out_data = out.mutable_data_ptr<int32_t>();
54-
impl::vision::native::kernels::quantize<int32_t>(
54+
kernels::quantize<int32_t>(
5555
out_data, input_data, 1. / scale, zero_point, numel);
5656
} else {
5757
ET_CHECK_MSG(

backends/cadence/vision/operators/op_quantized_conv_out.cpp

Lines changed: 77 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -141,8 +141,7 @@ __attribute__((noinline)) void conv2d_nchw_core_generic(
141141
if (quantized) {
142142
float val = bias_scale * acc;
143143
out_plane[_oh * ow + _ow] =
144-
::impl::vision::native::kernels::quantize<OT>(
145-
val, inv_out_scale, out_zero_point);
144+
kernels::quantize<OT>(val, inv_out_scale, out_zero_point);
146145
} else {
147146
out_plane[_oh * ow + _ow] = acc;
148147
}
@@ -267,8 +266,8 @@ __attribute__((noinline)) void conv2d_nhwc_core_generic(
267266
}
268267
if (quantized) {
269268
float val = bias_scale * acc;
270-
out_line[_oc] = ::impl::vision::native::kernels::quantize<OT>(
271-
val, inv_out_scale, out_zero_point);
269+
out_line[_oc] =
270+
kernels::quantize<OT>(val, inv_out_scale, out_zero_point);
272271
} else {
273272
out_line[_oc] = acc;
274273
}
@@ -530,6 +529,80 @@ void quantized_conv_per_tensor_out(
530529
}
531530
}
532531

532+
void quantized_conv2d_nchw_per_tensor_out(
533+
KernelRuntimeContext& ctx,
534+
const Tensor& input,
535+
const Tensor& weight,
536+
const Tensor& bias,
537+
IntArrayRef stride,
538+
IntArrayRef padding,
539+
IntArrayRef dilation,
540+
int64_t groups,
541+
int64_t in_zero_point,
542+
int64_t weight_zero_point,
543+
double bias_scale,
544+
double output_scale,
545+
int64_t output_zero_point,
546+
int64_t out_multiplier,
547+
int64_t out_shift,
548+
Tensor& out) {
549+
quantized_conv_per_tensor_out(
550+
ctx,
551+
input,
552+
weight,
553+
bias,
554+
stride,
555+
padding,
556+
dilation,
557+
groups,
558+
in_zero_point,
559+
weight_zero_point,
560+
bias_scale,
561+
output_scale,
562+
output_zero_point,
563+
out_multiplier,
564+
out_shift,
565+
false, // channel_last = false for NCHW
566+
out);
567+
}
568+
569+
void quantized_conv2d_nhwc_per_tensor_out(
570+
KernelRuntimeContext& ctx,
571+
const Tensor& input,
572+
const Tensor& weight,
573+
const Tensor& bias,
574+
IntArrayRef stride,
575+
IntArrayRef padding,
576+
IntArrayRef dilation,
577+
int64_t groups,
578+
int64_t in_zero_point,
579+
int64_t weight_zero_point,
580+
double bias_scale,
581+
double output_scale,
582+
int64_t output_zero_point,
583+
int64_t out_multiplier,
584+
int64_t out_shift,
585+
Tensor& out) {
586+
quantized_conv_per_tensor_out(
587+
ctx,
588+
input,
589+
weight,
590+
bias,
591+
stride,
592+
padding,
593+
dilation,
594+
groups,
595+
in_zero_point,
596+
weight_zero_point,
597+
bias_scale,
598+
output_scale,
599+
output_zero_point,
600+
out_multiplier,
601+
out_shift,
602+
true, // channel_last = true for NHWC
603+
out);
604+
}
605+
533606
} // namespace native
534607
} // namespace vision
535608
} // namespace impl

backends/cadence/vision/operators/op_softmax.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,13 @@
66
* LICENSE file in the root directory of this source tree.
77
*/
88

9-
#include <api.h>
109
#include <executorch/backends/cadence/vision/kernels/kernels.h>
1110
#include <executorch/kernels/portable/cpu/util/activation_ops_util.h>
1211
#include <executorch/kernels/portable/cpu/util/functional_util.h>
1312
#include <executorch/kernels/portable/cpu/util/reduce_util.h>
1413
#include <executorch/runtime/kernel/kernel_includes.h>
15-
#include <idma_init.h>
14+
#include <include/api.h>
15+
#include <include_private/idma_init.h>
1616
#include <stdio.h>
1717

1818
using executorch::aten::ScalarType;

backends/cadence/vision/operators/quantized_ops.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ inline __attribute__((always_inline)) void quantized_linear_per_tensor_(
4949
(int32_t)weight_data[j * in_dim + k] - (int32_t)weight_zero_point;
5050
sum += x * w;
5151
}
52-
out_data[i * out_dim + j] = ::impl::vision::native::kernels::quantize<T>(
52+
out_data[i * out_dim + j] = impl::vision::kernels::quantize<T>(
5353
sum, requant_scale, out_zero_point);
5454
}
5555
}
@@ -121,8 +121,8 @@ inline __attribute__((always_inline)) void quantized_linear_per_channel_(
121121
// Compute the out_scale from out_multiplier and out_shift
122122
const float out_scale =
123123
-out_multiplier_data[j] * 1.0 / (1 << 31) * pow(2, out_shift_data[j]);
124-
out_data[i * out_dim + j] = ::impl::vision::native::kernels::quantize<T>(
125-
sum, out_scale, out_zero_point);
124+
out_data[i * out_dim + j] =
125+
impl::vision::kernels::quantize<T>(sum, out_scale, out_zero_point);
126126
}
127127
}
128128
}

backends/cadence/vision/operators/targets.bzl

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,25 @@ def define_operator(name: str, deps: list[str] | None = None) -> None:
2121
if deps == None:
2222
deps = []
2323

24+
# Determine which headers to export based on operator name
25+
exported_headers = ["operators.h"]
26+
27+
# Add quantized_ops.h header for quantized operators
28+
quantized_ops = [
29+
"quantized_fully_connected_out",
30+
"quantized_matmul_out",
31+
"quantized_layer_norm",
32+
"quantized_relu_out",
33+
"quantized_conv_out",
34+
"quantized_linear_out",
35+
"quantize_per_tensor",
36+
"dequantize_per_tensor",
37+
"requantize_out"
38+
]
39+
40+
if name in quantized_ops:
41+
exported_headers.append("quantized_ops.h")
42+
2443
runtime.cxx_library(
2544
name = op_name,
2645
srcs = [op_name + ".cpp"],
@@ -31,7 +50,7 @@ def define_operator(name: str, deps: list[str] | None = None) -> None:
3150
],
3251
compatible_with = ["ovr_config//cpu:xtensa"],
3352
deps = deps + common_deps,
34-
exported_headers = ["operators.h"],
53+
exported_headers = exported_headers,
3554
)
3655

3756
OPERATORS = [
Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,36 @@
11
#ifndef __IDMA__INIT_H__
22
#define __IDMA__INIT_H__
33

4-
#include "dtypes.h"
4+
#include "../include/dtypes.h"
55
#include "common.h"
66

7-
#define IDMA_BUFF_SIZE 16384 // 16 kb DRAM storage. Assume 4 buffers (2 input and 2 output)
7+
#define IDMA_BUFF_SIZE \
8+
16384 // 16 kb DRAM storage. Assume 4 buffers (2 input and 2 output)
89

910
#ifndef PLACE_IN_DRAM0
10-
#define PLACE_IN_DRAM0 __attribute__ ((aligned(2*IVP_SIMD_WIDTH), section(".dram0.data")))
11+
#define PLACE_IN_DRAM0 \
12+
__attribute__((aligned(2 * IVP_SIMD_WIDTH), section(".dram0.data")))
1113
#endif
1214

1315
#ifndef PLACE_IN_DRAM1
14-
#define PLACE_IN_DRAM1 __attribute__ ((aligned(2*IVP_SIMD_WIDTH), section(".dram1.data")))
16+
#define PLACE_IN_DRAM1 \
17+
__attribute__((aligned(2 * IVP_SIMD_WIDTH), section(".dram1.data")))
1518
#endif
1619

1720
float32_t data_dram0[IDMA_BUFF_SIZE / 2] PLACE_IN_DRAM0;
1821
float32_t data_dram1[IDMA_BUFF_SIZE / 2] PLACE_IN_DRAM1;
1922

20-
float32_t *inpData[2] = {&data_dram0[0], &data_dram1[0]};
21-
float32_t *outData[2] = {&data_dram0[IDMA_BUFF_SIZE / 4], &data_dram1[IDMA_BUFF_SIZE / 4]};
23+
float32_t* inpData[2] = {&data_dram0[0], &data_dram1[0]};
24+
float32_t* outData[2] = {
25+
&data_dram0[IDMA_BUFF_SIZE / 4],
26+
&data_dram1[IDMA_BUFF_SIZE / 4]};
2227

2328
IDMA_BUFFER_DEFINE(buffer_idma_ch0, 1, IDMA_2D_DESC);
2429
IDMA_BUFFER_DEFINE(buffer_idma_ch1, 1, IDMA_2D_DESC);
2530

26-
idma_buffer_t * descbuf[] = {
27-
buffer_idma_ch0,
28-
buffer_idma_ch1,
31+
idma_buffer_t* descbuf[] = {
32+
buffer_idma_ch0,
33+
buffer_idma_ch1,
2934
};
3035

31-
#endif // __IDMA__INIT_H__
36+
#endif // __IDMA__INIT_H__

0 commit comments

Comments (0)