
Commit bec2936

mcremon-meta authored and facebook-github-bot committed
Add missing pieces for quantized conv1d support
Summary: As titled. This should fix all of the registration issues, and the `conv1d_ncl` version is functional. `conv1d_nlc` still seems to have issues; looking into that next.

Reviewed By: hsharma35

Differential Revision: D82771653
1 parent 2283294 commit bec2936

15 files changed (+1687, -149 lines)

backends/cadence/aot/TARGETS

Lines changed: 2 additions & 0 deletions
@@ -149,6 +149,8 @@ executorch_generated_lib(
         "//executorch/backends/cadence/generic/operators:dequantize_per_tensor",
         "//executorch/backends/cadence/generic/operators:quantize_per_tensor",
         "//executorch/backends/cadence/generic/operators:quantized_add_out",
+        "//executorch/backends/cadence/generic/operators:quantized_conv1d_ncl_out",
+        "//executorch/backends/cadence/generic/operators:quantized_conv1d_nlc_out",
         "//executorch/backends/cadence/generic/operators:quantized_conv2d_nchw_out",
         "//executorch/backends/cadence/generic/operators:quantized_conv2d_nhwc_out",
         "//executorch/backends/cadence/generic/operators:quantized_fully_connected_out",

backends/cadence/aot/functions.yaml

Lines changed: 20 additions & 0 deletions
@@ -250,6 +250,16 @@
     - arg_meta: null
       kernel_name: impl::generic::dequantize_per_tensor_asym32s_out
 
+- func: cadence::quantized_conv1d_ncl.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: impl::generic::quantized_conv1d_ncl_out
+
+- func: cadence::quantized_conv1d_nlc.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: impl::generic::quantized_conv1d_nlc_out
+
 - func: cadence::quantized_conv2d_nchw.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!)
   kernels:
     - arg_meta: null

@@ -419,6 +429,16 @@
     - arg_meta: null
       kernel_name: impl::generic::quantized_conv2d_nhwc_depthwise_asym8uxsym8u_asym8u_per_tensor_out
 
+- func: cadence::quantized_conv1d_ncl.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: impl::generic::quantized_conv1d_ncl_per_tensor_out
+
+- func: cadence::quantized_conv1d_nlc.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: impl::generic::quantized_conv1d_nlc_per_tensor_out
+
 - func: cadence::quantized_conv1d_ncl_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
   kernels:
     - arg_meta: null

backends/cadence/aot/functions_hifi.yaml

Lines changed: 20 additions & 0 deletions
@@ -350,6 +350,16 @@
     - arg_meta: null
      kernel_name: impl::HiFi::dequantize_per_tensor_asym16s_out
 
+- func: cadence::quantized_conv1d_ncl.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: impl::HiFi::quantized_conv1d_ncl_out
+
+- func: cadence::quantized_conv1d_nlc.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: impl::HiFi::quantized_conv1d_nlc_out
+
 - func: cadence::quantized_conv2d_nchw.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!)
   kernels:
     - arg_meta: null

@@ -430,6 +440,16 @@
     - arg_meta: null
       kernel_name: impl::HiFi::quantized_conv2d_nhwc_depthwise_asym8uxsym8u_asym8u_per_tensor_out
 
+- func: cadence::quantized_conv1d_ncl.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: impl::HiFi::quantized_conv1d_ncl_per_tensor_out
+
+- func: cadence::quantized_conv1d_nlc.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: impl::HiFi::quantized_conv1d_nlc_per_tensor_out
+
 - func: cadence::quantized_conv1d_ncl_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
   kernels:
     - arg_meta: null
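
Both YAML files above bind the same four new schemas and differ only in the kernel namespace (`impl::generic` vs. `impl::HiFi`). The `func` entries are ordinary ATen schema strings, so one way to sanity-check an entry is to run it through torch's schema parser. A minimal sketch, assuming a recent PyTorch (note `torch._C.parse_schema` is an internal, not a public API):

import torch

# Parse the new per-tensor out-variant schema exactly as written in the YAML.
schema = torch._C.parse_schema(
    "cadence::quantized_conv1d_ncl.per_tensor_out("
    "Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, "
    "int[] dilation, int groups, int input_zero_point, int weight_zero_point, "
    "float bias_scale, float out_scale, int out_zero_point, int out_multiplier, "
    "int out_shift, *, Tensor(a!) out) -> Tensor(a!)"
)
print(schema.overload_name)  # per_tensor_out
print(schema)  # round-trips the full schema, including the (a!) out alias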

backends/cadence/aot/ops_registrations.py

Lines changed: 174 additions & 0 deletions
@@ -235,6 +235,30 @@
 lib.define(
     "quantized_conv2d_nhwc_dilated_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)"
 )
+lib.define(
+    "quantized_conv1d_nlc(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift) -> (Tensor Z)"
+)
+lib.define(
+    "quantized_conv1d_nlc.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!)"
+)
+lib.define(
+    "quantized_conv1d_ncl(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift) -> (Tensor Z)"
+)
+lib.define(
+    "quantized_conv1d_ncl.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!)"
+)
+lib.define(
+    "quantized_conv1d_ncl.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)"
+)
+lib.define(
+    "quantized_conv1d_ncl.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)"
+)
+lib.define(
+    "quantized_conv1d_nlc.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)"
+)
+lib.define(
+    "quantized_conv1d_nlc.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)"
+)
 lib.define(
     "quantized_conv1d_ncl_asym8sxsym8s_asym8s.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)"
 )
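
For context, `lib.define` only declares these schemas; importing the module is what makes them resolvable under `torch.ops.cadence`. A minimal sketch of checking the registrations, assuming the module is importable at the path below (adjust to your checkout/build):

import torch

# Hypothetical import path for this module; importing runs the lib.define calls above.
import executorch.backends.cadence.aot.ops_registrations  # noqa: F401

packet = torch.ops.cadence.quantized_conv1d_ncl
# The overloads declared above: default (functional), out, per_tensor, per_tensor_out.
print(packet.overloads())
print(packet.per_tensor._schema)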
@@ -934,6 +958,94 @@ def quantized_conv2d_nhwc_meta(
     return input.new_empty(output_size, dtype=input.dtype)
 
 
+@register_fake("cadence::quantized_conv1d_nlc")
+def quantized_conv1d_nlc_meta(
+    input: torch.Tensor,
+    weight: torch.Tensor,
+    bias: torch.Tensor,
+    stride: Tuple[int],
+    padding: Tuple[int],
+    dilation: Tuple[int],
+    groups: int,
+    in_zero_point: int,
+    weight_zero_point: torch.Tensor,
+    bias_scale: torch.Tensor,
+    output_scale: float,
+    output_zero_point: int,
+    out_multiplier: torch.Tensor,
+    out_shift: torch.Tensor,
+) -> torch.Tensor:
+    out_channels, *kernel_size, _ = weight.shape
+
+    in_size = input.shape
+    # Assert that the input tensor has at least 3 dimensions, and at most 6
+    assert len(in_size) > 2
+    assert len(in_size) < 6
+
+    # Compute the output tensor size
+    output_size = (
+        get_conv1d_output_size(
+            in_size,
+            out_channels,
+            stride[1],
+            padding[1],
+            dilation[1],
+            kernel_size[0],
+            True,
+        )
+        if len(in_size) == 3
+        else get_conv2d_output_size(
+            in_size, out_channels, stride, padding, dilation, kernel_size, True
+        )
+    )
+
+    return input.new_empty(output_size, dtype=input.dtype)
+
+
+@register_fake("cadence::quantized_conv1d_ncl")
+def quantized_conv1d_ncl_meta(
+    input: torch.Tensor,
+    weight: torch.Tensor,
+    bias: torch.Tensor,
+    stride: Tuple[int],
+    padding: Tuple[int],
+    dilation: Tuple[int],
+    groups: int,
+    in_zero_point: int,
+    weight_zero_point: torch.Tensor,
+    bias_scale: torch.Tensor,
+    output_scale: float,
+    output_zero_point: int,
+    out_multiplier: torch.Tensor,
+    out_shift: torch.Tensor,
+) -> torch.Tensor:
+    out_channels, _, *kernel_size = weight.shape
+
+    in_size = input.shape
+    # Assert that the input tensor has at least 3 dimensions, and at most 6
+    assert len(in_size) > 2
+    assert len(in_size) < 6
+
+    # Compute the output tensor size
+    output_size = (
+        get_conv1d_output_size(
+            in_size,
+            out_channels,
+            stride[1],
+            padding[1],
+            dilation[1],
+            kernel_size[0],
+            False,
+        )
+        if len(in_size) == 3
+        else get_conv2d_output_size(
+            in_size, out_channels, stride, padding, dilation, kernel_size, False
+        )
+    )
+
+    return input.new_empty(output_size, dtype=input.dtype)
+
+
 @register_fake("cadence::quantized_conv2d_nchw")
 def quantized_conv2d_nchw_meta(
     input: torch.Tensor,
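
The two fake kernels added above only need to produce an empty tensor of the right shape and dtype. Note that their 1-d branch reads index 1 of `stride`, `padding`, and `dilation`, so these arrive as length-2 lists even for conv1d. A minimal sketch of the underlying arithmetic, assuming `get_conv1d_output_size` (not shown in this diff) follows the standard dilated-convolution formula; names are illustrative and, for simplicity, `in_size` is taken in NCL order either way:

from typing import Tuple

def conv1d_out_size_sketch(
    in_size: Tuple[int, int, int],  # (N, C, L)
    out_channels: int,
    stride: int,
    padding: int,
    dilation: int,
    kernel_size: int,
    channel_last: bool,
) -> Tuple[int, int, int]:
    n, _, length = in_size
    # Standard dilated-convolution output length.
    l_out = (length + 2 * padding - dilation * (kernel_size - 1) - 1) // stride + 1
    # nlc puts channels last; ncl keeps them in dim 1.
    return (n, l_out, out_channels) if channel_last else (n, out_channels, l_out)

# (N=1, C=8, L=16) input, 4 output channels, kernel 3, stride 1, no padding:
assert conv1d_out_size_sketch((1, 8, 16), 4, 1, 0, 1, 3, False) == (1, 4, 14)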
@@ -2371,6 +2483,68 @@ def roi_align_box_processor_meta(
     return rois.new_empty((rois.shape[0], 80), dtype=torch.uint8)
 
 
+@register_fake("cadence::quantized_conv1d_ncl.per_tensor")
+def quantized_conv1d_ncl_per_tensor_meta(
+    input: torch.Tensor,
+    weight: torch.Tensor,
+    bias: torch.Tensor,
+    stride: Tuple[int],
+    padding: Tuple[int],
+    dilation: Tuple[int],
+    groups: int,
+    in_zero_point: int,
+    weight_zero_point: int,
+    bias_scale: float,
+    output_scale: float,
+    output_zero_point: int,
+    out_multiplier: int,
+    out_shift: int,
+) -> torch.Tensor:
+    assert input.dim() == 3 and weight.dim() == 3
+    out_channels, _, kernel_size = weight.shape
+    output_size = get_conv1d_output_size(
+        input.shape,
+        out_channels,
+        stride[1],
+        padding[1],
+        dilation[1],
+        kernel_size,
+        False,
+    )
+    return input.new_empty(output_size, dtype=input.dtype)
+
+
+@register_fake("cadence::quantized_conv1d_nlc.per_tensor")
+def quantized_conv1d_nlc_per_tensor_meta(
+    input: torch.Tensor,
+    weight: torch.Tensor,
+    bias: torch.Tensor,
+    stride: Tuple[int],
+    padding: Tuple[int],
+    dilation: Tuple[int],
+    groups: int,
+    in_zero_point: int,
+    weight_zero_point: int,
+    bias_scale: float,
+    output_scale: float,
+    output_zero_point: int,
+    out_multiplier: int,
+    out_shift: int,
+) -> torch.Tensor:
+    assert input.dim() == 3 and weight.dim() == 3
+    out_channels, _, kernel_size = weight.shape
+    output_size = get_conv1d_output_size(
+        input.shape,
+        out_channels,
+        stride[1],
+        padding[1],
+        dilation[1],
+        kernel_size,
+        True,
+    )
+    return input.new_empty(output_size, dtype=input.dtype)
+
+
 @register_fake("cadence::quantized_conv1d_ncl_asym8sxsym8s_asym8s.per_tensor")
 def quantized_conv1d_ncl_asym8sxsym8s_asym8s_per_tensor_meta(
     input: torch.Tensor,
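
With the `register_fake` entries in place, the per-tensor overloads can be shape-checked without any real kernel. A minimal sketch under `FakeTensorMode`, assuming the ops module has been imported as above; tensor contents are irrelevant here and the quantization parameters are placeholders:

import torch
from torch._subclasses.fake_tensor import FakeTensorMode

with FakeTensorMode():
    x = torch.empty(1, 8, 16, dtype=torch.int8)  # NCL input: (N, C_in, L)
    w = torch.empty(4, 8, 3, dtype=torch.int8)   # (C_out, C_in, K)
    b = torch.empty(4, dtype=torch.int32)
    y = torch.ops.cadence.quantized_conv1d_ncl.per_tensor(
        x, w, b,
        [1, 1],  # stride (the meta kernel reads index 1)
        [0, 0],  # padding
        [1, 1],  # dilation
        1,       # groups
        0,       # input_zero_point
        0,       # weight_zero_point
        1.0,     # bias_scale
        1.0,     # out_scale
        0,       # out_zero_point
        1,       # out_multiplier
        0,       # out_shift
    )
    print(y.shape)  # torch.Size([1, 4, 14])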

backends/cadence/aot/quantizer/patterns.py

Lines changed: 10 additions & 4 deletions
@@ -247,7 +247,7 @@ def get_anchors(
         )
 
     def replacement_op(self) -> OpOverload:
-        return torch.ops.cadence.quantized_conv2d_nchw.default
+        return torch.ops.cadence.quantized_conv1d_ncl.default
 
 
 class Conv2dPattern(QuantizationPattern):

@@ -459,29 +459,35 @@ def get_anchors(
             output=[(relu_node,)],  # Output is from the relu node
         )
 
-    def replacement_op(self) -> OpOverload:
-        return torch.ops.cadence.quantized_conv2d_nchw.default
-
 
 # Conv1d + regular relu op fusion
 class Conv1dReluPattern0(ConvReluBasePattern):
     def partition_types(self) -> List[OpOverload]:
         return [torch.ops.aten.conv1d.default, torch.ops.aten.relu.default]
 
+    def replacement_op(self) -> OpOverload:
+        return torch.ops.cadence.quantized_conv1d_ncl.default
 
 # Conv1d + alternate relu op fusion
 class Conv1dReluPattern1(ConvReluBasePattern):
     def partition_types(self) -> List[OpOverload]:
         return [torch.ops.aten.conv1d.default, torch.ops.aten.relu_.default]
 
+    def replacement_op(self) -> OpOverload:
+        return torch.ops.cadence.quantized_conv1d_ncl.default
 
 # Conv2d + regular relu op fusion
 class Conv2dReluPattern0(ConvReluBasePattern):
     def partition_types(self) -> List[OpOverload]:
         return [torch.ops.aten.conv2d.default, torch.ops.aten.relu.default]
 
+    def replacement_op(self) -> OpOverload:
+        return torch.ops.cadence.quantized_conv2d_nchw.default
 
 # Conv2d + alternate relu op fusion
 class Conv2dReluPattern1(ConvReluBasePattern):
     def partition_types(self) -> List[OpOverload]:
         return [torch.ops.aten.conv2d.default, torch.ops.aten.relu_.default]
+
+    def replacement_op(self) -> OpOverload:
+        return torch.ops.cadence.quantized_conv2d_nchw.default
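
For reference, this is the kind of eager module whose exported graph contains the `aten.conv1d.default` + `aten.relu.default` pair that `Conv1dReluPattern0` partitions (a toy sketch with arbitrary sizes, not taken from the diff):

import torch
import torch.nn as nn

class TinyConv1dRelu(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        # nn.Conv1d consumes NCL-layout activations, matching the
        # quantized_conv1d_ncl replacement op chosen above.
        self.conv = nn.Conv1d(8, 4, kernel_size=3)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # conv1d followed by relu: the exact op pair listed in
        # Conv1dReluPattern0.partition_types().
        return torch.relu(self.conv(x))

m = TinyConv1dRelu()
print(m(torch.randn(1, 8, 16)).shape)  # torch.Size([1, 4, 14])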

0 commit comments
