13 | 13 | from executorch.backends.nxp.quantizer.utils import get_bias_qparams |
14 | 14 | from torch import fx |
15 | 15 | from torch._ops import OpOverload |
| 16 | +from torch.fx import Node |
16 | 17 | from torchao.quantization.pt2e import PerChannelMinMaxObserver |
17 | 18 | from torchao.quantization.pt2e.quantizer import ( |
18 | 19 | DerivedQuantizationSpec, |
19 | 20 | FixedQParamsQuantizationSpec, |
20 | 21 | QuantizationSpec, |
21 | 22 | SharedQuantizationSpec, |
22 | 23 | ) |
| 24 | + |
23 | 25 | from torchao.quantization.pt2e.quantizer.quantizer import Q_ANNOTATION_KEY |
24 | 26 |
25 | 27 |
@@ -199,7 +201,6 @@ def partition_types(self) -> list[OpOverload]: |
199 | 201 | def get_anchors( |
200 | 202 | self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule] |
201 | 203 | ) -> PartitionAnchors: |
202 | | - # pyre-fixme[29]: `Union[BoundMethod[typing.Callable(torch._C.TensorBase.__ge... |
203 | 204 | addmm_node = fused_partition[0].nodes[-1] |
204 | 205 |
205 | 206 | bias_qspec = DerivedQuantizationSpec( |
@@ -745,3 +746,147 @@ def get_anchors( |
745 | 746 | return get_anchors_for_fixed_quant_specs( |
746 | 747 | fused_partition, scale=1.0 / 128.0, zero_point=0 |
747 | 748 | ) |
| 749 | + |
| 750 | + |
| 751 | +class ActivationsConcatClusterPattern(QuantizationPattern): |
| 752 | + """ |
| 752 | + Quantizer for the activations-concat cluster pattern.
| 754 | +
| 755 | + The quantizer matches a pattern where a concat node is preceded by activation nodes, which are in turn
| 756 | + preceded by Conv 2D or Linear nodes. The quantization parameters of all activation nodes must be identical,
| 757 | + and only activations that Neutron can fuse into the preceding compute node are allowed. This cluster is
| 758 | + usually produced by the MoveActivationBeforeConcat pass. Cluster schema:
| 759 | +
| 760 | + │ │ |
| 761 | + ┌──────▼──────┐ ┌──────▼──────┐ |
| 762 | + │ aten.conv2d │ ... │ aten.conv2d │ |
| 763 | + └──────┬──────┘ └──────┬──────┘ |
| 764 | + │ │ |
| 765 | + ┌─────▼─────┐ ┌─────▼─────┐ |
| 766 | + │ aten.relu │ ... │ aten.relu │ |
| 767 | + └─────┬─────┘ └─────┬─────┘ |
| 768 | + └───────┐ ┌───────┘ |
| 769 | + ┌──▼─────▼─┐ |
| 770 | + │ aten.cat │ |
| 771 | + └────┬─────┘ |
| 772 | + │ |
| 773 | + """ |
| 774 | + |
| 775 | + def __init__(self, neutron_quantizer): |
| 776 | + self.neutron_quantizer = neutron_quantizer |
| 777 | + self.neutron_target_info = ( |
| 778 | + self.neutron_quantizer.neutron_target_spec.neutron_target_info |
| 779 | + ) |
| 780 | + |
| 781 | + @staticmethod |
| 782 | + def _all_activations_are_equal(activations: list[Node]) -> bool: |
| 783 | + first_input_node = activations[0] |
| 784 | + hardtanh_t = [ |
| 785 | + torch.ops.aten.hardtanh.default, |
| 786 | + torch.ops.aten.hardtanh_.default, |
| 787 | + ] |
| 788 | + relu_t = [ |
| 789 | + torch.ops.aten.relu.default, |
| 790 | + torch.ops.aten.relu_.default, |
| 791 | + ] |
| 792 | + tanh_t = [ |
| 793 | + torch.ops.aten.tanh.default, |
| 794 | + torch.ops.aten.tanh_.default, |
| 795 | + ] |
| 796 | + |
| 797 | + def _activations_are_equal(activation1: Node, activation2: Node) -> bool: |
| 798 | + if ( # Targets match, including their in-place variants
| 799 | + activation1.target in hardtanh_t |
| 800 | + and activation2.target in hardtanh_t |
| 801 | + or activation1.target in relu_t |
| 802 | + and activation2.target in relu_t |
| 803 | + or activation1.target in tanh_t |
| 804 | + and activation2.target in tanh_t |
| 805 | + or activation1.target == torch.ops.aten.sigmoid.default |
| 806 | + and activation2.target == torch.ops.aten.sigmoid.default |
| 807 | + ): |
| 808 | + return True |
| 809 | + elif ( # Hardtanh with min_val=0.0 and max_val=inf is equivalent to Relu
| 810 | + activation1.target in hardtanh_t |
| 811 | + and activation1.args[1:] == (0.0, float("inf")) |
| 812 | + and activation2.target in relu_t |
| 813 | + or activation1.target in relu_t |
| 814 | + and activation2.target in hardtanh_t |
| 815 | + and activation2.args[1:] == (0.0, float("inf")) |
| 816 | + ): |
| 817 | + return True |
| 818 | + else: |
| 819 | + return False |
| 820 | + |
| 821 | + return all( |
| 822 | + _activations_are_equal(activation, first_input_node) |
| 823 | + for activation in activations |
| 824 | + ) |
| 825 | + |
| 826 | + def partition_types(self) -> list[OpOverload]: |
| 827 | + return [torch.ops.aten.cat.default] |
| 828 | + |
| 829 | + def get_anchors( |
| 830 | + self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule] |
| 831 | + ) -> PartitionAnchors | None: |
| 832 | + cat_node = fused_partition[0].nodes[-1] |
| 833 | + |
| 834 | + # Check all cat inputs are supported activations |
| 835 | + if not all( |
| 836 | + self.neutron_target_info.is_supported_fused_activation__aten(input_node) |
| 837 | + for input_node in cat_node.all_input_nodes |
| 838 | + ): |
| 839 | + return None |
| 840 | + |
| 841 | + # Check all cat inputs are equal activations |
| 842 | + if not self._all_activations_are_equal(cat_node.all_input_nodes): |
| 843 | + return None |
| 844 | + |
| 845 | + # Check compute nodes are Conv 2D or Linear |
| 846 | + if not all( |
| 847 | + self.neutron_target_info.is_fusable_conv_or_linear__aten(compute_node) |
| 848 | + for input_node in cat_node.all_input_nodes |
| 849 | + for compute_node in input_node.all_input_nodes |
| 850 | + ): |
| 851 | + return None |
| 852 | + |
| 853 | + # Annotate compute nodes |
| 854 | + for input_node in cat_node.all_input_nodes: |
| 855 | + for compute_node in input_node.all_input_nodes: |
| 856 | + if compute_node.target not in self.neutron_quantizer.op_to_quantizer: |
| 857 | + return None |
| 858 | + compute_node_quantizer = self.neutron_quantizer.op_to_quantizer[ |
| 859 | + compute_node.target |
| 860 | + ] |
| 861 | + compute_node_quantizer.annotate(gm) |
| 862 | + del compute_node.meta[Q_ANNOTATION_KEY].output_qspec # Output feeds the fused activation, so it carries no qspec
| 863 | + |
| 864 | + # Annotate activations |
| 865 | + for input_node in cat_node.all_input_nodes: |
| 866 | + if input_node.target not in self.neutron_quantizer.op_to_quantizer: |
| 867 | + return None |
| 868 | + activation_quantizer = self.neutron_quantizer.op_to_quantizer[ |
| 869 | + input_node.target |
| 870 | + ] |
| 871 | + activation_quantizer.annotate(gm) |
| 872 | + input_node.meta[Q_ANNOTATION_KEY].input_qspec_map = {} # Input edge is fused with the preceding compute node
| 873 | + |
| 874 | + # Annotate cat node |
| 875 | + inputs = [] |
| 876 | + first_input_node = cat_node.all_input_nodes[0] |
| 877 | + for idx in range(len(cat_node.all_input_nodes)): |
| 878 | + inputs.append( |
| 879 | + ( |
| 880 | + cat_node, |
| 881 | + NodeArgsIdx(0, idx), |
| 882 | + SharedQuantizationSpec(first_input_node), |
| 883 | + ) |
| 884 | + ) |
| 885 | + outputs = [(cat_node, SharedQuantizationSpec(first_input_node))] |
| 886 | + |
| 887 | + return PartitionAnchors( |
| 888 | + inputs=inputs, |
| 889 | + weights=[], |
| 890 | + biases=[], |
| 891 | + output=outputs, |
| 892 | + ) |
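
For reference, a minimal eager-mode module whose exported graph forms the cluster matched above. This is an illustrative sketch, not part of the patch; the module name and layer sizes are made up:

    import torch
    from torch import nn

    class ConcatCluster(nn.Module):
        # Two Conv2d -> ReLU branches feeding a single cat, mirroring the
        # schema drawn in the ActivationsConcatClusterPattern docstring.
        def __init__(self):
            super().__init__()
            self.conv_a = nn.Conv2d(3, 8, kernel_size=3)
            self.conv_b = nn.Conv2d(3, 8, kernel_size=3)

        def forward(self, x):
            a = torch.relu(self.conv_a(x))   # aten.relu preceded by aten.conv2d
            b = torch.relu(self.conv_b(x))   # same activation type on every branch
            return torch.cat([a, b], dim=1)  # aten.cat consuming only the activations

Once such a graph is annotated, get_anchors ties every cat input and the cat output to SharedQuantizationSpec(first_input_node), so all branches and the concatenated result end up with a single set of quantization parameters.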