Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions backends/qualcomm/quantizer/custom_annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,40 @@ def annotate_single_in_single_out(
_annotated=True,
)

def annotate_single_in_share_out(
    node: Node, quantization_config: QuantizationConfig
) -> None:
    """Annotate a single-input op whose output spec is tied to its input.

    The first positional argument of ``node`` is annotated with
    ``quantization_config.input_activation``.  The output is annotated
    with a ``SharedQuantizationSpec`` that refers to the
    ``(input, node)`` edge instead of getting a spec of its own.

    Args:
        node: Node to annotate; ``node.args[0]`` is taken as its input.
        quantization_config: Supplies the input activation spec.
    """
    source = node.args[0]

    node.meta[QUANT_ANNOTATION_KEY] = QuantizationAnnotation(
        input_qspec_map={source: quantization_config.input_activation},
        # Output refers back to the (input -> node) edge.
        output_qspec=SharedQuantizationSpec((source, node)),
        _annotated=True,
    )

def annotate_stack(node: Node, quantization_config: QuantizationConfig) -> None:
    """Annotate a stack-style op whose first argument is a sequence of inputs.

    The first element of ``node.args[0]`` is annotated with
    ``quantization_config.input_activation``.  Every other distinct
    input, and the output, is annotated with one ``SharedQuantizationSpec``
    that refers to the ``(first_input, node)`` edge.

    Args:
        node: Node to annotate; ``node.args[0]`` must be a non-empty
            sequence, since its first element is indexed directly.
        quantization_config: Supplies the spec for the first input.
    """
    operands = node.args[0]
    anchor = operands[0]

    qspec_by_input = {anchor: quantization_config.input_activation}
    follow_anchor_qspec = SharedQuantizationSpec((anchor, node))

    for operand in operands[1:]:
        # setdefault keeps any spec already recorded, so an input that
        # appears more than once (including the anchor) is not overwritten.
        qspec_by_input.setdefault(operand, follow_anchor_qspec)

    node.meta[QUANT_ANNOTATION_KEY] = QuantizationAnnotation(
        input_qspec_map=qspec_by_input,
        output_qspec=follow_anchor_qspec,
        _annotated=True,
    )

def annotate_matmul_input1(node: Node):
quantization_config_8a8w = get_8a8w_qnn_ptq_config(
act_symmetric=True, act_observer=MinMaxObserver
Expand All @@ -247,6 +281,12 @@ def annotate_matmul_input1(node: Node):
]:
annotate_single_in_single_out(node, quantization_config_8a8w)
node = node.args[0]
elif node.target == torch.ops.aten.stack.default:
annotate_stack(node, quantization_config_8a8w)
node = node.args[0]
elif node.target == torch.ops.aten.flatten.using_ints:
annotate_single_in_share_out(node, quantization_config_8a8w)
node = node.args[0]
elif node.target == torch.ops.aten.cat.default:
annotate_cat(node, quantization_config_8a8w)
# For v, we tag 8a until conv op.
Expand Down
Loading