Skip to content

Commit bb55ae3

Browse files
committed
Update
[ghstack-poisoned]
2 parents 9af3f90 + ee50beb commit bb55ae3

File tree

6 files changed

+33
-1
lines changed

6 files changed

+33
-1
lines changed

backends/qualcomm/quantizer/custom_annotation.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,3 +118,29 @@ def annotate_matmul_input1(node: Node, quantization_config: QuantizationConfig):
118118
if "SDPA" in full_qualified_name:
119119
annotate_matmul(node, quantization_config_16a8w)
120120
annotate_matmul_input1(node.args[1], quantization_config_8a8w)
121+
122+
123+
def custom_annotate_matmul_16a8w(gm: torch.fx.GraphModule):
    """
    Tag every ``aten.matmul.default`` call in ``gm`` with the 16a8w config.

    The config comes from ``get_16a8w_qnn_ptq_config()``. For each matching
    node a ``QuantizationAnnotation`` is stored in
    ``node.meta[QUANT_ANNOTATION_KEY]``; the graph is modified in place and
    nothing is returned.
    """

    def _tag_matmul(matmul_node: Node, config: QuantizationConfig):
        # First operand takes the activation spec, second operand takes the
        # weight spec. If both operands are the same node, the later entry
        # (weight spec) wins, exactly as with sequential assignment.
        operand_specs = {
            matmul_node.args[0]: config.input_activation,
            matmul_node.args[1]: config.weight,
        }
        matmul_node.meta[QUANT_ANNOTATION_KEY] = QuantizationAnnotation(
            input_qspec_map=operand_specs,
            output_qspec=config.output_activation,
            _annotated=True,
        )

    # Annotate 16a8w for matmul op to get better performance
    config_16a8w = get_16a8w_qnn_ptq_config()
    matmul_target = torch.ops.aten.matmul.default
    for candidate in gm.graph.nodes:
        if candidate.op != "call_function":
            continue
        if candidate.target != matmul_target:
            continue
        _tag_matmul(candidate, config_16a8w)

backends/qualcomm/serialization/qnn_compile_spec_schema.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ class QcomChipset(IntEnum):
3333
SM8450 = 36 # v69
3434
SM8475 = 42 # v69
3535
SM8550 = 43 # v73
36+
SSG2115P = 46 # v73
3637
SM8650 = 57 # v75
3738

3839

@@ -47,6 +48,7 @@ class SocInfo:
4748
QcomChipset.SM8475: SocInfo(QcomChipset.SM8475, HtpInfo(HtpArch.V69, 8)),
4849
QcomChipset.SM8550: SocInfo(QcomChipset.SM8550, HtpInfo(HtpArch.V73, 8)),
4950
QcomChipset.SM8650: SocInfo(QcomChipset.SM8650, HtpInfo(HtpArch.V75, 8)),
51+
QcomChipset.SSG2115P: SocInfo(QcomChipset.SSG2115P, HtpInfo(HtpArch.V73, 2)),
5052
}
5153

5254

backends/qualcomm/serialization/schema.fbs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ enum QcomChipset: int {
3232
SM8450 = 36,
3333
SM8475 = 42,
3434
SM8550 = 43,
35+
SSG2115P = 46,
3536
SM8650 = 57,
3637
}
3738

@@ -170,7 +171,7 @@ table QnnExecuTorchOptions {
170171

171172
/// Profiling level of the delegate and the backend. Default is off.
172173
profile_level:QnnExecuTorchProfileLevel;
173-
174+
174175
/// Enables usage of shared buffer between application and backend for graph I/O.
175176
shared_buffer:bool;
176177

backends/qualcomm/tests/utils.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ class TestQNN(unittest.TestCase):
118118
model: QcomChipset = None
119119
compiler_specs: List[CompileSpec] = None
120120
arch_table = {
121+
"SSG2115P": QcomChipset.SSG2115P,
121122
"SM8650": QcomChipset.SM8650,
122123
"SM8550": QcomChipset.SM8550,
123124
"SM8475": QcomChipset.SM8475,

examples/qualcomm/oss_scripts/llama2/llama.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848

4949

5050
soc_to_chipset_map = {
51+
"SSG2115P": QcomChipset.SSG2115P,
5152
"SM8650": QcomChipset.SM8650,
5253
"SM8550": QcomChipset.SM8550,
5354
"SM8475": QcomChipset.SM8475,

examples/qualcomm/utils.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ def __init__(
8383
self.debug_output_path = f"{self.workspace}/debug_output.bin"
8484
self.output_folder = f"{self.workspace}/outputs"
8585
self.arch_table = {
86+
"SSG2115P": "73",
8687
"SM8650": "75",
8788
"SM8550": "73",
8889
"SM8475": "69",

0 commit comments

Comments
 (0)