Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 34 additions & 1 deletion backends/cadence/aot/export_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import logging
import tempfile

import torch

from executorch.backends.cadence.aot.ops_registrations import * # noqa
from typing import Any, Tuple

Expand All @@ -17,18 +19,42 @@
export_to_cadence_edge_executorch,
fuse_pt2,
)

from executorch.backends.cadence.aot.quantizer.quantizer import CadenceQuantizer
from executorch.backends.cadence.runtime import runtime
from executorch.backends.cadence.runtime.executor import BundledProgramManager
from executorch.exir import ExecutorchProgramManager
from torch import nn
from torch.ao.quantization.observer import HistogramObserver, MinMaxObserver
from torch.ao.quantization.quantizer.xnnpack_quantizer_utils import (
QuantizationConfig,
QuantizationSpec,
)

from .utils import save_bpte_program, save_pte_program


FORMAT = "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s"
logging.basicConfig(level=logging.INFO, format=FORMAT)

# Default quantization spec for activation tensors (per the `act_` prefix):
# signed 8-bit, full [-128, 127] range, per-tensor affine scheme, statically
# quantized (is_dynamic=False). Calibration uses a HistogramObserver; the
# eps=2**-12 floor keeps the computed scale from collapsing to ~0 for
# near-constant activations.
act_qspec = QuantizationSpec(
    dtype=torch.int8,
    quant_min=-128,
    quant_max=127,
    qscheme=torch.per_tensor_affine,
    is_dynamic=False,
    observer_or_fake_quant_ctr=HistogramObserver.with_args(eps=2**-12),
)

# Default quantization spec for weight tensors (per the `wgt_` prefix): same
# int8 / per-tensor-affine / static layout as activations, but calibrated with
# a plain MinMaxObserver — weights are fixed at export time, so simple min/max
# range tracking suffices (no histogram needed).
wgt_qspec = QuantizationSpec(
    dtype=torch.int8,
    quant_min=-128,
    quant_max=127,
    qscheme=torch.per_tensor_affine,
    is_dynamic=False,
    observer_or_fake_quant_ctr=MinMaxObserver,
)


def export_model(
model: nn.Module,
Expand All @@ -39,8 +65,15 @@ def export_model(
working_dir = tempfile.mkdtemp(dir="/tmp")
logging.debug(f"Created work directory {working_dir}")

qconfig = QuantizationConfig(
act_qspec,
act_qspec,
wgt_qspec,
None,
)

# Instantiate the quantizer
quantizer = CadenceQuantizer()
quantizer = CadenceQuantizer(qconfig)

# Convert the model
converted_model = convert_pt2(model, example_inputs, quantizer)
Expand Down
19 changes: 13 additions & 6 deletions backends/cadence/aot/quantizer/quantizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,13 +141,20 @@ def get_supported_operators(cls) -> List[OperatorConfig]:


class CadenceQuantizer(ComposableQuantizer):
def __init__(self) -> None:
static_qconfig = QuantizationConfig(
act_qspec,
act_qspec,
wgt_qspec,
None,
def __init__(
self, quantization_config: Optional[QuantizationConfig] = None
) -> None:
static_qconfig = (
QuantizationConfig(
act_qspec,
act_qspec,
wgt_qspec,
None,
)
if not quantization_config
else quantization_config
)

super().__init__(
[
CadenceAtenQuantizer(AddmmPattern(), static_qconfig),
Expand Down
Loading