Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions backends/transforms/duplicate_dynamic_quant_chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import operator

import torch
from executorch.exir.program._program import _update_exported_program_graph_module

from torch.ao.quantization.pt2e.utils import (
_filter_sym_size_users,
Expand Down Expand Up @@ -194,3 +195,11 @@ def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
graph_module.graph.eliminate_dead_code()
graph_module.recompile()
return PassResult(graph_module, True)


def duplicate_dynamic_quant_chain_pass(
    ep: torch.export.ExportedProgram,
) -> torch.export.ExportedProgram:
    """Run ``DuplicateDynamicQuantChainPass`` over an exported program.

    Applies the pass to ``ep``'s graph module and rebuilds the exported
    program around the transformed graph.

    Args:
        ep: The exported program whose graph should be transformed.

    Returns:
        A new ``ExportedProgram`` wrapping the updated graph module.

    Raises:
        RuntimeError: If the pass unexpectedly produces no result.
    """
    res = DuplicateDynamicQuantChainPass()(ep.graph_module)
    # Explicit check instead of `assert`: asserts are stripped under
    # `python -O`, which would let a None result slip through silently.
    if res is None:
        raise RuntimeError(
            "DuplicateDynamicQuantChainPass unexpectedly returned None"
        )
    return _update_exported_program_graph_module(ep, res.graph_module)
1 change: 1 addition & 0 deletions backends/xnnpack/TARGETS
Original file line number Diff line number Diff line change
Expand Up @@ -38,5 +38,6 @@ runtime.python_library(
":xnnpack_preprocess",
"//executorch/backends/xnnpack/partition:xnnpack_partitioner",
"//executorch/backends/xnnpack/utils:xnnpack_utils",
"//executorch/backends/xnnpack/recipes:xnnpack_recipes"
],
)
3 changes: 2 additions & 1 deletion backends/xnnpack/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
XnnpackDynamicallyQuantizedPartitioner,
XnnpackPartitioner,
)
from .recipes.recipes import get_xnnpack_recipe

# Exposed Configs in XNNPACK Package
from .utils.configs import (
Expand All @@ -23,12 +24,12 @@
# XNNPACK Backend
from .xnnpack_preprocess import XnnpackBackend


__all__ = [
"XnnpackDynamicallyQuantizedPartitioner",
"XnnpackPartitioner",
"XnnpackBackend",
"capture_graph_for_xnnpack",
"get_xnnpack_recipe",
"get_xnnpack_capture_config",
"get_xnnpack_edge_compile_config",
"get_xnnpack_executorch_backend_config",
Expand Down
19 changes: 19 additions & 0 deletions backends/xnnpack/recipes/TARGETS
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Buck build targets for the XNNPACK export-recipe library.
load("@fbcode_macros//build_defs:python_library.bzl", "python_library")


oncall("executorch")

python_library(
    name = "xnnpack_recipes",
    srcs = [
        "recipes.py",
    ],
    deps = [
        "//caffe2:torch",
        "//executorch/exir:lib",
        "//executorch/export:recipe",
        "//executorch/backends/transforms:duplicate_dynamic_quant_chain",
        "//executorch/backends/xnnpack/quantizer:xnnpack_quantizer",
        "//executorch/backends/xnnpack/partition:xnnpack_partitioner",
    ],
)
80 changes: 80 additions & 0 deletions backends/xnnpack/recipes/recipes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# pyre-strict
from typing import Any, Callable

from executorch.backends.transforms.duplicate_dynamic_quant_chain import (
duplicate_dynamic_quant_chain_pass,
)

from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner

from executorch.backends.xnnpack.quantizer.xnnpack_quantizer import (
get_symmetric_quantization_config,
XNNPACKQuantizer,
)
from executorch.export.recipe import ExportRecipe, QuantizationRecipe
from torchao.quantization.quant_api import int8_dynamic_activation_int4_weight


def get_generic_fp32_cpu_recipe() -> ExportRecipe:
    """Return an XNNPACK export recipe that keeps the model in fp32.

    No quantization is applied; the graph is handed to the
    ``XnnpackPartitioner`` for CPU-accelerated delegation as-is.

    Returns:
        An ``ExportRecipe`` named ``"fp32_recipe"``.
    """
    return ExportRecipe(
        name="fp32_recipe",
        quantization_recipe=None,
        partitioners=[XnnpackPartitioner()],
    )


def get_dynamic_quant_recipe() -> ExportRecipe:
    """Return a recipe for dynamic, per-channel int8 quantization.

    Configures an ``XNNPACKQuantizer`` with a symmetric, per-channel,
    dynamic quantization scheme and pairs it with the XNNPACK partitioner.

    Returns:
        An ``ExportRecipe`` named ``"dynamic_quant_recipe"``.
    """
    # Configure the PT2E quantizer with dynamic per-channel symmetric quant.
    quantizer = XNNPACKQuantizer()
    operator_config = get_symmetric_quantization_config(
        is_per_channel=True, is_dynamic=True
    )
    quantizer.set_global(operator_config)

    quant_recipe = QuantizationRecipe(
        quantizer=quantizer,
    )

    # The duplicate-chain pass is required so each consumer of a dynamically
    # quantized value gets its own quant/dequant chain before partitioning.
    return ExportRecipe(
        name="dynamic_quant_recipe",
        quantization_recipe=quant_recipe,
        partitioners=[XnnpackPartitioner()],
        pre_edge_transform_passes=duplicate_dynamic_quant_chain_pass,
    )


def get_8a4w_config(group_size: int = 32) -> ExportRecipe:
    """Return a recipe with int8 dynamic activations and int4 weights.

    Args:
        group_size: Group size for the grouped int4 weight quantization.

    Returns:
        An ``ExportRecipe`` named ``"8a4w_quant_recipe"``.
    """
    # Quantization is driven by a torchao base config; no PT2E quantizer.
    quant_recipe = QuantizationRecipe(
        quantizer=None,
        ao_base_config=[
            # Bug fix: previously hard-coded to 32, silently ignoring
            # the caller-supplied `group_size` argument.
            int8_dynamic_activation_int4_weight(group_size=group_size),
        ],
    )

    return ExportRecipe(
        name="8a4w_quant_recipe",
        quantization_recipe=quant_recipe,
        partitioners=[XnnpackPartitioner()],
    )


# Maps public recipe names to their factory functions. Extra keyword
# arguments passed to `get_xnnpack_recipe` are forwarded to the factory.
RECIPE_MAP: dict[str, Callable[[], ExportRecipe]] = {
    "FP32_CPU_ACCELERATED_RECIPE": get_generic_fp32_cpu_recipe,
    "DYNAMIC_QUANT_CPU_ACCELERATED_RECIPE": get_dynamic_quant_recipe,
    "8A4W_CPU_ACCELERATED_RECIPE": get_8a4w_config,
}


def get_xnnpack_recipe(recipe_name: str, **kwargs: Any) -> ExportRecipe:
    """Look up and construct a named XNNPACK export recipe.

    Args:
        recipe_name: A key of ``RECIPE_MAP``, e.g.
            ``"FP32_CPU_ACCELERATED_RECIPE"``.
        **kwargs: Forwarded to the recipe factory (e.g. ``group_size``
            for the 8a4w recipe).

    Returns:
        The constructed ``ExportRecipe``.

    Raises:
        ValueError: If ``recipe_name`` is not a known recipe.
    """
    # Raise explicitly instead of `assert`: asserts are stripped under
    # `python -O`, and a ValueError is the right error for bad input.
    if recipe_name not in RECIPE_MAP:
        raise ValueError(
            f"Recipe {recipe_name} not found. "
            f"Available recipes: {sorted(RECIPE_MAP)}"
        )
    return RECIPE_MAP[recipe_name](**kwargs)
1 change: 1 addition & 0 deletions backends/xnnpack/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ def define_common_targets():
"//executorch/extension/threadpool:threadpool",
"//executorch/runtime/core/exec_aten/util:tensor_util" + aten_suffix,
"//executorch/runtime/executor:pte_data_map" + aten_suffix,
"//executorch/backends/xnnpack/recipes:xnnpack_recipes",
],
# XnnpackBackend.cpp needs to compile with executor as whole
# @lint-ignore BUCKLINT: Avoid `link_whole=True` (https://fburl.com/avoid-link-whole)
Expand Down
11 changes: 11 additions & 0 deletions backends/xnnpack/test/TARGETS
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,14 @@ runtime.python_test(
"libtorch",
],
)

# Unit tests for the XNNPACK export recipes (see recipes/ sources).
runtime.python_test(
    name = "test_xnnpack_recipes",
    srcs = glob([
        "recipes/*.py",
    ]),
    deps = [
        "//executorch/backends/xnnpack:xnnpack_delegate",
        "//executorch/export:lib",
    ],
)
94 changes: 94 additions & 0 deletions backends/xnnpack/test/recipes/test_xnnpack_recipes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# pyre-strict

import unittest

import torch
from executorch.backends.xnnpack import get_xnnpack_recipe
from executorch.exir.schema import DelegateCall, Program
from executorch.export import export
from torch import nn
from torch.testing._internal.common_quantization import TestHelperModules


class TestXnnpackRecipes(unittest.TestCase):
    """End-to-end tests for the XNNPACK export recipes.

    Each test exports a small eager model through ``executorch.export``
    with a named recipe, checks numerical agreement with eager mode, and
    verifies the program was fully delegated to the XNNPACK backend.
    """

    # NOTE(review): the previous setUp/tearDown only called super() and
    # added nothing, so they have been removed.

    def check_fully_delegated(self, program: Program) -> None:
        """Assert the program is a single delegate call (fully lowered)."""
        instructions = program.execution_plan[0].chains[0].instructions
        # Use unittest assertions rather than bare `assert`, which is
        # stripped under `python -O`.
        self.assertIsNotNone(instructions)
        self.assertEqual(len(instructions), 1)
        self.assertIsInstance(instructions[0].instr_args, DelegateCall)

    def test_basic_recipe(self) -> None:
        """FP32 recipe: exact numerical match with eager, fully delegated."""
        m_eager = TestHelperModules.TwoLinearModule().eval()
        example_inputs = [(torch.randn(9, 8),)]
        session = export(
            model=m_eager,
            example_inputs=example_inputs,
            export_recipe=get_xnnpack_recipe("FP32_CPU_ACCELERATED_RECIPE"),
        )
        self.assertTrue(
            torch.allclose(
                session.run_method("forward", example_inputs[0])[0],
                m_eager(*example_inputs[0]),
            )
        )
        self.check_fully_delegated(session.get_executorch_program())

    def test_dynamic_quant_recipe(self) -> None:
        """Dynamic-quant recipe: close to eager (quant error), delegated."""
        with torch.no_grad():
            m_eager = TestHelperModules.TwoLinearModule().eval()
            example_inputs = [(torch.randn(9, 8),)]
            session = export(
                model=m_eager,
                example_inputs=example_inputs,
                export_recipe=get_xnnpack_recipe(
                    "DYNAMIC_QUANT_CPU_ACCELERATED_RECIPE"
                ),
            )
            # Loose tolerance: quantization introduces rounding error.
            self.assertTrue(
                torch.allclose(
                    session.run_method("forward", example_inputs[0])[0],
                    m_eager(*example_inputs[0]),
                    atol=1e-1,
                )
            )
            self.check_fully_delegated(session.get_executorch_program())

    def test_8a4w_recipe(self) -> None:
        """8-bit-activation / 4-bit-weight recipe on a tiny linear model."""

        class SimpleLinearModel(nn.Module):
            def __init__(self) -> None:
                super().__init__()
                self.layer1 = nn.Linear(32, 2)

            def forward(self, x: torch.Tensor) -> torch.Tensor:
                return self.layer1(x)

        model = SimpleLinearModel()
        example_inputs = [(torch.randn(1, 32),)]
        session = export(
            model=model,
            example_inputs=example_inputs,
            export_recipe=get_xnnpack_recipe(
                "8A4W_CPU_ACCELERATED_RECIPE", group_size=32
            ),
        )
        # Loose tolerance: 4-bit weights lose significant precision.
        self.assertTrue(
            torch.allclose(
                session.run_method("forward", example_inputs[0])[0],
                model(*example_inputs[0]),
                atol=1e-1,
            )
        )
        self.check_fully_delegated(session.get_executorch_program())
Loading