Commit ef7af5c

Update
[ghstack-poisoned]
2 parents: a27d18c + 5cc4941

35 files changed (+781, -234 lines)

.github/workflows/trunk.yml

Lines changed: 1 addition & 1 deletion
@@ -269,7 +269,7 @@ jobs:
           if [[ ${{ matrix.os}} == "bare_metal" ]]; then
             bash test/build_size_test.sh "-DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} -DEXECUTORCH_BUILD_ARM_BAREMETAL=ON"
           elif [[ ${{ matrix.os}} == "zephyr-preset" ]]; then
-            CXXFLAGS=${cxx_flags} cmake --preset zephyr -DCMAKE_BUILD_TYPE=Release -DEXECUTORCH_OPTIMIZE_SIZE=ON -DCMAKE_INSTALL_PREFIX=cmake-out -Bcmake-out .
+            CXXFLAGS=${cxx_flags} cmake --preset zephyr -DCMAKE_BUILD_TYPE=Release -DEXECUTORCH_OPTIMIZE_SIZE=ON -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON -DCMAKE_INSTALL_PREFIX=cmake-out -Bcmake-out .
           cmake --build cmake-out -j9 --target install --config Release
           CXXFLAGS=${cxx_flags} cmake -DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=cmake-out -Bcmake-out/test test
           cmake --build cmake-out/test -j9 --config Release

backends/cadence/aot/TARGETS

Lines changed: 13 additions & 0 deletions
@@ -41,6 +41,7 @@ python_library(
         ":ops_registrations",
         ":passes",
         ":replace_ops",
+        ":compiler_funcs",
         ":utils",
         "//caffe2:torch",
         "//executorch/backends/cadence/aot/quantizer:fusion_pass",
@@ -332,6 +333,18 @@ python_library(
     ],
 )

+python_library(
+    name = "compiler_funcs",
+    srcs = [
+        "compiler_funcs.py",
+    ],
+    typing = True,
+    deps = [
+        "//caffe2:torch",
+        "//pytorch/ao:torchao",
+    ],
+)
+

 python_unittest(
     name = "test_graph_builder",

backends/cadence/aot/compiler.py

Lines changed: 39 additions & 44 deletions
@@ -12,6 +12,11 @@

 import executorch.backends.cadence.aot.ops_registrations  # noqa
 import torch
+from executorch.backends.cadence.aot.compiler_funcs import (
+    convert as convert_fn,
+    prepare as prepare_fn,
+    trace as trace_fn,
+)
 from executorch.backends.cadence.aot.memory_planning import (
     CadenceMemoryPlanning,
     print_memory_planning_info,
@@ -35,16 +40,13 @@
 from executorch.exir.passes import ToOutVarPass
 from executorch.exir.passes.sym_shape_eval_pass import HintBasedSymShapeEvalPass
 from executorch.exir.program._program import to_edge
-from torch._inductor.decomposition import remove_decompositions

 from torch.export.exported_program import ExportedProgram
-from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e

 from .passes import apply_exir_ops_passes, apply_torch_ops_passes

 from .utils import print_ops_info

-
 default_quantizer = CadenceDefaultQuantizer()

@@ -62,13 +64,6 @@ def trace(
     Trace the model with export and return an ExportedProgram.
     """

-    # Make the model inference mode by calling model.eval()
-    model.eval()
-
-    # Get default decompositions
-    decomp_table = torch.export.default_decompositions()
-
-    # Select ops to keep
     ops_to_keep = [
         torch.ops.aten.conv1d.default,
         torch.ops.aten.conv2d.default,
@@ -78,63 +73,54 @@
         torch.ops.aten.rms_norm.default,
     ]

-    # Remove decompositions for the ops we want to keep
-    # pyre-fixme[6]: For 1st argument expected `Dict[typing.Callable[..., typing.Any
-    remove_decompositions(decomp_table, ops_to_keep)
-
-    # Export with dynamo
-    program = torch.export.export(model, inputs, strict=True).run_decompositions(
-        decomp_table
+    program = trace_fn(
+        model, inputs, is_qat=False, strict=True, ops_to_keep=ops_to_keep
     )

     if dump_graphs:
         logging.info("Graph before quantization:")
-        logging.info(program.module().graph.print_tabular())
+        logging.info(program.graph_module.graph.print_tabular())

     return program


-def prepare_and_convert_pt2(
+def prepare_pt2(
     program: ExportedProgram,
-    inputs: tuple[object, ...],
     quantizer: CadenceQuantizer,
-    calibration_data: Optional[list[tuple[object, ...]]] = None,
     dump_graphs: bool = False,
 ) -> torch.fx.GraphModule:
     """
-    Prepare and convert a model using the given quantizer.
+    Prepare a model using the given quantizer.
     The quantizer must be supplied and be the same as the one used to
     fuse the model later, if applicable. If you do not expect that behavior,
     please use quantize_and_fuse_pt2 instead, which will instantiate a
     default quantizer for you if needed.
-    If calibration data is provided, it will be used to calibrate the model. If
-    not, the inputs will be used for calibration instead, which is useful for
-    unit tests but should not be used for end-to-end use cases.
-    Returns a GraphModule with the converted model.
+    Returns a GraphModule with the prepared model.
     """

-    # Get the graph module from the ExportedProgram
-    model_gm = program.module()
-
-    assert isinstance(model_gm, torch.fx.GraphModule)
-
-    # Prepare
-    prepared_model = prepare_pt2e(model_gm, quantizer)
-
-    # Calibrate
-    # If no calibration data is provided, use the inputs
-    if calibration_data is None:
-        calibration_data = [inputs]
-
-    for samples in calibration_data:
-        prepared_model(*samples)
-
-    # Convert
-    converted_model = convert_pt2e(prepared_model)
+    prepared_model = prepare_fn(program, quantizer, is_qat=False)
+
+    if dump_graphs:
+        logging.info("Graph after preparation:")
+        logging.info(prepared_model.graph.print_tabular())
+
+    return prepared_model
+
+
+def convert_pt2(
+    graph_module: torch.fx.GraphModule,
+    dump_graphs: bool = False,
+) -> torch.fx.GraphModule:
+    """
+    Convert the model
+    Returns a GraphModule with the converted model.
+    """
+
+    converted_model = convert_fn(graph_module)

     if dump_graphs:
-        logging.info("Graph after quantization (before fusion):")
-        logging.info(model_gm.graph.print_tabular())
+        logging.info("Graph after convert:")
+        logging.info(converted_model.graph.print_tabular())

     return converted_model

@@ -192,10 +178,19 @@ def quantize_pt2(
         logging.info("Graph after trace:")
         logging.info(program.graph.print_tabular())

+    # Get prepared graph module
+    prepared_gm = prepare_pt2(program, quantizer, dump_graphs=dump_graphs)
+
+    # Calibrate
+    # If no calibration data is provided, use the inputs
+    if calibration_data is None:
+        calibration_data = [inputs]
+
+    for samples in calibration_data:
+        prepared_gm(*samples)
+
     # Get converted graph module
-    converted_gm = prepare_and_convert_pt2(
-        program, inputs, quantizer, calibration_data, dump_graphs=dump_graphs
-    )
+    converted_gm = convert_pt2(prepared_gm, dump_graphs=dump_graphs)

     # Get fused model
     fused_gm = fuse_pt2(converted_gm, quantizer)
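
Taken together, these changes split the old prepare_and_convert_pt2 into explicit prepare and convert stages, with calibration happening in caller code in between; this mirrors the export_example.py update later in this commit. A minimal sketch of the new sequence, where the toy model, inputs, and the CadenceDefaultQuantizer import path are illustrative assumptions rather than part of the commit:

import torch
from executorch.backends.cadence.aot.compiler import (
    convert_pt2,
    fuse_pt2,
    prepare_pt2,
    trace,
)
from executorch.backends.cadence.aot.quantizer.quantizer import (
    CadenceDefaultQuantizer,  # assumed import path
)

model = torch.nn.Linear(16, 16)  # illustrative toy model
example_inputs = (torch.randn(1, 16),)
quantizer = CadenceDefaultQuantizer()

# Trace to an ExportedProgram; trace() now handles eval mode and
# the decomposition table internally via compiler_funcs.
ep = trace(model, example_inputs)

# Insert observers, then calibrate by running representative samples.
prepared_gm = prepare_pt2(ep, quantizer)
for samples in [example_inputs]:
    prepared_gm(*samples)

# Convert observers to quant/dequant ops, then fuse with the same quantizer.
converted_gm = convert_pt2(prepared_gm)
fused_gm = fuse_pt2(converted_gm, quantizer)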
backends/cadence/aot/compiler_funcs.py

Lines changed: 63 additions & 0 deletions

@@ -0,0 +1,63 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-strict
+
+
+from typing import Optional
+
+import torch
+from torch._inductor.decomposition import remove_decompositions
+from torchao.quantization.pt2e.quantize_pt2e import (
+    convert_pt2e,
+    prepare_pt2e,
+    prepare_qat_pt2e,
+)
+from torchao.quantization.pt2e.quantizer import Quantizer
+
+
+@torch.no_grad()
+def trace(
+    model: torch.nn.Module,
+    inputs: tuple[object, ...],
+    is_qat: bool = False,
+    strict: bool = False,
+    ops_to_keep: Optional[list[torch._ops.OpOverload]] = None,
+) -> torch.export.ExportedProgram:
+    if is_qat:
+        model.train()
+    else:
+        model.eval()
+
+    decomp_table = torch.export.default_decompositions()
+    # pyre-fixme[6]: For 1st argument expected `Dict[typing.Callable[..., typing.Any
+    remove_decompositions(decomp_table, ops_to_keep)
+    program = torch.export.export_for_training(
+        model, inputs, strict=strict
+    ).run_decompositions(decomp_table)
+
+    return program
+
+
+def prepare(
+    traced_program: torch.export.ExportedProgram,
+    quantizer: Quantizer,
+    is_qat: bool = False,
+) -> torch.fx.GraphModule:
+    traced_model = traced_program.module()
+    assert isinstance(traced_model, torch.fx.GraphModule)
+
+    if is_qat:
+        prepared_model = prepare_qat_pt2e(traced_model, quantizer)
+    else:
+        prepared_model = prepare_pt2e(traced_model, quantizer)
+
+    return prepared_model
+
+
+def convert(prepared_model: torch.fx.GraphModule) -> torch.fx.GraphModule:
+    converted_model = convert_pt2e(prepared_model)
+    return converted_model
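
Besides factoring out the PT2E plumbing, compiler_funcs exposes a QAT path (prepare_qat_pt2e) that the previous inline implementation in compiler.py did not. A minimal sketch of that path, assuming CadenceDefaultQuantizer (import path assumed) is acceptable as the torchao pt2e Quantizer and using an illustrative toy model:

import torch
from executorch.backends.cadence.aot.compiler_funcs import convert, prepare, trace
from executorch.backends.cadence.aot.quantizer.quantizer import (
    CadenceDefaultQuantizer,  # assumed import path; any pt2e Quantizer should fit
)

model = torch.nn.Linear(8, 8)  # illustrative toy model
inputs = (torch.randn(2, 8),)
quantizer = CadenceDefaultQuantizer()

# is_qat=True keeps the model in train mode before export_for_training.
program = trace(model, inputs, is_qat=True)

# prepare() dispatches to prepare_qat_pt2e for QAT, prepare_pt2e otherwise.
prepared = prepare(program, quantizer, is_qat=True)

# ... QAT fine-tuning of `prepared` would go here ...

converted = convert(prepared)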

backends/cadence/aot/export_example.py

Lines changed: 10 additions & 2 deletions
@@ -15,9 +15,10 @@
 from typing import Any, Tuple

 from executorch.backends.cadence.aot.compiler import (
+    convert_pt2,
     export_to_executorch_gen_etrecord,
     fuse_pt2,
-    prepare_and_convert_pt2,
+    prepare_pt2,
     trace,
 )

@@ -52,8 +53,15 @@ def export_model(
     # Trace the model
     ep = trace(model, example_inputs)

+    # Prepare the model
+    prepared_gm = prepare_pt2(ep, quantizer)
+
+    # Calibrate the model
+    for samples in [example_inputs]:
+        prepared_gm(*samples)
+
     # Convert the model
-    converted_model = prepare_and_convert_pt2(ep, example_inputs, quantizer)
+    converted_model = convert_pt2(prepared_gm)

     # Get reference outputs from converted model
     ref_outputs = converted_model(*example_inputs)

backends/qualcomm/_passes/layout_transform.py

Lines changed: 1 addition & 1 deletion
@@ -103,8 +103,8 @@ class LayoutTransform(ExportPass):
         exir_ops.edge.aten.pow.Tensor_Scalar,
         exir_ops.edge.aten.prelu.default,
         exir_ops.edge.aten.repeat.default,
-        exir_ops.edge.aten.round.default,
         exir_ops.edge.aten.relu.default,
+        exir_ops.edge.aten.round.default,
         exir_ops.edge.aten.sigmoid.default,
         exir_ops.edge.aten.split_with_sizes.default,
         exir_ops.edge.aten.split_with_sizes_copy.default,

backends/qualcomm/quantizer/annotators.py

Lines changed: 4 additions & 2 deletions
@@ -278,7 +278,9 @@ def annotate_masked_fill(node: Node, quantization_config: QuantizationConfig) ->
     )


-@register_annotator([torch.ops.aten.mul, torch.ops.aten.mul.Tensor])
+@register_annotator(
+    [torch.ops.aten.mul, torch.ops.aten.mul.Tensor, torch.ops.aten.mul_.Tensor]
+)
 def annotate_mul(node: Node, quantization_config: QuantizationConfig) -> None:
     annotate_binary(node, quantization_config)

@@ -1311,7 +1313,7 @@ def annotate_where(node: Node, quantization_config: QuantizationConfig) -> None:
     )


-@register_annotator([torch.ops.aten.zeros.default])
+@register_annotator([torch.ops.aten.zeros.default, torch.ops.aten.zeros_like.default])
 def annotate_zeros(node: Node, quantization_config: QuantizationConfig) -> None:
     if _is_annotated([node]) or not _is_float_tensor(node):
         return

backends/qualcomm/quantizer/custom_annotation.py

Lines changed: 7 additions & 4 deletions
@@ -153,7 +153,9 @@ def annotate_prefill_kv_output(gm: torch.fx.GraphModule, kv_quant_attrs: dict):
     )


-def annotate_matmul_16a8w(gm: torch.fx.GraphModule) -> None:  # noqa: C901
+def annotate_matmul_16a8w(  # noqa: C901
+    gm: torch.fx.GraphModule, annotate_conv=True
+) -> None:
     """
     This function is specific for matmul op 16a8w.
     For k, we will tag such as the below, and
@@ -317,9 +319,10 @@ def annotate_matmul_input1(node: Node):
             # The arguments of cat op: (the past kv cache, the new kv cache)
             node = node.args[0][1]
         elif node.target == torch.ops.aten.conv2d.default:
-            annotate_conv2d(
-                node, quantization_config=quantization_config_8a4w_per_channel
-            )
+            if annotate_conv:
+                annotate_conv2d(
+                    node, quantization_config=quantization_config_8a4w_per_channel
+                )
             break
         elif node.target in [torch.ops.aten.add.Tensor, torch.ops.aten.sub.Tensor]:
             break
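
The new annotate_conv flag lets callers apply the 16a8w matmul annotation while leaving conv2d nodes to some other quantization scheme. A hedged sketch of how a caller might use it; the toy module and exported graph are illustrative, not from this commit:

import torch
from executorch.backends.qualcomm.quantizer.custom_annotation import (
    annotate_matmul_16a8w,
)

class TinyMatmul(torch.nn.Module):  # illustrative toy module
    def forward(self, x, y):
        return torch.matmul(x, y)

gm = torch.export.export(
    TinyMatmul(), (torch.randn(2, 4), torch.randn(4, 2))
).module()

# Annotate matmuls as 16a8w but skip the 8a4w per-channel conv2d annotation.
annotate_matmul_16a8w(gm, annotate_conv=False)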

backends/qualcomm/runtime/backends/QnnOpPackageManager.h

Lines changed: 1 addition & 0 deletions
@@ -7,6 +7,7 @@
  */
 #pragma once
 #include <mutex>
+#include <string>
 #include <unordered_set>

 namespace executorch {

backends/qualcomm/scripts/build.sh

Lines changed: 8 additions & 0 deletions
@@ -85,6 +85,7 @@ if [ "$BUILD_AARCH64" = true ]; then
     -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
     -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
     -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
+    -DEXECUTORCH_ENABLE_LOGGING=ON \
     -DQNN_SDK_ROOT=$QNN_SDK_ROOT \
     -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_ROOT/build/cmake/android.toolchain.cmake \
     -DANDROID_ABI='arm64-v8a' \
@@ -104,6 +105,9 @@ if [ "$BUILD_AARCH64" = true ]; then
     -DANDROID_ABI='arm64-v8a' \
     -DANDROID_PLATFORM=android-30 \
     -DCMAKE_PREFIX_PATH=$CMAKE_PREFIX_PATH \
+    -DSUPPORT_REGEX_LOOKAHEAD=ON \
+    -DBUILD_TESTING=OFF \
+    -DEXECUTORCH_ENABLE_LOGGING=ON \
     -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
     -DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=BOTH \
     -DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
@@ -134,6 +138,7 @@ if [ "$BUILD_X86_64" = true ]; then
     -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
     -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
     -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
+    -DEXECUTORCH_ENABLE_LOGGING=ON \
     -DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
     -S $PRJ_ROOT \
     -B $BUILD_ROOT \
@@ -157,6 +162,9 @@ if [ "$BUILD_X86_64" = true ]; then
     -DCMAKE_PREFIX_PATH=$CMAKE_PREFIX_PATH \
     -DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=BOTH \
     -DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
+    -DSUPPORT_REGEX_LOOKAHEAD=ON \
+    -DBUILD_TESTING=OFF \
+    -DEXECUTORCH_ENABLE_LOGGING=ON \
     -B$EXAMPLE_ROOT

 cmake --build $EXAMPLE_ROOT -j$BUILD_JOB_NUMBER
