
Commit 462751c (parent: 4267559)
Author: pytorchbot
Commit message: 2024-10-15 nightly release (7ba7990)

38 files changed: +792 / -120 lines

.github/workflows/android-perf.yml

Lines changed: 1 addition & 0 deletions

@@ -205,6 +205,7 @@ jobs:
 
   # Let's see how expensive this job is, we might want to tone it down by running it periodically
   benchmark-on-device:
+    if: always()
     permissions:
       id-token: write
       contents: read

.github/workflows/android.yml

Lines changed: 5 additions & 0 deletions

@@ -80,6 +80,11 @@ jobs:
           # Reuse the script that installs Android on the ET Docker image
           sudo -E bash .ci/docker/common/install_android.sh
 
+          # After the https://github.com/ReactiveCircus/android-emulator-runner/releases/tag/v2.33.0 release,
+          # it seems that we need to chown the Android setup to the current user instead of root to
+          # avoid permission issues
+          sudo chown -R "${USER}" /opt/android
+
       - name: Gradle cache
         uses: gradle/actions/setup-gradle@v3
 

.github/workflows/apple-perf.yml

Lines changed: 1 addition & 0 deletions

@@ -279,6 +279,7 @@ jobs:
           path: ${{ runner.temp }}/artifacts/
 
   benchmark-on-device:
+    if: always()
     needs:
       - set-parameters
       - upload-benchmark-app

.lintrunner.toml

Lines changed: 2 additions & 1 deletion

@@ -151,7 +151,8 @@ command = [
     'lintrunner_adapters',
     'run',
     'grep_linter',
-    '--pattern= Executorch\W+',
+    # Exclude the "Executorch" pattern when it appears within URLs
+    '--pattern= Executorch(?!\\W*(://|\\.[a-z]{2,}))\\W+',
     '--linter-name=ExecuTorchCapitalization',
     '--error-name=Incorrect capitalization for ExecuTorch',
     """--error-description=

backends/cadence/aot/TARGETS

Lines changed: 1 addition & 0 deletions

@@ -43,6 +43,7 @@ python_library(
         "//executorch/backends/transforms:decompose_sdpa",
         "//executorch/backends/transforms:remove_clone_ops",
         "//executorch/exir:lib",
+        "//executorch/devtools:lib",
     ],
 )
 

backends/cadence/aot/compiler.py

Lines changed: 37 additions & 4 deletions

@@ -7,6 +7,7 @@
 # pyre-strict
 
 import logging
+from pathlib import Path
 from typing import Optional
 
 import torch
@@ -29,7 +30,13 @@
     DecomposeScaledDotProductAttention,
 )
 from executorch.backends.transforms.remove_clone_ops import RemoveCloneOpsTransform
-from executorch.exir import EdgeCompileConfig, EdgeProgramManager, to_edge
+from executorch.devtools import generate_etrecord
+from executorch.exir import (
+    EdgeCompileConfig,
+    EdgeProgramManager,
+    ExecutorchProgramManager,
+    to_edge,
+)
 from torch.ao.quantization.pt2e.export_utils import model_is_exported
 from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
 
@@ -197,11 +204,12 @@ def export_to_edge(
 # Export the model and lower it to an EdgeProgramManager (in edge IR), and
 # apply passes specific to Cadence DSP execution. Return both to print the
 # differences.
-def export_to_cadence(
+def export_to_cadence_edge_executorch(
     model: torch.nn.Module,
     inputs: tuple[object, ...],
     dump_graphs: bool = False,
-) -> EdgeProgramManager:
+    output_dir: Optional[str] = None,
+) -> ExecutorchProgramManager:
     edge_prog_manager = export_to_edge(model, inputs)
 
     # Run a couple required passes for quant/dequant ops
@@ -225,4 +233,29 @@ def export_to_cadence(
         cadence_prog_manager.exported_program().graph_module,
     )
 
-    return cadence_prog_manager
+    # Get executorch program after Cadence specific passes
+    exec_prog: ExecutorchProgramManager = cadence_prog_manager.to_executorch()
+    if output_dir:
+        _gen_etrecord(edge_prog_manager, exec_prog, Path(output_dir))
+    else:
+        logging.warning("No output directory provided, skipping ETRecord generation")
+
+    return exec_prog
+
+
+def _gen_etrecord(
+    edge_program: EdgeProgramManager,
+    et_program: ExecutorchProgramManager,
+    output_dir: Path,
+) -> None:
+    etrec_path = output_dir / "etrecord.bin"
+    try:
+        generate_etrecord(
+            et_record=etrec_path,
+            edge_dialect_program=edge_program,
+            executorch_program=et_program,
+        )
+        logging.info(f"Generated ETRecord at {etrec_path}")
+    except Exception:
+        # Any errors here shouldn't block the rest of the flow
+        logging.exception("Encountered exception while generating ETRecord")

backends/cadence/aot/export_example.py

Lines changed: 38 additions & 6 deletions

@@ -9,26 +9,52 @@
 import logging
 import tempfile
 
+import torch
+
 from executorch.backends.cadence.aot.ops_registrations import *  # noqa
 from typing import Any, Tuple
 
 from executorch.backends.cadence.aot.compiler import (
     convert_pt2,
-    export_to_cadence,
+    export_to_cadence_edge_executorch,
     fuse_pt2,
 )
+
 from executorch.backends.cadence.aot.quantizer.quantizer import CadenceQuantizer
 from executorch.backends.cadence.runtime import runtime
 from executorch.backends.cadence.runtime.executor import BundledProgramManager
 from executorch.exir import ExecutorchProgramManager
 from torch import nn
+from torch.ao.quantization.observer import HistogramObserver, MinMaxObserver
+from torch.ao.quantization.quantizer.xnnpack_quantizer_utils import (
+    QuantizationConfig,
+    QuantizationSpec,
+)
 
 from .utils import save_bpte_program, save_pte_program
 
 
 FORMAT = "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s"
 logging.basicConfig(level=logging.INFO, format=FORMAT)
 
+act_qspec = QuantizationSpec(
+    dtype=torch.int8,
+    quant_min=-128,
+    quant_max=127,
+    qscheme=torch.per_tensor_affine,
+    is_dynamic=False,
+    observer_or_fake_quant_ctr=HistogramObserver.with_args(eps=2**-12),
+)
+
+wgt_qspec = QuantizationSpec(
+    dtype=torch.int8,
+    quant_min=-128,
+    quant_max=127,
+    qscheme=torch.per_tensor_affine,
+    is_dynamic=False,
+    observer_or_fake_quant_ctr=MinMaxObserver,
+)
+
 
 def export_model(
     model: nn.Module,
@@ -39,8 +65,15 @@ def export_model(
     working_dir = tempfile.mkdtemp(dir="/tmp")
     logging.debug(f"Created work directory {working_dir}")
 
+    qconfig = QuantizationConfig(
+        act_qspec,
+        act_qspec,
+        wgt_qspec,
+        None,
+    )
+
     # Instantiate the quantizer
-    quantizer = CadenceQuantizer()
+    quantizer = CadenceQuantizer(qconfig)
 
     # Convert the model
     converted_model = convert_pt2(model, example_inputs, quantizer)
@@ -53,10 +86,9 @@ def export_model(
     quantized_model = fuse_pt2(converted_model, quantizer)
 
     # Get edge program after Cadence specific passes
-    cadence_prog_manager = export_to_cadence(quantized_model, example_inputs)
-
-    # Get executorch program after Cadence specific passes
-    exec_prog: ExecutorchProgramManager = cadence_prog_manager.to_executorch()
+    exec_prog: ExecutorchProgramManager = export_to_cadence_edge_executorch(
+        quantized_model, example_inputs, output_dir=working_dir
+    )
 
     logging.info("Final exported graph:\n")
     exec_prog.exported_program().graph_module.graph.print_tabular()

backends/cadence/aot/quantizer/quantizer.py

Lines changed: 13 additions & 6 deletions

@@ -141,13 +141,20 @@ def get_supported_operators(cls) -> List[OperatorConfig]:
 
 
 class CadenceQuantizer(ComposableQuantizer):
-    def __init__(self) -> None:
-        static_qconfig = QuantizationConfig(
-            act_qspec,
-            act_qspec,
-            wgt_qspec,
-            None,
+    def __init__(
+        self, quantization_config: Optional[QuantizationConfig] = None
+    ) -> None:
+        static_qconfig = (
+            QuantizationConfig(
+                act_qspec,
+                act_qspec,
+                wgt_qspec,
+                None,
+            )
+            if not quantization_config
+            else quantization_config
         )
+
        super().__init__(
             [
                 CadenceAtenQuantizer(AddmmPattern(), static_qconfig),
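
A short sketch of the two construction paths the new signature allows; the spec values mirror the ones added to export_example.py above, and the variable names are illustrative:

import torch
from torch.ao.quantization.observer import HistogramObserver, MinMaxObserver
from torch.ao.quantization.quantizer.xnnpack_quantizer_utils import (
    QuantizationConfig,
    QuantizationSpec,
)

from executorch.backends.cadence.aot.quantizer.quantizer import CadenceQuantizer

# Default path: falls back to the built-in static qconfig
default_quantizer = CadenceQuantizer()

# Custom path: pass a QuantizationConfig built from your own specs
# (values here copied from the export_example.py diff above)
act_qspec = QuantizationSpec(
    dtype=torch.int8,
    quant_min=-128,
    quant_max=127,
    qscheme=torch.per_tensor_affine,
    is_dynamic=False,
    observer_or_fake_quant_ctr=HistogramObserver.with_args(eps=2**-12),
)
wgt_qspec = QuantizationSpec(
    dtype=torch.int8,
    quant_min=-128,
    quant_max=127,
    qscheme=torch.per_tensor_affine,
    is_dynamic=False,
    observer_or_fake_quant_ctr=MinMaxObserver,
)
custom_quantizer = CadenceQuantizer(
    QuantizationConfig(act_qspec, act_qspec, wgt_qspec, None)
)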

backends/cadence/build_cadence_runner.sh

Lines changed: 2 additions & 0 deletions

@@ -25,6 +25,7 @@ main() {
     -DCMAKE_BUILD_TYPE=Release \
     -DEXECUTORCH_BUILD_DEVTOOLS=ON \
     -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
+    -DEXECUTORCH_ENABLE_LOGGING=ON \
     -Bcmake-out .
   cmake --build cmake-out --target install --config Release -j16
 
@@ -35,6 +36,7 @@ main() {
   cmake -DCMAKE_PREFIX_PATH="${cmake_prefix_path}" \
     -DCMAKE_BUILD_TYPE=Release \
     -DEXECUTORCH_CADENCE_CPU_RUNNER=ON \
+    -DEXECUTORCH_ENABLE_LOGGING=ON \
     -B"${build_dir}" \
     "${example_dir}"
   cmake --build "${build_dir}" --config Release -j16

backends/cadence/reference/operators/CMakeLists.txt

Lines changed: 1 addition & 0 deletions

@@ -27,6 +27,7 @@ set(_aten_ops__srcs
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/activation_ops_util.cpp"
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/copy_ops_util.cpp"
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/broadcast_util.cpp"
+  "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/dtype_util.cpp"
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/index_util.cpp"
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/kernel_ops_util.cpp"
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/matmul_ops_util.cpp"
