Update on "bump pt core pin to 0723"

Gasoonjia · Gasoonjia · commit 85a6ac6f36a9 · 2025-07-24T12:24:18.000-07:00
Differential Revision: [D78579692](https://our.internmc.facebook.com/intern/diff/D78579692/) [ghstack-poisoned]
diff --git a/backends/test/suite/flow.py b/backends/test/suite/flow.py
@@ -62,4 +62,13 @@ def all_flows() -> dict[str, TestFlow]:
     except Exception as e:
         logger.info(f"Skipping Core ML flow registration: {e}")
 
+    try:
+        from executorch.backends.test.suite.flows.vulkan import VULKAN_TEST_FLOW
+
+        flows += [
+            VULKAN_TEST_FLOW,
+        ]
+    except Exception as e:
+        logger.info(f"Skipping Vulkan flow registration: {e}")
+
     return {f.name: f for f in flows if f is not None}
diff --git a/backends/test/suite/flows/vulkan.py b/backends/test/suite/flows/vulkan.py
@@ -0,0 +1,17 @@
+from executorch.backends.test.suite.flow import TestFlow
+from executorch.backends.vulkan.test.tester import VulkanTester
+
+
+def _create_vulkan_flow(name: str, quantize: bool = False) -> TestFlow:
+    """Return a TestFlow named `name` that targets the "vulkan" backend.
+
+    VulkanTester is used as the tester factory and `quantize` is passed
+    through to the TestFlow unchanged.
+    """
+    flow = TestFlow(
+        name, backend="vulkan", tester_factory=VulkanTester, quantize=quantize
+    )
+    return flow
+
+
+VULKAN_TEST_FLOW = _create_vulkan_flow("vulkan")  # quantize stays at its False default
diff --git a/backends/vulkan/test/TARGETS b/backends/vulkan/test/TARGETS
@@ -1,4 +1,5 @@
 load("@fbcode_macros//build_defs:python_unittest.bzl", "python_unittest")
+load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
 
 oncall("executorch")
 
@@ -57,3 +58,12 @@ python_unittest(
         "//executorch/backends/vulkan:vulkan_preprocess",
     ],
 )
+
+runtime.python_library(
+    name = "tester",
+    srcs = ["tester.py"],  # defines VulkanTester
+    deps = [
+        "//executorch/backends/vulkan/partitioner:vulkan_partitioner",
+        "//executorch/backends/vulkan:vulkan_preprocess",
+    ],
+)
diff --git a/backends/vulkan/test/tester.py b/backends/vulkan/test/tester.py
@@ -0,0 +1,61 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Any, List, Optional, Tuple
+
+import executorch
+import executorch.backends.test.harness.stages as BaseStages
+
+import torch
+from executorch.backends.test.harness import Tester as TesterBase
+from executorch.backends.test.harness.stages import StageType
+from executorch.backends.vulkan.partitioner.vulkan_partitioner import VulkanPartitioner
+from executorch.exir import EdgeCompileConfig
+from executorch.exir.backend.partitioner import Partitioner
+
+
+class Partition(BaseStages.Partition):
+    """Partition stage that falls back to a fresh VulkanPartitioner."""
+
+    def __init__(self, partitioner: Optional[Partitioner] = None):
+        super().__init__(partitioner=partitioner or VulkanPartitioner())
+
+
+class ToEdgeTransformAndLower(BaseStages.ToEdgeTransformAndLower):
+    def __init__(
+        self,
+        partitioners: Optional[List[Partitioner]] = None,
+        edge_compile_config: Optional[EdgeCompileConfig] = None,
+    ):  # Defaults: VulkanPartitioner class, _check_ir_validity=False config.
+        config = edge_compile_config or EdgeCompileConfig(_check_ir_validity=False)
+        super().__init__(
+            default_partitioner_cls=VulkanPartitioner,
+            partitioners=partitioners,
+            edge_compile_config=config,
+        )
+
+
+class VulkanTester(TesterBase):
+    """Harness Tester using the Vulkan partition and lowering stages."""
+
+    def __init__(
+        self,
+        module: torch.nn.Module,
+        example_inputs: Tuple[torch.Tensor, ...],
+        dynamic_shapes: Optional[Tuple[Any, ...]] = None,
+    ):
+        # Harness defaults, with the two Vulkan-specific stages swapped in.
+        harness = executorch.backends.test.harness
+        stage_classes = harness.Tester.default_stage_classes() | {
+            StageType.PARTITION: Partition,
+            StageType.TO_EDGE_TRANSFORM_AND_LOWER: ToEdgeTransformAndLower,
+        }
+        super().__init__(
+            module=module,
+            stage_classes=stage_classes,
+            example_inputs=example_inputs,
+            dynamic_shapes=dynamic_shapes,
+        )
diff --git a/docs/source/backends-arm-ethos-u.md b/docs/source/backends-arm-ethos-u.md
@@ -35,15 +35,12 @@ from executorch.exir import (
 )
 from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e
 from torchvision.models import mobilenetv2
+import executorch.kernels.quantized
 
 mobilenet_v2 = mobilenetv2.mobilenet_v2(
     weights=mobilenetv2.MobileNet_V2_Weights.DEFAULT
 ).eval()
 example_inputs = (torch.randn(1, 3, 224, 224),)
-# .so suffix is .dylib on MacOS.
-torch.ops.load_library(
-    "cmake-out-aot-lib/kernels/quantized/libquantized_ops_aot_lib.so"
-)
 
 compile_spec = ArmCompileSpecBuilder().ethosu_compile_spec(
         "ethos-u55-128",
diff --git a/exir/passes/quantize_io_pass.py b/exir/passes/quantize_io_pass.py
@@ -1,15 +1,17 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
+# Copyright 2025 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree
 
 import logging
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, Sequence, Union
 
 import numpy as np
 
 import torch
+import torch.fx as fx
 
 from executorch.exir import EdgeProgramManager, ExportedProgram
 from executorch.exir.dialects._ops import ops as exir_ops
@@ -316,3 +322,93 @@ def call(self, graph_module: torch.fx.GraphModule):
             self.edge_manager_update_quant_config_method(i, self.dequant_args[i])
 
         return PassResult(graph_module, True)
+
+
+def extract_io_quant_params(
+    edge_prog: EdgeProgramManager,
+    *,
+    input_idxs: Sequence[int] = (0,),
+    output_idxs: Sequence[int] = (0,),
+) -> Dict[str, Dict[str, Dict[str, Any]]]:
+    """
+    Runs the IO quantization passes and returns the parameters they record,
+    such as scale/zero_point, grouped by graph node name:
+      {
+        "inputs": {<placeholder_name>: {"scale": float, "zero_point": int}},
+        "outputs": {<node_name>: {"scale": float, "zero_point": int}},
+      }
+
+    One QuantizeInputs / QuantizeOutputs pass is applied per index listed in
+    `input_idxs` / `output_idxs` (index 0 only by default). The parameters
+    are read from the `_config_methods` entries named `input<N>_<param>` and
+    `output<N>_<param>`; a `zp` entry is reported as `zero_point`, and
+    entries that do not follow that naming pattern are ignored.
+
+    Note that this function will strip out the IO quantize/dequantize ops as
+    it records their parameters, so if you need to preserve the original graph
+    you need to make a copy with copy.deepcopy before.
+
+    Note that `to_edge_transform_and_lower` should be called before.
+    """
+    passes = [QuantizeInputs(edge_prog, [idx]) for idx in input_idxs]
+    passes += [QuantizeOutputs(edge_prog, [idx]) for idx in output_idxs]
+    edge_prog = edge_prog.transform(passes)
+
+    cfg = getattr(edge_prog, "_config_methods", {}) or {}
+
+    # Node names come from the transformed program's GraphModule.
+    gm = edge_prog.exported_program().graph_module
+    names_by_side = {
+        "input": _gather_io_names(gm, side="input"),
+        "output": _gather_io_names(gm, side="output"),
+    }
+
+    # Build the result dict
+    result: Dict[str, Dict[str, Dict[str, Any]]] = {"inputs": {}, "outputs": {}}
+    for key, val in cfg.items():
+        side = next((s for s in names_by_side if key.startswith(s)), None)
+        if side is None:
+            continue
+
+        # Only `<side><N>_<param>` keys are IO quant parameters. Any other
+        # config method (e.g. "input_foo") is skipped instead of failing in
+        # int() or in the unpacking of a name that has no "_".
+        idx_str, sep, param = key[len(side) :].partition("_")
+        if not sep or not idx_str.isdecimal():
+            continue
+
+        name = names_by_side[side][int(idx_str)]
+        # We need to map 'zp' to 'zero_point'
+        out_param = "zero_point" if param in ("zp", "zero_point") else param
+        result[side + "s"].setdefault(name, {})[out_param] = val
+
+    return result
+
+
+def _gather_io_names(gm: fx.GraphModule, side: str):
+    """
+    Returns graph node names for one side of `gm`.
+
+    For 'input', the placeholder names in graph order.
+    For 'output', the name of every fx.Node found in the args of the first
+    output node, walking nested tuples/lists left to right.
+    Any other `side` raises ValueError.
+    """
+    if side not in ("input", "output"):
+        raise ValueError(f"Unknown side: {side}")
+
+    if side == "input":
+        return [node.name for node in gm.graph.nodes if node.op == "placeholder"]
+
+    output_node = next(node for node in gm.graph.nodes if node.op == "output")
+    # Explicit stack instead of recursion; children are pushed in reverse so
+    # they are still popped, and therefore visited, left to right.
+    names = []
+    pending = [output_node.args]
+    while pending:
+        item = pending.pop()
+        if isinstance(item, (tuple, list)):
+            pending.extend(reversed(item))
+        elif isinstance(item, fx.Node):
+            names.append(item.name)
+    return names
diff --git a/exir/tests/test_extract_io_quant_params.py b/exir/tests/test_extract_io_quant_params.py
@@ -0,0 +1,93 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import copy
+import unittest
+
+import torch
+from executorch.backends.xnnpack.quantizer.xnnpack_quantizer import (
+    get_symmetric_quantization_config,
+    XNNPACKQuantizer,
+)
+from executorch.exir import to_edge_transform_and_lower
+from executorch.exir.passes.quantize_io_pass import extract_io_quant_params
+
+from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e
+
+
+class SimpleAdd(torch.nn.Module):  # Minimal two-input module: returns x + y.
+    def forward(self, x, y):
+        return x + y
+
+
+class TestExtractIOQuantParamsPT2E(unittest.TestCase):
+    """Exercise extract_io_quant_params on a PT2E-quantized SimpleAdd."""
+
+    def setUp(self):
+        """
+        Quantize SimpleAdd with XNNPACKQuantizer and lower it to edge.
+
+        The resulting program manager is stored in self.edge_prog.
+        """
+        ones = torch.ones(1, 5)
+        twos = torch.full((1, 5), 2.0)
+        self.example_inputs = (ones, twos)
+        self.mod = SimpleAdd().eval()
+
+        # Setup XNNPACK quantizer for example
+        self.quantizer = XNNPACKQuantizer()
+        self.quantizer.set_global(get_symmetric_quantization_config())
+
+        # Export the module, using a deep copy of the example inputs
+        exported = torch.export.export_for_training(
+            self.mod,
+            copy.deepcopy(self.example_inputs),
+            strict=True,
+        )
+        # Prepare, then call the observers once to calibrate
+        prepared = prepare_pt2e(exported.module(), self.quantizer)
+        prepared(*self.example_inputs)
+
+        # Convert, then export again with quant parameters
+        converted = convert_pt2e(prepared)
+        final_export = torch.export.export_for_training(
+            converted,
+            self.example_inputs,
+            strict=True,
+        )
+
+        # Lower to EdgeProgramManager
+        self.edge_prog = to_edge_transform_and_lower(final_export)
+
+    def test_roundtrip_extracts_io_params(self):
+        """Two inputs and one output each report scale and zero_point."""
+        q = extract_io_quant_params(
+            self.edge_prog,
+            input_idxs=(0, 1),
+            output_idxs=(0,),
+        )
+
+        # Validate structure
+        self.assertIn("inputs", q)
+        self.assertIn("outputs", q)
+        self.assertEqual(len(q["inputs"]), 2)
+        self.assertEqual(len(q["outputs"]), 1)
+
+        # Each entry must have a float 'scale' and int 'zero_point'
+        for name, params in q["inputs"].items():
+            self._assert_quant_entry(name, params)
+
+        out_name, out_params = next(iter(q["outputs"].items()))
+        self._assert_quant_entry(out_name, out_params)
+
+    def _assert_quant_entry(self, name, params):
+        """Assert a str name with a float 'scale' and an int 'zero_point'."""
+        self.assertIsInstance(name, str)
+        self.assertIsInstance(params["scale"], float)
+        self.assertIsInstance(params["zero_point"], int)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/extension/data_loader/CMakeLists.txt b/extension/data_loader/CMakeLists.txt
@@ -16,6 +16,14 @@ if(NOT EXECUTORCH_ROOT)
   set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..)
 endif()
 
+include(CheckIncludeFile)
+check_include_file(sys/mman.h ET_HAVE_SYS_MMAN_H)
+
+if(NOT ET_HAVE_SYS_MMAN_H AND NOT WIN32)
+  list(REMOVE_ITEM _extension_data_loader__srcs
+       "extension/data_loader/mmap_data_loader.cpp"
+  )
+endif()
 list(TRANSFORM _extension_data_loader__srcs PREPEND "${EXECUTORCH_ROOT}/")
 add_library(extension_data_loader ${_extension_data_loader__srcs})
 target_link_libraries(extension_data_loader executorch_core)
diff --git a/extension/llm/runner/text_decoder_runner.cpp b/extension/llm/runner/text_decoder_runner.cpp
@@ -66,11 +66,6 @@ ::executorch::runtime::Result<executorch::aten::Tensor> TextDecoderRunner::step(
       start_pos_tensor = from_blob(
           &start_pos, sizes_vec, ::executorch::aten::ScalarType::Long);
     }
-    ET_LOG(
-        Info,
-        "Start pos tensor numel: %zu, tokens numel: %zu",
-        start_pos_tensor->numel(),
-        tokens->numel());
     auto outputs_res = module_->forward({tokens, start_pos_tensor});
     ET_CHECK_OK_OR_RETURN_ERROR(outputs_res.error());
     ET_CHECK_MSG(