
Commit 939aabd

Update

[ghstack-poisoned]

2 parents: f6c43ff + 824aebf

File tree

8 files changed: +112 -43 lines


.github/workflows/trunk.yml

Lines changed: 1 addition & 1 deletion

@@ -234,7 +234,7 @@ jobs:
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     with:
       runner: macos-m1-stable
-      python-version: '3.11'
+      python-version: "3.11"
       submodules: 'true'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
       script: |

backends/vulkan/_passes/tag_memory_meta_pass.py

Lines changed: 88 additions & 40 deletions

@@ -6,7 +6,7 @@

 import logging
 from copy import deepcopy
-from typing import Set
+from typing import Any, Set

 import executorch.backends.vulkan.utils as utils

@@ -190,20 +190,24 @@ def propose_node_layout(
         return next(iter(valid_layouts))

     def should_annotate(self, node) -> bool:
-        if not isinstance(node, torch.fx.Node):
-            return False
-
-        if not utils.is_tensor_node(node):
-            return False
-
-        # Storage type and memory layout for tensorref will be determined at runtime
-        # so there's no use in setting those attributes ahead of time.
-        if node.meta.get("vkdg_tensorref", False):
-            return False
-
-        # Skip annotating output node. The output tensors should be annotated by the
-        # time the output node is observed.
-        if node.op == "output":
+        if isinstance(node, torch.fx.Node):
+            if not utils.is_tensor_node(node):
+                return False
+
+            # Storage type and memory layout for tensorref will be determined at runtime
+            # so there's no use in setting those attributes ahead of time.
+            if node.meta.get("vkdg_tensorref", False):
+                return False
+
+            # Skip annotating output node. The output tensors should be annotated by the
+            # time the output node is observed.
+            if node.op == "output":
+                return False
+        elif isinstance(node, (list, tuple)):
+            return all(
+                isinstance(n, torch.fx.Node) and self.should_annotate(n) for n in node
+            )
+        else:
             return False

         return True
@@ -215,6 +219,70 @@ def should_delay_annotation(self, node: torch.fx.Node) -> bool:
         # time the prepack node is observed.
         return node.target == exir_ops.edge.et_vk.prepack.default

+    def set_or_transition_arg_node(
+        self,
+        i: int,
+        arg: torch.fx.Node,
+        node: torch.fx.Node,
+        graph_module: torch.fx.GraphModule,
+        dirty: bool,
+    ) -> bool:
+        assert isinstance(arg, torch.fx.Node)
+
+        storage = utils.get_node_storage_type(node)
+        assert storage is not None
+        layout = utils.get_node_memory_layout(node)
+        assert layout is not None
+
+        arg_storage = utils.get_node_storage_type(arg)
+        arg_layout = utils.get_node_memory_layout(arg)
+
+        if arg_storage is None:
+            utils.set_node_spec_attr(arg, "vk_storage_type", storage)
+            arg_storage = storage
+        if arg_layout is None:
+            utils.set_node_spec_attr(arg, "vk_memory_layout", layout)
+            arg_layout = layout
+
+        if arg_storage == storage and arg_layout == layout:
+            return False
+
+        if not dirty:
+            logger.info(
+                f"[Vulkan Delegate] Inserting transition(s) for {node.format_node()}:"
+            )
+
+        insert_transition_node(graph_module, node, arg, storage, layout)
+
+        logger.info(
+            f" args {i} ({arg}): ({arg_storage}, {arg_layout}) -> ({storage}, {layout})"
+        )
+
+        return True
+
+    def set_or_transition_arg(
+        self,
+        i: int,
+        arg: Any,
+        node: torch.fx.Node,
+        graph_module: torch.fx.GraphModule,
+        dirty: bool,
+    ) -> bool:
+        if isinstance(arg, torch.fx.Node):
+            return self.set_or_transition_arg_node(i, arg, node, graph_module, dirty)
+        elif isinstance(arg, (list, tuple)):
+            need_transition = False
+            for arg_node in arg:
+                need_transition = (
+                    self.set_or_transition_arg_node(
+                        i, arg_node, node, graph_module, need_transition
+                    )
+                    or need_transition
+                )
+            return need_transition
+        else:
+            return False
+
     # noqa
     def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
         for node in graph_module.graph.nodes:
@@ -226,36 +294,16 @@ def call(self, graph_module: torch.fx.GraphModule) -> PassResult:

             set_memory_metadata(node, storage, layout)

-            inserting_transitions_for_node = False
+            need_transition = False
             for i, arg in enumerate(node.args):
                 if not self.should_annotate(arg):
                     continue

-                assert isinstance(arg, torch.fx.Node)
-
-                arg_storage = utils.get_node_storage_type(arg)
-                arg_layout = utils.get_node_memory_layout(arg)
-
-                if arg_storage is None:
-                    utils.set_node_spec_attr(arg, "vk_storage_type", storage)
-                    arg_storage = storage
-                if arg_layout is None:
-                    utils.set_node_spec_attr(arg, "vk_memory_layout", layout)
-                    arg_layout = layout
-
-                if arg_storage == storage and arg_layout == layout:
-                    continue
-
-                if not inserting_transitions_for_node:
-                    inserting_transitions_for_node = True
-                    logger.info(
-                        f"[Vulkan Delegate] Inserting transition(s) for {node.format_node()}:"
+                need_transition = (
+                    self.set_or_transition_arg(
+                        i, arg, node, graph_module, need_transition
                     )
-
-                insert_transition_node(graph_module, node, arg, storage, layout)
-
-                logger.info(
-                    f" args {i} ({arg}): ({arg_storage}, {arg_layout}) -> ({storage}, {layout})"
+                    or need_transition
                 )

         return PassResult(graph_module, True)
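
A side note on the refactor above: the accumulation is written as need_transition = self.set_or_transition_arg(...) or need_transition, with the call on the left of the or. Since or short-circuits, the reversed order (need_transition or self.set_or_transition_arg(...)) would stop invoking the helper once any earlier argument required a transition, silently skipping transitions for the remaining args. A minimal, self-contained sketch of the pattern (illustration only, not ExecuTorch code; the transition helper and its even-index rule are invented):

def transition(i: int, dirty: bool) -> bool:
    # Pretend even-indexed args need a transition; emit the header only
    # the first time, mirroring the `if not dirty:` check in
    # set_or_transition_arg_node above.
    needs = i % 2 == 0
    if needs:
        if not dirty:
            print("[Vulkan Delegate] Inserting transition(s) for <node>:")
        print(f"  args {i}: transition inserted")
    return needs

need_transition = False
for i in range(4):
    # Call first, then fold with `or`, so transition() runs for every i.
    need_transition = transition(i, need_transition) or need_transition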

build/build_apple_frameworks.sh

Lines changed: 1 addition & 0 deletions

@@ -53,6 +53,7 @@ libmicrokernels-prod.a,\

 FRAMEWORK_KERNELS_CUSTOM="kernels_custom:\
 libcustom_ops.a,\
+libextension_threadpool.a,\
 :"

 FRAMEWORK_KERNELS_OPTIMIZED="kernels_optimized:\

extension/benchmark/android/benchmark/app/build.gradle.kts

Lines changed: 2 additions & 2 deletions

@@ -14,8 +14,8 @@ android {

     defaultConfig {
         applicationId = "org.pytorch.minibench"
-        minSdk = 34
-        targetSdk = 34
+        minSdk = 28
+        targetSdk = 33
         versionCode = 1
         versionName = "1.0"
extension/pybindings/portable_lib.py

Lines changed: 1 addition & 0 deletions

@@ -45,6 +45,7 @@
     _load_for_executorch_from_buffer,  # noqa: F401
     _load_for_executorch_from_bundled_program,  # noqa: F401
     _reset_profile_results,  # noqa: F401
+    _unsafe_reset_threadpool,  # noqa: F401
     BundledModule,  # noqa: F401
     ExecuTorchModule,  # noqa: F401
     MethodMeta,  # noqa: F401

extension/pybindings/pybindings.cpp

Lines changed: 9 additions & 0 deletions

@@ -23,6 +23,7 @@
 #include <executorch/extension/data_loader/buffer_data_loader.h>
 #include <executorch/extension/data_loader/mmap_data_loader.h>
 #include <executorch/extension/memory_allocator/malloc_memory_allocator.h>
+#include <executorch/extension/threadpool/threadpool.h>
 #include <executorch/runtime/backend/interface.h>
 #include <executorch/runtime/core/data_loader.h>
 #include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>
@@ -1064,6 +1065,14 @@ PYBIND11_MODULE(EXECUTORCH_PYTHON_MODULE_NAME, m) {
       "_reset_profile_results",
       []() { EXECUTORCH_RESET_PROFILE_RESULTS(); },
       call_guard);
+  m.def(
+      "_unsafe_reset_threadpool",
+      [](int num_threads) {
+        executorch::extension::threadpool::get_threadpool()
+            ->_unsafe_reset_threadpool(num_threads);
+      },
+      py::arg("num_threads"),
+      call_guard);

   py::class_<PyModule>(m, "ExecuTorchModule")
       .def("load_bundled_input", &PyModule::load_bundled_input, call_guard)

extension/pybindings/pybindings.pyi

Lines changed: 9 additions & 0 deletions

@@ -264,3 +264,12 @@ def _reset_profile_results() -> None:
     This API is experimental and subject to change without notice.
     """
     ...
+
+@experimental("This API is experimental and subject to change without notice.")
+def _unsafe_reset_threadpool(num_threads: int) -> None:
+    """
+    .. warning::
+
+        This API is experimental and subject to change without notice.
+    """
+    ...
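
Taken together, the pybindings changes expose a way to resize ExecuTorch's global threadpool from Python before running a model. A hedged usage sketch (the import path and loader come from portable_lib.py above; "model.pte" is a placeholder path, and the underscore-prefixed name plus the .pyi warning mark the API as experimental):

from executorch.extension.pybindings.portable_lib import (
    _load_for_executorch_from_buffer,
    _unsafe_reset_threadpool,
)

# Resize the shared threadpool before loading/running the module;
# presumably "unsafe" because the reset is not synchronized against
# concurrent users of the pool.
_unsafe_reset_threadpool(4)

with open("model.pte", "rb") as f:  # placeholder model file
    module = _load_for_executorch_from_buffer(f.read())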

shim_et/xplat/executorch/extension/pybindings/pybindings.bzl

Lines changed: 1 addition & 0 deletions

@@ -54,6 +54,7 @@ def executorch_pybindings(python_module_name, srcs = [], cppdeps = [], visibilit
         ],
         deps = [
             "//executorch/runtime/core:core",
+            "//executorch/extension/threadpool:threadpool",
         ] + cppdeps,
         external_deps = [
             "pybind11",
