
Commit 3ab6302

Author: pytorchbot
Commit message: 2024-11-01 nightly release (465170f)
1 parent: 7072212

45 files changed, +1058 −249 lines (large commit; not all files are shown below)

backends/qualcomm/runtime/backends/QnnFunctionInterface.h

Lines changed: 1 addition & 0 deletions
@@ -70,6 +70,7 @@ class QnnInterface {
   DEFINE_SHIM_FUNCTION_INTERFACE(log_set_log_level, logSetLogLevel);
   // --------- QnnProfile ---------
   DEFINE_SHIM_FUNCTION_INTERFACE(profile_create, profileCreate);
+  DEFINE_SHIM_FUNCTION_INTERFACE(profile_set_config, profileSetConfig);
   DEFINE_SHIM_FUNCTION_INTERFACE(profile_get_events, profileGetEvents);
   DEFINE_SHIM_FUNCTION_INTERFACE(profile_get_sub_events, profileGetSubEvents);
   DEFINE_SHIM_FUNCTION_INTERFACE(profile_get_event_data, profileGetEventData);

backends/qualcomm/runtime/backends/QnnGraphCommon.h

Lines changed: 5 additions & 1 deletion
@@ -52,7 +52,7 @@ class QnnGraph {
 
   Qnn_ErrorHandle_t GraphFinalize() {
     return implementation_.GetQnnInterface().qnn_graph_finalize(
-        handle_, nullptr /* profile_handle */, nullptr /* signal_handle */);
+        handle_, profile_->GetHandle(), nullptr /* signal_handle */);
   };
   Qnn_ErrorHandle_t ProfileExecuteData(
       executorch::runtime::EventTracer* event_tracer) {
@@ -62,6 +62,10 @@ class QnnGraph {
     return handle_;
   }
 
+  QnnProfile* GetProfile() {
+    return profile_.get();
+  }
+
  protected:
   virtual executorch::runtime::Error MakeConfig(
       std::vector<const QnnGraph_Config_t*>& config) {

backends/qualcomm/runtime/backends/QnnProfiler.cpp

Lines changed: 37 additions & 1 deletion
@@ -19,8 +19,21 @@ QnnProfile::QnnProfile(
     : handle_(nullptr), implementation_(implementation), backend_(backend) {
   if (profile_level != QnnExecuTorchProfileLevel::kProfileOff) {
     const QnnInterface& qnn_interface = implementation_.GetQnnInterface();
+
+    QnnProfile_Level_t qnnProfileLevel = 0;
+    if (profile_level == QnnExecuTorchProfileLevel::kProfileBasic) {
+      qnnProfileLevel = QNN_PROFILE_LEVEL_BASIC;
+    } else if (
+        profile_level == QnnExecuTorchProfileLevel::kProfileDetailed ||
+        profile_level == QnnExecuTorchProfileLevel::kProfileOptrace) {
+      qnnProfileLevel = QNN_PROFILE_LEVEL_DETAILED;
+    } else {
+      QNN_EXECUTORCH_LOG_WARN("Invalid profile level");
+      return;
+    }
+
     Qnn_ErrorHandle_t error = qnn_interface.qnn_profile_create(
-        backend_->GetHandle(), static_cast<int>(profile_level), &handle_);
+        backend_->GetHandle(), qnnProfileLevel, &handle_);
     if (error != QNN_SUCCESS) {
       QNN_EXECUTORCH_LOG_WARN(
           "Failed to create profile_handle for backend "
@@ -31,6 +44,29 @@ QnnProfile::QnnProfile(
       // ignore error and continue to create backend handle...
       handle_ = nullptr;
     }
+
+    if (profile_level == QnnExecuTorchProfileLevel::kProfileOptrace) {
+      if (handle_ == nullptr) {
+        QNN_EXECUTORCH_LOG_WARN(
+            "Profile handle is null, cannot enable optrace");
+        return;
+      }
+
+      QnnProfile_Config_t qnnProfileConfig = QNN_PROFILE_CONFIG_INIT;
+      qnnProfileConfig.option = QNN_PROFILE_CONFIG_OPTION_ENABLE_OPTRACE;
+      std::array<const QnnProfile_Config_t*, 2> profileConfigs = {
+          &qnnProfileConfig, nullptr};
+      error =
+          qnn_interface.qnn_profile_set_config(handle_, profileConfigs.data());
+
+      if (error != QNN_SUCCESS) {
+        QNN_EXECUTORCH_LOG_WARN(
+            "Failed to set optrace for backend "
+            " %u, error=%d",
+            qnn_interface.GetBackendId(),
+            QNN_GET_ERROR_CODE(error));
+      }
+    }
   }
 }
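Net effect of the change above: kProfileBasic maps to QNN_PROFILE_LEVEL_BASIC, while kProfileDetailed and the new kProfileOptrace both map to QNN_PROFILE_LEVEL_DETAILED; optrace additionally applies QNN_PROFILE_CONFIG_OPTION_ENABLE_OPTRACE through the newly exposed profile_set_config shim. A minimal Python sketch of that mapping (illustrative only, not code from this commit):

# Illustrative summary of the C++ mapping above; the string keys mirror
# the QnnExecuTorchProfileLevel and QnnProfile_Level_t identifiers.
QNN_LEVEL_FOR_PROFILE_LEVEL = {
    "kProfileBasic": "QNN_PROFILE_LEVEL_BASIC",
    "kProfileDetailed": "QNN_PROFILE_LEVEL_DETAILED",
    "kProfileOptrace": "QNN_PROFILE_LEVEL_DETAILED",  # + ENABLE_OPTRACE config
}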

backends/qualcomm/serialization/qnn_compile_spec_schema.py

Lines changed: 1 addition & 0 deletions
@@ -115,6 +115,7 @@ class QnnExecuTorchProfileLevel(IntEnum):
     kProfileOff = 0
     kProfileBasic = 1
     kProfileDetailed = 2
+    kProfileOptrace = 3
 
 
 @dataclass

backends/qualcomm/serialization/schema.fbs

Lines changed: 1 addition & 0 deletions
@@ -135,6 +135,7 @@ enum QnnExecuTorchProfileLevel: int {
   kProfileOff = 0,
   kProfileBasic,
   kProfileDetailed,
+  kProfileOptrace,
 }
 
 /// QNN backends currently supported

backends/qualcomm/utils/utils.py

Lines changed: 4 additions & 1 deletion
@@ -770,6 +770,7 @@ def generate_qnn_executorch_compiler_spec(
     online_prepare: bool = False,
     dump_intermediate_outputs: bool = False,
     profile: bool = False,
+    optrace: bool = False,
     shared_buffer: bool = False,
     is_from_context_binary: bool = False,
 ) -> List[CompileSpec]:
@@ -831,7 +832,9 @@
     if saver:
         qnn_executorch_options.library_path = "libQnnSaver.so"
 
-    if profile:
+    if optrace:
+        qnn_executorch_options.profile_level = QnnExecuTorchProfileLevel.kProfileOptrace
+    elif profile:
         qnn_executorch_options.profile_level = (
             QnnExecuTorchProfileLevel.kProfileDetailed
         )
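For reference, a minimal sketch of how a caller might opt into the new level. QcomChipset and generate_htp_compiler_spec are assumed to exist elsewhere in this backend and are not part of this diff; the SoC value is illustrative:

# Sketch, assuming QcomChipset and generate_htp_compiler_spec from the
# Qualcomm backend; neither is touched by this commit.
from executorch.backends.qualcomm.serialization.qnn_compile_spec_schema import (
    QcomChipset,
)
from executorch.backends.qualcomm.utils.utils import (
    generate_htp_compiler_spec,
    generate_qnn_executorch_compiler_spec,
)

compile_spec = generate_qnn_executorch_compiler_spec(
    soc_model=QcomChipset.SM8550,  # assumed target SoC, for illustration
    backend_options=generate_htp_compiler_spec(use_fp16=True),
    optrace=True,  # takes precedence over profile=True per the branch above
)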

backends/vulkan/_passes/TARGETS

Lines changed: 1 addition & 0 deletions
@@ -12,6 +12,7 @@ runtime.python_library(
     deps = [
         "//caffe2:torch",
         "//executorch/exir:pass_base",
+        "//executorch/backends/vulkan:utils_lib",
     ],
 )

backends/vulkan/_passes/insert_prepack_nodes.py

Lines changed: 12 additions & 21 deletions
@@ -6,15 +6,17 @@
 
 # pyre-strict
 
+from copy import deepcopy
+
 import executorch.backends.vulkan.custom_ops_lib  # noqa
 
 import torch
 
 from executorch.backends.vulkan.op_registry import handles_own_prepacking
+from executorch.backends.vulkan.utils import is_param_node
 
 from executorch.exir.dialects._ops import ops as exir_ops
 
-from torch._export.utils import is_buffer, is_param
 from torch.export import ExportedProgram
 
 
@@ -29,25 +31,8 @@ def insert_prepack_nodes(program: ExportedProgram) -> ExportedProgram:
     argument into the operator implementation.
     """
 
-    def is_get_attr_node(node: torch.fx.Node) -> bool:
-        return isinstance(node, torch.fx.Node) and node.op == "get_attr"
-
-    def is_constant(node: torch.fx.Node) -> bool:
-        return node.name in program.graph_signature.inputs_to_lifted_tensor_constants
-
-    def is_param_node(node: torch.fx.Node) -> bool:
-        """
-        Check if the given node is a parameter within the exported program
-        """
-        return (
-            is_get_attr_node(node)
-            or is_param(program, node)
-            or is_buffer(program, node)
-            or is_constant(node)
-        )
-
     def prepack_not_required(node: torch.fx.Node) -> bool:
-        if not is_param_node(node):
+        if not is_param_node(program, node):
             return True
 
         for user in node.users:
@@ -69,9 +54,15 @@ def prepack_not_required(node: torch.fx.Node) -> bool:
             exir_ops.edge.et_vk.prepack.default,
             (node,),
         )
-        prepack_node.meta["spec"] = node.meta["spec"]
+        # This pass assumes that the SpecPropPass() has already been applied
+        assert "spec" in node.meta
+        # Validate that the original node is marked as a constant. Constant tensors
+        # do not participate in memory planning.
+        assert node.meta["spec"].const
+        prepack_node.meta["val"] = node.meta["val"]
+        prepack_node.meta["spec"] = deepcopy(node.meta["spec"])
         # Set the mem_obj_id to -1 to indicate that this node requires a dedicated
-        # memory object. This pass must be executed AFTER the memory planning pass.
+        # memory object.
         prepack_node.meta["spec"].mem_obj_id = -1
         node.replace_all_uses_with(prepack_node, lambda x, y=prepack_node: x != y)
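The deleted local helpers have evidently been promoted into the shared //executorch/backends/vulkan:utils_lib target added earlier in this commit. Reconstructed from the removed definitions, the shared helper presumably looks roughly like this; the exact body in backends/vulkan/utils.py is an assumption:

# Assumed shape of is_param_node in executorch/backends/vulkan/utils.py,
# reconstructed from the local helpers this diff removes. Unlike the old
# closures, it takes the ExportedProgram explicitly.
import torch
from torch._export.utils import is_buffer, is_param
from torch.export import ExportedProgram


def is_param_node(program: ExportedProgram, node: torch.fx.Node) -> bool:
    """Check if the given node is a parameter within the exported program."""
    return (
        node.op == "get_attr"
        or is_param(program, node)
        or is_buffer(program, node)
        or node.name in program.graph_signature.inputs_to_lifted_tensor_constants
    )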

backends/vulkan/runtime/graph/ComputeGraph.h

Lines changed: 16 additions & 0 deletions
@@ -612,6 +612,22 @@ class ComputeGraph final {
     return {t, staging};
   }
 
+  /*
+   * Add an input tensor with the specified properties along with its staging
+   * buffer.
+   */
+  inline IOValueRef add_input_tensor(
+      const std::vector<int64_t>& sizes,
+      const vkapi::ScalarType dtype,
+      const utils::StorageType storage_type,
+      const utils::GPUMemoryLayout memory_layout,
+      const int64_t shared_object_idx = -1) {
+    ValueRef t = add_tensor(
+        sizes, dtype, storage_type, memory_layout, shared_object_idx);
+    ValueRef staging = set_input_tensor(t);
+    return {t, staging};
+  }
+
   SharedObject& get_shared_object(const int64_t idx);
 
   //

backends/vulkan/runtime/graph/ops/glsl/bitw8_image_to_nchw_nobitw8buffer.yaml

Lines changed: 3 additions & 3 deletions
@@ -9,11 +9,11 @@ bitw8_image_to_nchw_nobitw8buffer:
    STORAGE: texture3d
    DTYPE: int8
  generate_variant_forall:
-   DTYPE:
-     - VALUE: int8
-     - VALUE: uint8
    STORAGE:
      - VALUE: texture2d
      - VALUE: texture3d
+   DTYPE:
+     - VALUE: int8
+     - VALUE: uint8
  shader_variants:
    - NAME: bitw8_image_to_nchw_nobitw8buffer
