Skip to content

Commit aea6030

Browse files
author
pytorchbot
committed
2024-12-09 nightly release (b9db0a3)
1 parent f94caf8 commit aea6030

File tree

7 files changed

+131
-61
lines changed

7 files changed

+131
-61
lines changed

backends/vulkan/runtime/graph/containers/Value.h

Lines changed: 41 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ struct Value final {
5858
bool as_bool;
5959
} u;
6060

61-
api::vTensor as_tensor;
61+
std::unique_ptr<api::vTensor> as_tensor;
6262
api::StagingBuffer as_staging;
6363
TensorRef as_tensorref;
6464

@@ -106,15 +106,18 @@ struct Value final {
106106
rhs.payload.member_name.~dtor_name(); \
107107
break;
108108

109+
#define CASE_MOVE_UNIQUE_PTR_TYPE(type_tag, member_name) \
110+
case type_tag: \
111+
payload.member_name = std::move(rhs.payload.member_name); \
112+
break;
113+
109114
Value(Value&& rhs) noexcept : tag(rhs.tag) {
110115
switch (tag) {
111116
// Scalar types
112117
CASE_MOVE_TRIVIALLY_COPYABLE_TYPE(TypeTag::INT, as_int);
113118
CASE_MOVE_TRIVIALLY_COPYABLE_TYPE(TypeTag::DOUBLE, as_double);
114119
CASE_MOVE_TRIVIALLY_COPYABLE_TYPE(TypeTag::BOOL, as_bool);
115-
// Tensor and tensor adjacent types
116-
CASE_MOVE_MOVEABLE_TYPE(
117-
TypeTag::TENSOR, api::vTensor, as_tensor, vTensor);
120+
// Tensor adjacent types
118121
CASE_MOVE_MOVEABLE_TYPE(
119122
TypeTag::STAGING, api::StagingBuffer, as_staging, StagingBuffer);
120123
CASE_MOVE_MOVEABLE_TYPE(
@@ -132,6 +135,8 @@ struct Value final {
132135
CASE_MOVE_MOVEABLE_TYPE(
133136
TypeTag::STRING, std::string, as_string, basic_string);
134137
CASE_MOVE_MOVEABLE_TYPE(TypeTag::SYMINT, SymInt, as_symint, SymInt);
138+
// Tensor type
139+
CASE_MOVE_UNIQUE_PTR_TYPE(TypeTag::TENSOR, as_tensor);
135140

136141
case TypeTag::NONE:
137142
clearToNone();
@@ -142,6 +147,7 @@ struct Value final {
142147

143148
#undef CASE_MOVE_TRIVIALLY_COPYABLE_TYPE
144149
#undef CASE_MOVE_MOVEABLE_TYPE
150+
#undef CASE_MOVE_UNIQUE_PTR_TYPE
145151

146152
//
147153
// Accessors
@@ -157,9 +163,6 @@ struct Value final {
157163

158164
~Value() {
159165
switch (tag) {
160-
case TypeTag::TENSOR:
161-
payload.as_tensor.~vTensor();
162-
break;
163166
case TypeTag::STAGING:
164167
payload.as_staging.~StagingBuffer();
165168
break;
@@ -184,6 +187,9 @@ struct Value final {
184187
case TypeTag::SYMINT:
185188
payload.as_symint.~SymInt();
186189
break;
190+
case TypeTag::TENSOR:
191+
payload.as_tensor.reset();
192+
break;
187193
// Manually list out the types so that if a type here is added later and
188194
// not handled the compiler can catch it.
189195
case TypeTag::NONE:
@@ -252,12 +258,6 @@ struct Value final {
252258
return payload.member_name; \
253259
}
254260

255-
SUPPORT_TRIVIALLY_MOVEABLE_TYPE(
256-
api::vTensor,
257-
Tensor,
258-
TypeTag::TENSOR,
259-
as_tensor);
260-
261261
SUPPORT_TRIVIALLY_MOVEABLE_TYPE(
262262
api::StagingBuffer,
263263
Staging,
@@ -302,9 +302,36 @@ struct Value final {
302302

303303
SUPPORT_TRIVIALLY_MOVEABLE_TYPE(SymInt, SymInt, TypeTag::SYMINT, as_symint);
304304

305-
#undef SUPPORT_TRIVIALLY_COPYABLE_TYPE
306305
#undef SUPPORT_TRIVIALLY_MOVEABLE_TYPE
307306

307+
#define SUPPORT_UNIQUE_PTR_TYPE(type, type_name, type_tag, member_name) \
308+
explicit Value(type t) : tag(type_tag) { \
309+
payload.member_name = std::make_unique<type>(std::move(t)); \
310+
} \
311+
inline bool is##type_name() const { \
312+
return tag == type_tag; \
313+
} \
314+
inline type& to##type_name() const { \
315+
VK_CHECK_COND( \
316+
is##type_name(), \
317+
"Expected value to have type " #type_name ", got ", \
318+
tag, \
319+
" instead."); \
320+
return *payload.member_name; \
321+
} \
322+
inline const type& toConst##type_name() const { \
323+
VK_CHECK_COND( \
324+
is##type_name(), \
325+
"Expected value to have type " #type_name ", got ", \
326+
tag, \
327+
" instead."); \
328+
return *payload.member_name; \
329+
}
330+
331+
SUPPORT_UNIQUE_PTR_TYPE(api::vTensor, Tensor, TypeTag::TENSOR, as_tensor);
332+
333+
#undef SUPPORT_UNIQUE_PTR_TYPE
334+
308335
private:
309336
Payload payload;
310337
TypeTag tag;

backends/vulkan/test/vulkan_compute_api_test.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1087,8 +1087,8 @@ TEST_F(VulkanComputeAPITest, print_object_sizes) {
10871087

10881088
// Current known size on 64 bit system: 1040 B
10891089
EXPECT_TRUE(sizeof(vTensor) < 1200);
1090-
// Current known size on 64 bit system: 1056 B
1091-
EXPECT_TRUE(sizeof(Value) < 1200);
1090+
// Current known size on 64 bit system: 120 B
1091+
EXPECT_TRUE(sizeof(Value) < 128);
10921092
// Current known size on 64 bit system: 120 B
10931093
EXPECT_TRUE(sizeof(StagingBuffer) < 500);
10941094
// Current known size on 64 bit system: 384 B

exir/emit/_emitter.py

Lines changed: 61 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
from executorch.exir.passes.executorch_prim_ops_registry import is_sym_op
4949
from executorch.exir.print_program import _stacktrace_to_framelist, inspect_node
5050
from executorch.exir.schema import (
51+
AllocationDetails,
5152
BackendDelegate,
5253
BackendDelegateDataReference,
5354
BackendDelegateInlineData,
@@ -328,6 +329,59 @@ def _emit_list(self, val: List[_Argument], val_type: _SchemaType) -> EValue:
328329
ExportErrorType.NOT_SUPPORTED, f"Unknown list type: {val_type}"
329330
)
330331

332+
def _get_allocation_info(self, spec: TensorSpec) -> AllocationDetails:
333+
"""Returns the allocation info for a given TensorSpec."""
334+
self._internal_assert_emitter(
335+
isinstance(spec.mem_id, int) and spec.mem_id >= 0,
336+
self.node,
337+
f"Non-const tensor should be an activation tensor: mem_id {spec.mem_id}",
338+
)
339+
340+
self._internal_assert_emitter(
341+
isinstance(spec.mem_offset, int) and spec.mem_offset >= 0,
342+
self.node,
343+
f"Non-const tensor should be an activation tensor: mem_offset {spec.mem_offset}",
344+
)
345+
try:
346+
allocation_info = make_allocation_info(spec.mem_id, spec.mem_offset)
347+
except AddressSpaceOverflowException as e:
348+
raise InternalError(
349+
self._emit_node_specific_error(
350+
self.node,
351+
(
352+
f"{e}\nHint: If you are using a memory pass based on dynamic shape bounds, "
353+
f"such as ConstraintBasedSymShapeEvalPass, this may be the cause of an "
354+
f"unbacked SymInt with its upper bound lazily set to 2^64-1 (uint64 max) "
355+
"during torch.export()."
356+
),
357+
)
358+
)
359+
return allocation_info
360+
361+
def _save_new_const_tensor(
362+
self,
363+
spec: TensorSpec,
364+
buffer_data: bytes,
365+
hashed: str,
366+
allocation_info: Optional[AllocationDetails],
367+
) -> int:
368+
"""Saves a new constant tensor to the constant buffer and returns the buffer idx"""
369+
370+
self.program_state.allocated_specs.append(spec)
371+
# +1 because the first buffer location is reserved.
372+
373+
# Update buffer_idx to point to the end of the list where we are adding the new buffer.
374+
buffer = Buffer(storage=buffer_data)
375+
if allocation_info:
376+
buffer_idx = len(self.program_state.mutable_buffer)
377+
self.program_state.cached_spec_mutable_hash_values[hashed] = buffer_idx
378+
self.program_state.mutable_buffer.append(buffer)
379+
else:
380+
buffer_idx = len(self.program_state.constant_buffer)
381+
self.program_state.cached_spec_hash_values[hashed] = buffer_idx
382+
self.program_state.constant_buffer.append(buffer)
383+
return buffer_idx
384+
331385
def _tensor_spec_to_evalue(self, spec: TensorSpec) -> EValue:
332386
"""Constructs an EValue from the given TensorSpec."""
333387

@@ -339,35 +393,12 @@ def _tensor_spec_to_evalue(self, spec: TensorSpec) -> EValue:
339393
# default algos to set offsets, so need to check both.
340394
if spec.mem_id is not None and spec.mem_offset is not None:
341395
# Tensor is an activation.
342-
self._internal_assert_emitter(
343-
isinstance(spec.mem_id, int) and spec.mem_id >= 0,
344-
self.node,
345-
f"Non-const tensor should be an activation tensor: mem_id {spec.mem_id}",
346-
)
347-
348-
self._internal_assert_emitter(
349-
isinstance(spec.mem_offset, int) and spec.mem_offset >= 0,
350-
self.node,
351-
f"Non-const tensor should be an activation tensor: mem_offset {spec.mem_offset}",
352-
)
353-
try:
354-
allocation_info = make_allocation_info(spec.mem_id, spec.mem_offset)
355-
except AddressSpaceOverflowException as e:
356-
raise InternalError(
357-
self._emit_node_specific_error(
358-
self.node,
359-
(
360-
f"{e}\nHint: If you are using a memory pass based on dynamic shape bounds, "
361-
f"such as ConstraintBasedSymShapeEvalPass, this may be the cause of an "
362-
f"unbacked SymInt with its upper bound lazily set to 2^64-1 (uint64 max) "
363-
"during torch.export()."
364-
),
365-
)
366-
)
396+
allocation_info = self._get_allocation_info(spec)
367397

398+
# Tensor is either a constant tensor, or a mutable tensor with an initial state.
368399
if spec.const:
369400
# Tensor with a blob we need to serialize. May not actually be constant at runtime
370-
# if it's a weight with an associated gradient
401+
# if it's a weight with an associated gradient.
371402
spec_array_type = (
372403
ctypes.c_char * typing.cast(torch.UntypedStorage, spec.storage).nbytes()
373404
)
@@ -392,23 +423,11 @@ def _tensor_spec_to_evalue(self, spec: TensorSpec) -> EValue:
392423
else:
393424
buffer_idx = self.program_state.cached_spec_hash_values.get(hashed, -1)
394425

395-
# Haven't seen this constant before
426+
# Haven't seen this constant before.
396427
if buffer_idx == -1:
397-
# Update buffer_idx to point to the end of the list where we are adding the new buffer.
398-
buffer = Buffer(storage=buffer_data)
399-
self.program_state.allocated_specs.append(spec)
400-
# +1 because the first buffer location is reserved
401-
402-
if allocation_info:
403-
buffer_idx = len(self.program_state.mutable_buffer)
404-
self.program_state.cached_spec_mutable_hash_values[hashed] = (
405-
buffer_idx
406-
)
407-
self.program_state.mutable_buffer.append(buffer)
408-
else:
409-
buffer_idx = len(self.program_state.constant_buffer)
410-
self.program_state.cached_spec_hash_values[hashed] = buffer_idx
411-
self.program_state.constant_buffer.append(buffer)
428+
buffer_idx = self._save_new_const_tensor(
429+
spec, buffer_data, hashed, allocation_info
430+
)
412431

413432
if spec.const and spec.nbytes() != len(buffer_data):
414433
raise InternalError(

exir/passes/replace_view_copy_with_view_pass.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ def __init__(self, base: TensorSpec, shape: List[int]) -> None:
109109
"mem_obj_id",
110110
"mem_offset",
111111
"dtype", # property
112+
"extra_tensor_info", # property
112113
]
113114

114115
# Make sure _self_fields and _base_fields are disjoint

exir/schema.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,14 +43,20 @@ class TensorShapeDynamism(IntEnum):
4343
DYNAMIC_UNBOUND = 2
4444

4545

46+
class TensorDataLocation(IntEnum):
47+
SEGMENT = 0
48+
EXTERNAL = 1
49+
50+
4651
@dataclass
4752
class ExtraTensorInfo:
4853
"""
4954
Check program.fbs for an explanation of this table.
5055
"""
5156

52-
mutable_data_segments_idx: Optional[int] = None
57+
mutable_data_segments_idx: int = 0
5358
fully_qualified_name: Optional[str] = None
59+
location: TensorDataLocation = TensorDataLocation.SEGMENT
5460

5561

5662
@dataclass

exir/tensor.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
import executorch.exir.schema as schema
1919
import torch
2020
from executorch.exir.error import internal_assert
21-
from executorch.exir.schema import ScalarType, TensorShapeDynamism
21+
from executorch.exir.schema import ExtraTensorInfo, ScalarType, TensorShapeDynamism
2222
from executorch.exir.sym_util import eval_shape
2323

2424

@@ -132,6 +132,7 @@ def __init__(
132132
is_sparse: bool = False,
133133
const: bool = False,
134134
requires_grad: bool = False,
135+
extra_tensor_info: Optional[ExtraTensorInfo] = None,
135136
) -> None:
136137
self.scalar_type = dtype
137138
self.const = const
@@ -146,6 +147,7 @@ def __init__(
146147
self.is_sparse = is_sparse
147148
self.init_mem_planning_fields()
148149
self.shape_dynamism: TensorShapeDynamism = determine_tensor_dynanism(self.shape)
150+
self.extra_tensor_info = extra_tensor_info
149151

150152
@property
151153
def allocated_memory(self) -> int:
@@ -346,6 +348,7 @@ def to_list(
346348
allocation_info=allocation_info,
347349
layout=layout_enum(spec.layout),
348350
shape_dynamism=spec.shape_dynamism,
351+
extra_tensor_info=spec.extra_tensor_info,
349352
)
350353
return flatbuffer_tensor
351354

schema/program.fbs

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,18 +53,32 @@ enum TensorShapeDynamism : byte {
5353
DYNAMIC_UNBOUND = 2,
5454
}
5555

56+
// Indicates where a tensor is stored.
57+
enum TensorDataLocation : byte {
58+
// Stored in a segment of the PTE file.
59+
SEGMENT = 0,
60+
// Stored outside of the PTE file.
61+
EXTERNAL = 1,
62+
}
5663

5764
// Table to put additional information about tensors in that is not applicable
5865
// to the vast majority of tensors in the vast majority of programs.
5966
table ExtraTensorInfo {
6067
// [Optional] Specifies the SubsegmentOffsets in
6168
// program.mutable_data_segments that specifies where the data is located.
6269
// If not present and the data is located in a segment, then the data is in
63-
// the first index.
70+
// index zero.
6471
mutable_data_segments_idx: uint64;
6572

6673
// [Optional] The unique name of the tensor. e.g. 'mod.linear.weight'
6774
fully_qualified_name: string;
75+
76+
// [Optional] Specifies where the tensor's data is stored.
77+
// - SEGMENT (default): Data is stored in a segment.
78+
// - EXTERNAL: Data is stored outside of the PTE file. fully_qualified_name
79+
// must be non-empty, and is used as a key to find the tensor's external
80+
// data. Tensor.data_buffer_idx is ignored.
81+
location: TensorDataLocation;
6882
}
6983

7084
table Tensor {

0 commit comments

Comments
 (0)