Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion tensorflow/lite/core/c/c_api_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ extern "C" {
#ifdef TFL_COMPILE_LIBRARY
#define TFL_CAPI_EXPORT __declspec(dllexport)
#else
#define TFL_CAPI_EXPORT __declspec(dllimport)
#define TFL_CAPI_EXPORT
#endif // TFL_COMPILE_LIBRARY
#else
#define TFL_CAPI_EXPORT __attribute__((visibility("default")))
Expand Down
125 changes: 123 additions & 2 deletions tensorflow/lite/core/c/common.cc
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,60 @@ void TfLiteVarArrayFree(T* a) {
free(a);
}

#ifndef TF_LITE_STATIC_MEMORY

// Returns a deep copy of `src`. For affine quantization the parameter struct
// and its scale/zero-point arrays are duplicated; the result can therefore be
// freed independently of `src`.
TfLiteQuantization TfLiteQuantizationClone(const TfLiteQuantization& src) {
  TfLiteQuantization dst;
  dst.type = src.type;
  // Always initialize `params`: the original left it indeterminate on the
  // kTfLiteNoQuantization path, handing callers an uninitialized pointer.
  dst.params = nullptr;
  switch (src.type) {
    case kTfLiteNoQuantization:
      // Nothing to clone: no parameters are associated with this type.
      break;
    case kTfLiteAffineQuantization: {
      dst.params = calloc(1, sizeof(TfLiteAffineQuantization));
      const TfLiteAffineQuantization* const src_params =
          static_cast<const TfLiteAffineQuantization*>(src.params);
      TfLiteAffineQuantization* const dst_params =
          static_cast<TfLiteAffineQuantization*>(dst.params);
      dst_params->quantized_dimension = src_params->quantized_dimension;
      dst_params->scale = TfLiteFloatArrayCopy(src_params->scale);
      dst_params->zero_point = TfLiteIntArrayCopy(src_params->zero_point);
      break;
    }
  }
  return dst;
}

// Returns a deep copy of `src`. The heap-owned members (traversal order,
// block map and every per-dimension metadata array) are duplicated so the
// returned object owns its own memory.
TfLiteSparsity TfLiteSparsityClone(const TfLiteSparsity& src) {
  TfLiteSparsity cloned = src;
  cloned.traversal_order = TfLiteIntArrayCopy(src.traversal_order);
  cloned.block_map = TfLiteIntArrayCopy(src.block_map);
  if (src.dim_metadata) {
    cloned.dim_metadata = reinterpret_cast<TfLiteDimensionMetadata*>(
        calloc(src.dim_metadata_size, sizeof(TfLiteDimensionMetadata)));
    for (int i = 0; i < src.dim_metadata_size; ++i) {
      const TfLiteDimensionMetadata& src_meta = src.dim_metadata[i];
      TfLiteDimensionMetadata& dst_meta = cloned.dim_metadata[i];
      dst_meta = src_meta;
      dst_meta.array_segments = TfLiteIntArrayCopy(src_meta.array_segments);
      dst_meta.array_indices = TfLiteIntArrayCopy(src_meta.array_indices);
    }
  }
  return cloned;
}

// Clones the source sparsity to a newly allocated object.
// Clones the source sparsity to a newly allocated object.
//
// Returns nullptr when `src` is null or when allocation fails; the original
// dereferenced the calloc result unconditionally and would crash on OOM.
TfLiteSparsity* TfLiteSparsityClone(const TfLiteSparsity* const src) {
  if (!src) {
    return nullptr;
  }
  TfLiteSparsity* dst =
      reinterpret_cast<TfLiteSparsity*>(calloc(1, sizeof(TfLiteSparsity)));
  if (!dst) {
    return nullptr;
  }
  *dst = TfLiteSparsityClone(*src);
  return dst;
}

#endif // TF_LITE_STATIC_MEMORY

} // namespace

extern "C" {
Expand Down Expand Up @@ -234,6 +288,55 @@ void TfLiteTensorFree(TfLiteTensor* t) {
t->sparsity = nullptr;
}

// Returns a deep copy of `src`: the members that TfLiteTensorFree releases
// (data buffer where owned, dims, dims_signature, quantization, sparsity) are
// duplicated so the clone can be freed independently of `src`.
TfLiteTensor TfLiteTensorClone(const TfLiteTensor src) {
  // We copy all of the source data first, then we clone the fields that can't
  // be shared between two tensor instances.
  TfLiteTensor dst = src;
  // Data that is owned by the original tensor must be cloned. Check
  // TfLiteTensorFree to find out which members are owned.
  if (src.data.data) {
    const TfLiteAllocationStrategy allocation_strategy =
        TfLiteTensorGetAllocationStrategy(&src);
    switch (allocation_strategy) {
      case kTfLiteAllocationStrategyUnknown:
        // We don't know the allocation strategy, which means that the tensor
        // doesn't own its data: we keep the copied pointer to the data.
        break;
      case kTfLiteAllocationStrategyNone:
        // No allocation is associated with the tensor: nothing to duplicate.
        break;
      case kTfLiteAllocationStrategyMMap:
        // Mmapped data is read-only and external to the interpreter. We keep
        // the copied pointer to the data.
        break;
      case kTfLiteAllocationStrategyArena:
        // Arena tensors are allocated when the graph is prepared. There is no
        // data associated to such a tensor between runs so we don't care about
        // the value of `data`.
        break;
      case kTfLiteAllocationStrategyMalloc:
        // NOTE(review): malloc's result is used unchecked; an allocation
        // failure would memcpy into null. Matches surrounding code style.
        dst.data.data = malloc(src.bytes);
        std::memcpy(dst.data.data, src.data.data, src.bytes);
        break;
      case kTfLiteAllocationStrategyNew:
        // Special case for variant objects. They are allocated using new/delete
        // but require using the `CloneTo` function.
        if (src.allocation_type == kTfLiteVariantObject) {
          dst.data.data = reinterpret_cast<const VariantData*>(src.data.data)
                              ->CloneTo(nullptr);
        } else {
          dst.data.data = new char[src.bytes];
          std::memcpy(dst.data.data, src.data.data, src.bytes);
        }
        break;
    }
  }
  dst.dims = TfLiteIntArrayCopy(src.dims);
  dst.dims_signature = TfLiteIntArrayCopy(src.dims_signature);
  dst.quantization = TfLiteQuantizationClone(src.quantization);
  dst.sparsity = TfLiteSparsityClone(src.sparsity);
  return dst;
}

void TfLiteTensorReset(TfLiteType type, const char* name, TfLiteIntArray* dims,
TfLiteQuantizationParams quantization, char* buffer,
size_t size, TfLiteAllocationType allocation_type,
Expand Down Expand Up @@ -334,6 +437,14 @@ TfLiteStatus TfLiteTensorResizeMaybeCopy(size_t num_bytes, TfLiteTensor* tensor,
TfLiteStatus TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor) {
return TfLiteTensorResizeMaybeCopy(num_bytes, tensor, true);
}

// Returns `dims_signature` when it carries information (non-null, non-empty);
// otherwise falls back to the fully known shape in `dims`.
const TfLiteIntArray* TfLiteTensorGetDimsSignature(const TfLiteTensor* t) {
  const TfLiteIntArray* const signature = t->dims_signature;
  const bool has_signature = signature != nullptr && signature->size != 0;
  return has_signature ? signature : t->dims;
}
#endif // TF_LITE_STATIC_MEMORY

const char* TfLiteTypeGetName(TfLiteType type) {
Expand Down Expand Up @@ -399,11 +510,13 @@ TfLiteAllocationStrategy TfLiteTensorGetAllocationStrategy(
case kTfLiteDynamic:
return kTfLiteAllocationStrategyMalloc;
case kTfLitePersistentRo:
return kTfLiteAllocationStrategyUnknown;
return kTfLiteAllocationStrategyMalloc;
case kTfLiteCustom:
return kTfLiteAllocationStrategyUnknown;
case kTfLiteVariantObject:
return kTfLiteAllocationStrategyNew;
case kTfLiteNonCpu:
return kTfLiteAllocationStrategyUnknown;
}
return kTfLiteAllocationStrategyUnknown;
}
Expand All @@ -428,6 +541,8 @@ TfLiteRunStability TfLiteTensorGetBufferAddressStability(
return kTfLiteRunStabilityUnknown;
case kTfLiteVariantObject:
return kTfLiteRunStabilityAcrossRuns;
case kTfLiteNonCpu:
return kTfLiteRunStabilityUnknown;
}
return kTfLiteRunStabilityUnknown;
}
Expand All @@ -451,6 +566,8 @@ TfLiteRunStability TfLiteTensorGetDataStability(const TfLiteTensor* const t) {
return kTfLiteRunStabilityUnknown;
case kTfLiteVariantObject:
return kTfLiteRunStabilitySingleRun;
case kTfLiteNonCpu:
return kTfLiteRunStabilityUnknown;
}
return kTfLiteRunStabilityUnknown;
}
Expand All @@ -477,11 +594,13 @@ TfLiteRunStep TfLiteTensorGetDataKnownStep(const TfLiteTensor* t) {
return kTfLiteRunStepUnknown;
case kTfLiteVariantObject:
return kTfLiteRunStepEval;
case kTfLiteNonCpu:
return kTfLiteRunStepUnknown;
}
return kTfLiteRunStepUnknown;
}

// Returns the operation steop when the shape of a tensor is computed.
// Returns the operation step when the shape of a tensor is computed.
//
// Some operations can precompute the shape of their results before the
// evaluation step. This makes the shape available earlier for subsequent
Expand All @@ -504,6 +623,8 @@ TfLiteRunStep TfLiteTensorGetShapeKnownStep(const TfLiteTensor* t) {
return kTfLiteRunStepUnknown;
case kTfLiteVariantObject:
return kTfLiteRunStepEval;
case kTfLiteNonCpu:
return kTfLiteRunStepUnknown;
}
return kTfLiteRunStepUnknown;
}
Expand Down
19 changes: 17 additions & 2 deletions tensorflow/lite/core/c/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,9 @@ typedef union TfLitePtrUnion {
/// * `kTfLiteVariantObject`: Allocation is an arbitrary type-erased C++
/// object.
/// Allocation and deallocation are done through `new` and `delete`.
/// * `kTfLiteNonCpu`: Tensor buffer is in non-CPU memory, such as AHWB, GPU
/// memory. This tensor is not accessed by the CPU.
/// This is only used by LiteRt API.
typedef enum TfLiteAllocationType {
kTfLiteMemNone = 0,
kTfLiteMmapRo,
Expand All @@ -405,6 +408,7 @@ typedef enum TfLiteAllocationType {
kTfLitePersistentRo,
kTfLiteCustom,
kTfLiteVariantObject,
kTfLiteNonCpu,
} TfLiteAllocationType;

/// Memory allocation strategies.
Expand Down Expand Up @@ -553,8 +557,10 @@ typedef struct TfLiteTensor {
/// only populated when unknown dimensions exist in a read-write tensor (i.e.
/// an input or output tensor). (e.g. `dims` contains [1, 1, 1, 3] and
/// `dims_signature` contains [1, -1, -1, 3]). If no unknown dimensions exist
/// then `dims_signature` is either null, or set to an empty array. Note that
/// this field only exists when TF_LITE_STATIC_MEMORY is not defined.
/// then `dims_signature` is either null, or set to an empty array. Use
/// `TfLiteTensorGetDimsSignature` to get `dims_signature` if non-empty or
/// otherwise fallback to `dims`. Note that this field only exists when
/// TF_LITE_STATIC_MEMORY is not defined.
const TfLiteIntArray* dims_signature;
} TfLiteTensor;

Expand Down Expand Up @@ -734,6 +740,9 @@ void TfLiteTensorReset(TfLiteType type, const char* name, TfLiteIntArray* dims,
/// quantization, sparsity, ...
TfLiteStatus TfLiteTensorCopy(const TfLiteTensor* src, TfLiteTensor* dst);

/// Returns a tensor holding a deep copy of src.
TfLiteTensor TfLiteTensorClone(TfLiteTensor src);

/// Change the size of the memory block owned by `tensor` to `num_bytes`.
/// Tensors with allocation types other than `kTfLiteDynamic` will be ignored
/// and a `kTfLiteOk` will be returned. `tensor`'s internal data buffer will be
Expand All @@ -753,6 +762,12 @@ TfLiteStatus TfLiteTensorResizeMaybeCopy(size_t num_bytes, TfLiteTensor* tensor,
/// start of the region up to the minimum of the old and new sizes. In the case
/// of NULL tensor, or an error allocating new memory, returns `kTfLiteError`.
TfLiteStatus TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor);

/// Returns the shape of the tensor, with -1 for any unknown dimension sizes.
/// If any dimension is unknown, this is the same as `t->dims_signature`.
/// If all dimensions are known, this is the same as `t->dims`.
/// (`dims_signature` is NULL or empty if all dimensions are known.)
const TfLiteIntArray* TfLiteTensorGetDimsSignature(const TfLiteTensor* t);
#endif // TF_LITE_STATIC_MEMORY

/// WARNING: This is an experimental interface that is subject to change.
Expand Down
20 changes: 20 additions & 0 deletions tensorflow/lite/kernels/internal/portable_tensor_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,26 @@ void UnpackDenseInt4IntoInt8(const int8_t* src_buffer, int num_elements,
}
}

// Packs `num_elements` int4 values, each held in its own int8 slot of
// `src_buffer`, into the densely packed buffer `dst_buffer` (two values per
// byte: first of each pair in the low nibble, second in the high nibble).
//
// `num_elements` counts logical int4 values regardless of representation.
// For example, 3 elements means:
//   1) Unpacked input: 3 int8's = 3 bytes, in src_buffer[0..2].
//   2) Packed output: 3 int4's = 12 bits -> 16 bits (padded) = 2 bytes,
//      in dst_buffer[0..1].
// (The original comment had src and dst swapped — it described the inverse
// UnpackDenseInt4IntoInt8 routine.)
// `dst_buffer` must hold at least (num_elements + 1) / 2 bytes.
void PackInt8IntoDenseInt4(const int8_t* src_buffer, int num_elements,
                           int8_t* dst_buffer) {
  for (int i = 0; i < num_elements - 1; i += 2) {
    // Low nibble from the even element, high nibble from the odd one.
    dst_buffer[i / 2] =
        static_cast<int8_t>((src_buffer[i] & 0x0F) | (src_buffer[i + 1] << 4));
  }

  // An odd element count leaves one trailing value that occupies only the low
  // nibble of the final byte; the high nibble is zero padding.
  if (num_elements % 2 != 0) {
    dst_buffer[num_elements / 2] =
        static_cast<int8_t>(src_buffer[num_elements - 1] & 0x0F);
  }
}

} // namespace tensor_utils
} // namespace tflite

Expand Down
14 changes: 14 additions & 0 deletions tensorflow/lite/kernels/internal/portable_tensor_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -617,6 +617,20 @@ void ApplySignbitToVector(const float* __restrict__ vector, int v_size,
void UnpackDenseInt4IntoInt8(const int8_t* src_buffer, int num_elements,
int8_t* dst_buffer);

// Pack `src_buffer` into a densely packed buffer of int4 values.
// Parameters:
// src_buffer : Buffer containing int4 values stored in int8 memory.
// num_elements : Number of elements stored in the buffer. Note that this can
// be smaller than the size of `src_buffer` by 1 if it's odd,
// in which case the last nibble in `src_buffer` is ignored.
// This should be equal to the size of `dst_buffer`.
// dst_buffer : Buffer to pack into. Should be allocated by the caller.
// Size should be at least `num_elements`.
// Notes:
// For example, given `src_buffer = {0x02, 0x01, 0x04, 0x03}`, calling this
// function will return `dst_buffer = {0x12, 0x34}`.
void PackInt8IntoDenseInt4(const int8_t* src_buffer, int num_elements,
int8_t* dst_buffer);
} // namespace tensor_utils

} // namespace tflite
Expand Down
68 changes: 68 additions & 0 deletions tensorflow/lite/tools/flatbuffer_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,12 @@
"""

import copy
import functools
import random
import re
import struct
import sys
from typing import Optional, Type, TypeVar, Union

import flatbuffers

Expand Down Expand Up @@ -453,3 +455,69 @@ def count_resource_variables(model):
if builtin_code == schema_fb.BuiltinOperator.VAR_HANDLE:
unique_shared_names.add(op.builtinOptions.sharedName)
return len(unique_shared_names)


OptsT = TypeVar('OptsT')


def get_options_as(
    op: Union[schema_fb.Operator, schema_fb.OperatorT], opts_type: Type[OptsT]
) -> Optional[OptsT]:
  """Extracts an operator's builtin options as the requested type.

  The requested type must be an object-api class (its name ends in 'T') that
  corresponds to an entry in either the BuiltinOptions or BuiltinOptions2
  union of the schema.

  Args:
    op: The operator (raw flatbuffer or object-api form) to read from.
    opts_type: The object-api options class to extract.

  Returns:
    The options as `opts_type`, or None if the operator does not carry
    options of that type.

  Raises:
    ValueError: If `opts_type` is not a valid options type.
  """
  type_name: str = opts_type.__name__
  if not type_name.endswith('T'):
    raise ValueError(f'Unsupported options type: {opts_type}')
  base_type_name = type_name.removesuffix('T')
  # The schema splits builtin options across two unions; figure out which one
  # (if either) hosts the requested type.
  in_options_1 = hasattr(schema_fb.BuiltinOptions, base_type_name)
  in_options_2 = hasattr(schema_fb.BuiltinOptions2, base_type_name)
  if not in_options_1 and not in_options_2:
    raise ValueError(f'Unsupported options type: {opts_type}')

  if isinstance(op, schema_fb.Operator):
    # Raw flatbuffer form: check the union discriminant, then materialize the
    # object-api options via the matching creator.
    if in_options_1:
      expected_enum = getattr(schema_fb.BuiltinOptions, base_type_name)
      creator = schema_fb.BuiltinOptionsCreator
      raw_opts = op.BuiltinOptions()
      actual_enum = op.BuiltinOptionsType()
    else:
      expected_enum = getattr(schema_fb.BuiltinOptions2, base_type_name)
      creator = schema_fb.BuiltinOptions2Creator
      raw_opts = op.BuiltinOptions2()
      actual_enum = op.BuiltinOptions2Type()
    if raw_opts is None or actual_enum != expected_enum:
      return None
    return creator(expected_enum, raw_opts)

  if isinstance(op, schema_fb.OperatorT):
    # Object-api form: the options are already materialized; just verify the
    # stored object is of the requested type.
    opts = op.builtinOptions if in_options_1 else op.builtinOptions2
    if opts is None or not isinstance(opts, opts_type):
      return None
    return opts

  return None
Loading
Loading