@@ -488,10 +488,36 @@ TfLiteStatus InterpreterBuilder::ParseQuantization(
488488 quantization->type = kTfLiteAffineQuantization ;
489489 auto * affine_quantization = reinterpret_cast <TfLiteAffineQuantization*>(
490490 malloc (sizeof (TfLiteAffineQuantization)));
491- affine_quantization->scale = TfLiteFloatArrayCreate (num_scales);
492- for (size_t i = 0 ; i < num_scales; ++i) {
493- affine_quantization->scale ->data [i] = src_quantization->scale ()->Get (i);
491+ // Memory optimization: When the model is mmap-backed, avoid copying scale
492+ // data by directly referencing the flatbuffer's scale array. The memory
493+ // layout of flatbuffers::Vector<float> ([uint32_t length][float data[]]) is
494+ // compatible with TfLiteFloatArray ([int size][float data[]]) on platforms
495+ // where sizeof(int) == sizeof(uint32_t) and the host is little-endian (flatbuffers stores scalars little-endian).
496+ //
497+ // Note: zero_point cannot use this optimization because the flatbuffer uses
498+ // int64_t but TfLiteIntArray uses int32_t, requiring conversion.
499+ const bool can_borrow_scale =
500+ allocation_ != nullptr &&
501+ allocation_->type () == Allocation::Type::kMMap &&
502+ sizeof (int ) == sizeof (uint32_t );
503+
504+ // Initialize ownership_flags to 0 (all arrays owned by default).
505+ affine_quantization->ownership_flags = 0 ;
506+ if (can_borrow_scale) {
507+ // Borrow scale directly from mmap'd flatbuffer (zero-copy).
508+ // The flatbuffers::Vector<float> pointer points to the length field,
509+ // followed by the float data - matching TfLiteFloatArray layout.
510+ affine_quantization->scale = const_cast <TfLiteFloatArray*>(
511+ reinterpret_cast <const TfLiteFloatArray*>(src_quantization->scale ()));
512+ affine_quantization->ownership_flags |= kTfLiteQuantizationScaleBorrowed ;
513+ } else {
514+ // Copy scale data to heap (original behavior).
515+ affine_quantization->scale = TfLiteFloatArrayCreate (num_scales);
516+ for (size_t i = 0 ; i < num_scales; ++i) {
517+ affine_quantization->scale ->data [i] = src_quantization->scale ()->Get (i);
518+ }
494519 }
520+ // Zero point must always be copied due to int64_t -> int32_t conversion.
495521 if (all_zero_points_same) {
496522 affine_quantization->zero_point = TfLiteIntArrayCreate (1 );
497523 affine_quantization->zero_point ->data [0 ] = zero_point;
0 commit comments