Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions tflite/core/c/c_api_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1610,8 +1610,8 @@ TEST(CApiSimple, OpaqueApiAccessors) {
TfLiteQuantization new_quantization{};
new_quantization.type = kTfLiteAffineQuantization;
TfLiteAffineQuantization* affine_quant =
(TfLiteAffineQuantization*)malloc(
sizeof(TfLiteAffineQuantization));
(TfLiteAffineQuantization*)calloc(
1, sizeof(TfLiteAffineQuantization));
affine_quant->scale = TfLiteFloatArrayCreate(1);
affine_quant->zero_point = TfLiteIntArrayCreate(1);
new_quantization.params = affine_quant;
Expand Down
10 changes: 8 additions & 2 deletions tflite/core/c/common.cc
Original file line number Diff line number Diff line change
Expand Up @@ -241,11 +241,17 @@ void TfLiteQuantizationFree(TfLiteQuantization* quantization) {
if (quantization->type == kTfLiteAffineQuantization) {
TfLiteAffineQuantization* q_params =
reinterpret_cast<TfLiteAffineQuantization*>(quantization->params);
if (q_params->scale) {
// Only free arrays that are owned (not borrowed from mmap).
// When an array's "borrowed" bit in ownership_flags is clear, the array is
// owned and must be freed. When the bit is set, the array is borrowed from
// mmap and must NOT be freed.
if (q_params->scale &&
!(q_params->ownership_flags & kTfLiteQuantizationScaleBorrowed)) {
TfLiteFloatArrayFree(q_params->scale);
q_params->scale = nullptr;
}
if (q_params->zero_point) {
if (q_params->zero_point &&
!(q_params->ownership_flags & kTfLiteQuantizationZeroPointBorrowed)) {
TfLiteIntArrayFree(q_params->zero_point);
q_params->zero_point = nullptr;
}
Expand Down
16 changes: 16 additions & 0 deletions tflite/core/c/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -357,8 +357,24 @@ typedef struct TfLiteAffineQuantization {
TfLiteFloatArray* scale;
TfLiteIntArray* zero_point;
int32_t quantized_dimension;
// Bit flags for ownership tracking to enable zero-copy quantization.
// When a flag is set, the corresponding array points to memory-mapped (mmap)
// data and must NOT be freed. When clear, the array is heap-allocated and
// must be freed. A zero-initialized struct (e.g. from calloc) therefore
// treats both arrays as owned. A struct obtained from malloc leaves this
// field uninitialized, so such code must assign ownership_flags explicitly.
// Note: scale and zero_point may have different ownership - scale can be
// borrowed from mmap while zero_point must be copied (int64->int32
// conversion).
uint8_t ownership_flags; // See kTfLiteQuantization* constants below
} TfLiteAffineQuantization;

/// Bit flags for TfLiteAffineQuantization::ownership_flags.
/// Each flag describes who owns one of the struct's arrays:
///  - set:   the array is borrowed (from mmap) and must NOT be freed.
///  - clear: the array is owned (heap-allocated) and must be freed.
/// An ownership_flags value of 0 therefore means both arrays are owned.
enum {
/// `scale` is borrowed; TfLiteQuantizationFree does not free it.
kTfLiteQuantizationScaleBorrowed = (1 << 0),
/// `zero_point` is borrowed; TfLiteQuantizationFree does not free it.
kTfLiteQuantizationZeroPointBorrowed = (1 << 1),
};

/// Parameters for blockwise quantization across the output channels dimension.
/// For a particular value in quantized_dimension, quantized values can be
/// converted back to float using:
Expand Down
4 changes: 2 additions & 2 deletions tflite/core/c/common_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ TEST(Quantization, TestQuantizationFree) {
t.quantization.type = kTfLiteAffineQuantization;
t.sparsity = nullptr;
auto* params = reinterpret_cast<TfLiteAffineQuantization*>(
malloc(sizeof(TfLiteAffineQuantization)));
calloc(1, sizeof(TfLiteAffineQuantization)));
params->scale = TfLiteFloatArrayCreate(3);
params->zero_point = TfLiteIntArrayCreate(3);
t.quantization.params = reinterpret_cast<void*>(params);
Expand Down Expand Up @@ -907,7 +907,7 @@ TEST(TensorCloneTest, CloneATensorAttributes) {
auto dims_signature_data = BuildTfLiteArray<int>({11, 12, 13});
TfLiteAffineQuantization* affine_quantization =
reinterpret_cast<TfLiteAffineQuantization*>(
malloc(sizeof(TfLiteAffineQuantization)));
calloc(1, sizeof(TfLiteAffineQuantization)));
affine_quantization->scale = BuildTfLiteArray<float>({7, 8, 9}).release();
affine_quantization->zero_point = BuildTfLiteArray({4, 5, 6}).release();
affine_quantization->quantized_dimension = 34;
Expand Down
32 changes: 29 additions & 3 deletions tflite/core/interpreter_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -488,10 +488,36 @@ TfLiteStatus InterpreterBuilder::ParseQuantization(
quantization->type = kTfLiteAffineQuantization;
auto* affine_quantization = reinterpret_cast<TfLiteAffineQuantization*>(
malloc(sizeof(TfLiteAffineQuantization)));
affine_quantization->scale = TfLiteFloatArrayCreate(num_scales);
for (size_t i = 0; i < num_scales; ++i) {
affine_quantization->scale->data[i] = src_quantization->scale()->Get(i);
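// Memory optimization: When the model is mmap-backed, avoid copying scale
// data by directly referencing the flatbuffer's scale array. The memory
// layout of flatbuffers::Vector<float> ([uint32_t length][float data[]]) is
// compatible with TfLiteFloatArray ([int size][float data[]]) on platforms
// where sizeof(int) == sizeof(uint32_t).
// NOTE(review): FlatBuffers stores scalars little-endian, so this layout
// match presumably also requires a little-endian host - confirm whether
// big-endian builds need to be excluded from the borrow path.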
//
// Note: zero_point cannot use this optimization because the flatbuffer uses
// int64_t but TfLiteIntArray uses int32_t, requiring conversion.
const bool can_borrow_scale =
allocation_ != nullptr &&
allocation_->type() == Allocation::Type::kMMap &&
sizeof(int) == sizeof(uint32_t);

// Initialize ownership_flags to 0 (all arrays owned by default).
affine_quantization->ownership_flags = 0;
if (can_borrow_scale) {
// Borrow scale directly from mmap'd flatbuffer (zero-copy).
// The flatbuffers::Vector<float> pointer points to the length field,
// followed by the float data - matching TfLiteFloatArray layout.
affine_quantization->scale = const_cast<TfLiteFloatArray*>(
reinterpret_cast<const TfLiteFloatArray*>(src_quantization->scale()));
affine_quantization->ownership_flags |= kTfLiteQuantizationScaleBorrowed;
} else {
// Copy scale data to heap (original behavior).
affine_quantization->scale = TfLiteFloatArrayCreate(num_scales);
for (size_t i = 0; i < num_scales; ++i) {
affine_quantization->scale->data[i] = src_quantization->scale()->Get(i);
}
}
// Zero point must always be copied due to int64_t -> int32_t conversion.
if (all_zero_points_same) {
affine_quantization->zero_point = TfLiteIntArrayCreate(1);
affine_quantization->zero_point->data[0] = zero_point;
Expand Down
2 changes: 1 addition & 1 deletion tflite/core/model_building.cc
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ TfLiteQuantization ToTfLiteQuantization(Quantization quantization) {
Overload([&q](NoQuantization) { q.type = kTfLiteNoQuantization; },
[&q](const AffineQuantization& src) {
q.type = kTfLiteAffineQuantization;
q.params = calloc(sizeof(TfLiteAffineQuantization), 1);
q.params = calloc(1, sizeof(TfLiteAffineQuantization));
TfLiteAffineQuantization& qa =
*reinterpret_cast<TfLiteAffineQuantization*>(q.params);
qa.quantized_dimension = src.axis;
Expand Down
2 changes: 1 addition & 1 deletion tflite/delegates/delegate_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ TEST_F(TestDelegate, StaticDelegateMakesGraphImmutable) {
TfLiteQuantization quant = {};
quant.type = kTfLiteAffineQuantization;
auto quant_params = static_cast<TfLiteAffineQuantization*>(
malloc(sizeof(TfLiteAffineQuantization)));
calloc(1, sizeof(TfLiteAffineQuantization)));
quant_params->scale = nullptr;
quant_params->zero_point = nullptr;
quant_params->quantized_dimension = 0;
Expand Down
2 changes: 1 addition & 1 deletion tflite/delegates/gpu/common/model_builder_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1168,7 +1168,7 @@ class InterpreterQuantized : public DelegatedInterpreter {
TfLiteQuantization rw_quantization;
rw_quantization.type = kTfLiteAffineQuantization;
auto* rw_affine_quantization = static_cast<TfLiteAffineQuantization*>(
malloc(sizeof(TfLiteAffineQuantization)));
calloc(1, sizeof(TfLiteAffineQuantization)));
rw_affine_quantization->scale = TfLiteFloatArrayCreate(1);
rw_affine_quantization->zero_point = TfLiteIntArrayCreate(1);
rw_affine_quantization->scale->data[0] = scale;
Expand Down
4 changes: 2 additions & 2 deletions tflite/interpreter_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ TEST(BasicInterpreter, CheckQuantization) {
TfLiteQuantization rw_quantization;
rw_quantization.type = kTfLiteAffineQuantization;
auto* rw_affine_quantization = static_cast<TfLiteAffineQuantization*>(
malloc(sizeof(TfLiteAffineQuantization)));
calloc(1, sizeof(TfLiteAffineQuantization)));
rw_affine_quantization->scale = TfLiteFloatArrayCreate(1);
rw_affine_quantization->zero_point = TfLiteIntArrayCreate(1);
rw_affine_quantization->scale->data[0] = scale;
Expand All @@ -241,7 +241,7 @@ TEST(BasicInterpreter, CheckQuantization) {
TfLiteQuantization ro_quantization;
ro_quantization.type = kTfLiteAffineQuantization;
auto* ro_affine_quantization = static_cast<TfLiteAffineQuantization*>(
malloc(sizeof(TfLiteAffineQuantization)));
calloc(1, sizeof(TfLiteAffineQuantization)));
ro_affine_quantization->scale = TfLiteFloatArrayCreate(1);
ro_affine_quantization->zero_point = TfLiteIntArrayCreate(1);
ro_affine_quantization->scale->data[0] = scale;
Expand Down
2 changes: 1 addition & 1 deletion tflite/kernels/batch_matmul.cc
Original file line number Diff line number Diff line change
Expand Up @@ -662,7 +662,7 @@ TfLiteTensor* GetTempRhs(TfLiteContext* context, TfLiteNode* node,
free(transposed_rhs->quantization.params);
}
transposed_rhs->quantization.params =
malloc(sizeof(TfLiteAffineQuantization));
calloc(1, sizeof(TfLiteAffineQuantization));
const auto* rhs_affine_quantization =
reinterpret_cast<TfLiteAffineQuantization*>(rhs->quantization.params);
auto* transposed_rhs_affine_quantization =
Expand Down
40 changes: 20 additions & 20 deletions tflite/kernels/kernel_util_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ TEST_F(QuantizationParamsTest, PerChannelConvolution) {
input->params = input_quant;
input->quantization.type = kTfLiteAffineQuantization;
auto* input_params = reinterpret_cast<TfLiteAffineQuantization*>(
malloc(sizeof(TfLiteAffineQuantization)));
calloc(1, sizeof(TfLiteAffineQuantization)));
input_params->scale = TfLiteFloatArrayCreate(1);
input_params->scale->data[0] = 0.5;
input_params->zero_point = TfLiteIntArrayCreate(1);
Expand All @@ -323,7 +323,7 @@ TEST_F(QuantizationParamsTest, PerChannelConvolution) {
filter->params = filter_quant;
filter->quantization.type = kTfLiteAffineQuantization;
auto* filter_params = reinterpret_cast<TfLiteAffineQuantization*>(
malloc(sizeof(TfLiteAffineQuantization)));
calloc(1, sizeof(TfLiteAffineQuantization)));
filter_params->scale = TfLiteFloatArrayCreate(3);
filter_params->scale->data[0] = 0.25;
filter_params->scale->data[1] = 0.125;
Expand All @@ -344,7 +344,7 @@ TEST_F(QuantizationParamsTest, PerChannelConvolution) {
bias->params = bias_quant;
bias->quantization.type = kTfLiteAffineQuantization;
auto* bias_params = reinterpret_cast<TfLiteAffineQuantization*>(
malloc(sizeof(TfLiteAffineQuantization)));
calloc(1, sizeof(TfLiteAffineQuantization)));
bias_params->scale = TfLiteFloatArrayCreate(3);
bias_params->scale->data[0] = 0.125;
bias_params->scale->data[1] = 0.0625;
Expand All @@ -364,7 +364,7 @@ TEST_F(QuantizationParamsTest, PerChannelConvolution) {
output->params = output_quant;
output->quantization.type = kTfLiteAffineQuantization;
auto* output_params = reinterpret_cast<TfLiteAffineQuantization*>(
malloc(sizeof(TfLiteAffineQuantization)));
calloc(1, sizeof(TfLiteAffineQuantization)));
output_params->scale = TfLiteFloatArrayCreate(1);
output_params->scale->data[0] = 0.5;
output_params->zero_point = TfLiteIntArrayCreate(1);
Expand Down Expand Up @@ -403,7 +403,7 @@ TEST_F(QuantizationParamsTest, CheckAndPopulateShift) {
input->params = input_quant;
input->quantization.type = kTfLiteAffineQuantization;
auto* input_params = reinterpret_cast<TfLiteAffineQuantization*>(
malloc(sizeof(TfLiteAffineQuantization)));
calloc(1, sizeof(TfLiteAffineQuantization)));
input_params->scale = TfLiteFloatArrayCreate(1);
input_params->scale->data[0] = 0.5;
input_params->zero_point = TfLiteIntArrayCreate(1);
Expand All @@ -423,7 +423,7 @@ TEST_F(QuantizationParamsTest, CheckAndPopulateShift) {
filter->params = filter_quant;
filter->quantization.type = kTfLiteAffineQuantization;
auto* filter_params = reinterpret_cast<TfLiteAffineQuantization*>(
malloc(sizeof(TfLiteAffineQuantization)));
calloc(1, sizeof(TfLiteAffineQuantization)));
// Create scale of size one.
filter_params->scale = TfLiteFloatArrayCreate(1);
filter_params->scale->data[0] = 0.25;
Expand All @@ -441,7 +441,7 @@ TEST_F(QuantizationParamsTest, CheckAndPopulateShift) {
bias->params = bias_quant;
bias->quantization.type = kTfLiteAffineQuantization;
auto* bias_params = reinterpret_cast<TfLiteAffineQuantization*>(
malloc(sizeof(TfLiteAffineQuantization)));
calloc(1, sizeof(TfLiteAffineQuantization)));
bias_params->scale = TfLiteFloatArrayCreate(3);
bias_params->scale->data[0] = 0.125;
bias_params->scale->data[1] = 0.0625;
Expand All @@ -461,7 +461,7 @@ TEST_F(QuantizationParamsTest, CheckAndPopulateShift) {
output->params = output_quant;
output->quantization.type = kTfLiteAffineQuantization;
auto* output_params = reinterpret_cast<TfLiteAffineQuantization*>(
malloc(sizeof(TfLiteAffineQuantization)));
calloc(1, sizeof(TfLiteAffineQuantization)));
output_params->scale = TfLiteFloatArrayCreate(1);
output_params->scale->data[0] = 0.5;
output_params->zero_point = TfLiteIntArrayCreate(1);
Expand Down Expand Up @@ -504,7 +504,7 @@ TEST_F(QuantizationParamsTest, CheckAndPopulateZeroValue) {
input->params = input_quant;
input->quantization.type = kTfLiteAffineQuantization;
auto* input_params = reinterpret_cast<TfLiteAffineQuantization*>(
malloc(sizeof(TfLiteAffineQuantization)));
calloc(1, sizeof(TfLiteAffineQuantization)));
input_params->scale = TfLiteFloatArrayCreate(1);
input_params->scale->data[0] = 1;
input_params->zero_point = TfLiteIntArrayCreate(1);
Expand All @@ -524,7 +524,7 @@ TEST_F(QuantizationParamsTest, CheckAndPopulateZeroValue) {
filter->params = filter_quant;
filter->quantization.type = kTfLiteAffineQuantization;
auto* filter_params = reinterpret_cast<TfLiteAffineQuantization*>(
malloc(sizeof(TfLiteAffineQuantization)));
calloc(1, sizeof(TfLiteAffineQuantization)));
filter_params->scale = TfLiteFloatArrayCreate(3);
filter_params->scale->data[0] = std::ldexp(1.0f, -31);
filter_params->scale->data[1] = std::ldexp(1.0f, -32);
Expand All @@ -545,7 +545,7 @@ TEST_F(QuantizationParamsTest, CheckAndPopulateZeroValue) {
bias->params = bias_quant;
bias->quantization.type = kTfLiteAffineQuantization;
auto* bias_params = reinterpret_cast<TfLiteAffineQuantization*>(
malloc(sizeof(TfLiteAffineQuantization)));
calloc(1, sizeof(TfLiteAffineQuantization)));
bias_params->scale = TfLiteFloatArrayCreate(3);
bias_params->scale->data[0] = std::ldexp(1.0f, -31);
bias_params->scale->data[1] = std::ldexp(1.0f, -32);
Expand All @@ -565,7 +565,7 @@ TEST_F(QuantizationParamsTest, CheckAndPopulateZeroValue) {
output->params = output_quant;
output->quantization.type = kTfLiteAffineQuantization;
auto* output_params = reinterpret_cast<TfLiteAffineQuantization*>(
malloc(sizeof(TfLiteAffineQuantization)));
calloc(1, sizeof(TfLiteAffineQuantization)));
output_params->scale = TfLiteFloatArrayCreate(1);
output_params->scale->data[0] = 1;
output_params->zero_point = TfLiteIntArrayCreate(1);
Expand Down Expand Up @@ -603,7 +603,7 @@ TEST_F(QuantizationParamsTest, CheckAndPopulateUint8) {
input->params = input_quant;
input->quantization.type = kTfLiteAffineQuantization;
auto* input_params = reinterpret_cast<TfLiteAffineQuantization*>(
malloc(sizeof(TfLiteAffineQuantization)));
calloc(1, sizeof(TfLiteAffineQuantization)));
input_params->scale = TfLiteFloatArrayCreate(1);
input_params->scale->data[0] = 1;
input_params->zero_point = TfLiteIntArrayCreate(1);
Expand All @@ -623,7 +623,7 @@ TEST_F(QuantizationParamsTest, CheckAndPopulateUint8) {
filter->params = filter_quant;
filter->quantization.type = kTfLiteAffineQuantization;
auto* filter_params = reinterpret_cast<TfLiteAffineQuantization*>(
malloc(sizeof(TfLiteAffineQuantization)));
calloc(1, sizeof(TfLiteAffineQuantization)));
filter_params->scale = TfLiteFloatArrayCreate(1);
int32_t two_pow_neg_31 = 0x30000000; // 2^-31 so shift = -30.
filter_params->scale->data[0] = *reinterpret_cast<float*>(&two_pow_neg_31);
Expand All @@ -641,7 +641,7 @@ TEST_F(QuantizationParamsTest, CheckAndPopulateUint8) {
bias->params = bias_quant;
bias->quantization.type = kTfLiteAffineQuantization;
auto* bias_params = reinterpret_cast<TfLiteAffineQuantization*>(
malloc(sizeof(TfLiteAffineQuantization)));
calloc(1, sizeof(TfLiteAffineQuantization)));
bias_params->scale = TfLiteFloatArrayCreate(1);
bias_params->scale->data[0] = 4.6566129e-10; // 2^-31
bias_params->zero_point = TfLiteIntArrayCreate(1);
Expand All @@ -657,7 +657,7 @@ TEST_F(QuantizationParamsTest, CheckAndPopulateUint8) {
output->params = output_quant;
output->quantization.type = kTfLiteAffineQuantization;
auto* output_params = reinterpret_cast<TfLiteAffineQuantization*>(
malloc(sizeof(TfLiteAffineQuantization)));
calloc(1, sizeof(TfLiteAffineQuantization)));
output_params->scale = TfLiteFloatArrayCreate(1);
output_params->scale->data[0] = 1;
output_params->zero_point = TfLiteIntArrayCreate(1);
Expand Down Expand Up @@ -695,7 +695,7 @@ TEST_F(QuantizationParamsTest, CheckAndPopulateWithoutBias) {
input->params = input_quant;
input->quantization.type = kTfLiteAffineQuantization;
auto* input_params = reinterpret_cast<TfLiteAffineQuantization*>(
malloc(sizeof(TfLiteAffineQuantization)));
calloc(1, sizeof(TfLiteAffineQuantization)));
input_params->scale = TfLiteFloatArrayCreate(1);
input_params->scale->data[0] = 1;
input_params->zero_point = TfLiteIntArrayCreate(1);
Expand All @@ -715,7 +715,7 @@ TEST_F(QuantizationParamsTest, CheckAndPopulateWithoutBias) {
filter->params = filter_quant;
filter->quantization.type = kTfLiteAffineQuantization;
auto* filter_params = reinterpret_cast<TfLiteAffineQuantization*>(
malloc(sizeof(TfLiteAffineQuantization)));
calloc(1, sizeof(TfLiteAffineQuantization)));
filter_params->scale = TfLiteFloatArrayCreate(1);
int32_t two_pow_neg_31 = 0x30000000; // 2^-31 so shift = -30.
filter_params->scale->data[0] = *reinterpret_cast<float*>(&two_pow_neg_31);
Expand All @@ -733,7 +733,7 @@ TEST_F(QuantizationParamsTest, CheckAndPopulateWithoutBias) {
output->params = output_quant;
output->quantization.type = kTfLiteAffineQuantization;
auto* output_params = reinterpret_cast<TfLiteAffineQuantization*>(
malloc(sizeof(TfLiteAffineQuantization)));
calloc(1, sizeof(TfLiteAffineQuantization)));
output_params->scale = TfLiteFloatArrayCreate(1);
output_params->scale->data[0] = 1;
output_params->zero_point = TfLiteIntArrayCreate(1);
Expand Down Expand Up @@ -770,7 +770,7 @@ TEST_F(QuantizationParamsTest, ActivationRangeQuantizedOverflow) {
output->params = output_quant;
output->quantization.type = kTfLiteAffineQuantization;
auto* output_params = reinterpret_cast<TfLiteAffineQuantization*>(
malloc(sizeof(TfLiteAffineQuantization)));
calloc(1, sizeof(TfLiteAffineQuantization)));
output_params->scale = TfLiteFloatArrayCreate(1);
output_params->scale->data[0] = 1;
output_params->zero_point = TfLiteIntArrayCreate(1);
Expand Down
2 changes: 1 addition & 1 deletion tflite/kernels/test_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -1243,7 +1243,7 @@ class SingleOpModel {
TfLiteQuantizationFree(&t->quantization);
t->quantization.type = kTfLiteAffineQuantization;
auto* affine_quantization = reinterpret_cast<TfLiteAffineQuantization*>(
malloc(sizeof(TfLiteAffineQuantization)));
calloc(1, sizeof(TfLiteAffineQuantization)));
affine_quantization->quantized_dimension = 0;
affine_quantization->scale = TfLiteFloatArrayCreate(1);
affine_quantization->zero_point = TfLiteIntArrayCreate(1);
Expand Down
Loading