Skip to content

Commit be0f046

Browse files
majiddadashi authored and copybara-github committed
Add support for kTfLiteInt2 to Dequantize kernels.
This change enables the Dequantize and PerChannelDequantize operations to handle 2-bit integer inputs (`kTfLiteInt2`). It includes logic to unpack the packed 2-bit integers into int8_t before performing the dequantization and adds new test cases for both per-tensor and per-channel dequantization with kTfLiteInt2. PiperOrigin-RevId: 822207279
1 parent d13d4bf commit be0f046

File tree

5 files changed

+64
-9
lines changed

5 files changed

+64
-9
lines changed

tflite/core/kernels/register.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ BuiltinOpResolver::BuiltinOpResolver() {
179179
/* max_version = */ 8);
180180
AddBuiltin(BuiltinOperator_DEQUANTIZE, Register_DEQUANTIZE(),
181181
/* min_version = */ 1,
182-
/* max_version = */ 6);
182+
/* max_version = */ 7);
183183
AddBuiltin(BuiltinOperator_PRELU, Register_PRELU());
184184
AddBuiltin(BuiltinOperator_MAXIMUM, Register_MAXIMUM(),
185185
/* min_version = */ 1,

tflite/kernels/dequantize.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
5757

5858
TF_LITE_ENSURE(context, op_context.input != nullptr);
5959

60-
TF_LITE_ENSURE(context, op_context.input->type == kTfLiteInt4 ||
60+
TF_LITE_ENSURE(context, op_context.input->type == kTfLiteInt2 ||
61+
op_context.input->type == kTfLiteInt4 ||
6162
op_context.input->type == kTfLiteUInt8 ||
6263
op_context.input->type == kTfLiteInt8 ||
6364
op_context.input->type == kTfLiteInt16 ||

tflite/kernels/dequantize.h

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,14 +72,24 @@ inline TfLiteStatus PerChannelDequantizeImpl(TfLiteContext* context,
7272
per_channel_op_params.zero_point = zero_points.data();
7373
}
7474
const int8_t* input_data;
75-
const size_t bytes_unpacked = input->bytes * 2;
75+
size_t bytes_unpacked;
76+
if (input->type == kTfLiteInt2) {
77+
bytes_unpacked = input->bytes * 4;
78+
} else {
79+
bytes_unpacked = input->bytes * 2;
80+
}
7681
auto unpacked_input_data = std::make_unique<int8_t[]>(bytes_unpacked);
7782

7883
if (input->type == kTfLiteInt4) {
7984
tflite::tensor_utils::UnpackPackedIntToInt8(
8085
GetTensorData<int8_t>(input), GetTensorShape(input).FlatSize(),
8186
/*bit_width=*/4, unpacked_input_data.get());
8287
input_data = unpacked_input_data.get();
88+
} else if (input->type == kTfLiteInt2) {
89+
tflite::tensor_utils::UnpackPackedIntToInt8(
90+
GetTensorData<int8_t>(input), GetTensorShape(input).FlatSize(),
91+
/*bit_width=*/2, unpacked_input_data.get());
92+
input_data = unpacked_input_data.get();
8393
} else {
8494
input_data = GetTensorData<int8_t>(input);
8595
}
@@ -91,6 +101,7 @@ inline TfLiteStatus PerChannelDequantizeImpl(TfLiteContext* context,
91101
GetTensorData<uint8_t>(input), GetTensorShape(output),
92102
GetTensorData<float>(output));
93103
break;
104+
case kTfLiteInt2:
94105
case kTfLiteInt4:
95106
case kTfLiteInt8:
96107
reference_ops::PerChannelDequantize<int8_t>(
@@ -115,7 +126,12 @@ TfLiteStatus DequantizeImpl(TfLiteContext* context, TfLiteNode* node,
115126
op_params.zero_point = input->params.zero_point;
116127
op_params.scale = input->params.scale;
117128
const int8_t* input_data;
118-
const size_t bytes_unpacked = input->bytes * 2;
129+
size_t bytes_unpacked;
130+
if (input->type == kTfLiteInt2) {
131+
bytes_unpacked = input->bytes * 4;
132+
} else {
133+
bytes_unpacked = input->bytes * 2;
134+
}
119135
auto unpacked_input_data = std::make_unique<int8_t[]>(bytes_unpacked);
120136

121137
if (input->type == kTfLiteInt4) {
@@ -124,6 +140,12 @@ TfLiteStatus DequantizeImpl(TfLiteContext* context, TfLiteNode* node,
124140
GetTensorData<int8_t>(input), GetTensorShape(input).FlatSize(),
125141
/*bit_width=*/4, unpacked_input_data.get());
126142
input_data = unpacked_input_data.get();
143+
} else if (input->type == kTfLiteInt2) {
144+
// Use GetTensorShape(input).FlatSize() for num_elements.
145+
tflite::tensor_utils::UnpackPackedIntToInt8(
146+
GetTensorData<int8_t>(input), GetTensorShape(input).FlatSize(),
147+
/*bit_width=*/2, unpacked_input_data.get());
148+
input_data = unpacked_input_data.get();
127149
} else {
128150
input_data = GetTensorData<int8_t>(input);
129151
}
@@ -140,6 +162,7 @@ TfLiteStatus DequantizeImpl(TfLiteContext* context, TfLiteNode* node,
140162
GetTensorShape(output), GetTensorData<float>(output));
141163
}
142164
break;
165+
case kTfLiteInt2:
143166
case kTfLiteInt4:
144167
case kTfLiteInt8:
145168
if (kernel_type == kReference) {

tflite/kernels/dequantize_test.cc

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,8 @@ limitations under the License.
1919

2020
#include <gmock/gmock.h>
2121
#include <gtest/gtest.h>
22-
#include "absl/memory/memory.h"
2322
#include "Eigen/Core" // from @eigen_archive
24-
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
25-
#include "tflite/core/api/op_resolver.h"
2623
#include "tflite/core/interpreter.h"
27-
#include "tflite/kernels/internal/types.h"
2824
#include "tflite/kernels/test_util.h"
2925
#include "tflite/schema/schema_generated.h"
3026

@@ -75,6 +71,15 @@ class DequantizeOpModel : public SingleOpModel {
7571
data_int8.data() + data_int8.size());
7672
}
7773

74+
template <typename T>
75+
void SetInputInt2(int input, const std::vector<T> data) {
76+
auto non_const = *const_cast<std::vector<T>*>(&data);
77+
std::vector<int8_t> data_int8(non_const.size());
78+
std::copy(non_const.begin(), non_const.end(), data_int8.begin());
79+
PopulateTensor2bit(input, 0, data_int8.data(),
80+
data_int8.data() + data_int8.size());
81+
}
82+
7883
std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
7984

8085
protected:
@@ -92,6 +97,15 @@ TEST(DequantizeOpTest, Int4) {
9297
ElementsAreArray(ArrayFloatNear({4, 3.5, -3, -3.5})));
9398
}
9499

100+
TEST(DequantizeOpTest, Int2) {
101+
DequantizeOpModel m(TensorType_INT2, {1, 4}, 0.5, -1, 6);
102+
103+
m.SetInputInt2<int8_t>(0, {1, 0, -1, -2});
104+
ASSERT_EQ(m.Invoke(), kTfLiteOk);
105+
EXPECT_THAT(m.GetOutput(),
106+
ElementsAreArray(ArrayFloatNear({1.0, 0.5, 0.0, -0.5})));
107+
}
108+
95109
TEST(DequantizeOpTest, Uint8) {
96110
// [-63.5, 64] -> scale=0.5 zero_point=127 for UINT8
97111
DequantizeOpModel m(TensorType_UINT8, {2, 5}, 0.5, 127, 1);
@@ -185,5 +199,22 @@ TEST(DequantizePerChannelOpTest, Int8) {
185199
{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64})));
186200
}
187201

202+
TEST(DequantizePerChannelOpTest, Int2) {
203+
// scales={0.5, 1.0}, zero_points={-1, 0}, channel_dim=0
204+
DequantizePerChannelOpModel m(TensorType_INT2, {2, 2}, {0.5, 1.0}, {-1, 0}, 0,
205+
6);
206+
m.SetInputInt2<int8_t>(0, {1, 0, -1, -2});
207+
ASSERT_EQ(m.Invoke(), kTfLiteOk);
208+
// Dequantization formula: (val - zp) * scale
209+
// Channel 0: scale=0.5, zp=-1.
210+
// val=1: (1 - (-1)) * 0.5 = 1.0
211+
// val=0: (0 - (-1)) * 0.5 = 0.5
212+
// Channel 1: scale=1.0, zp=0
213+
// val=-1: (-1 - 0) * 1.0 = -1.0
214+
// val=-2: (-2 - 0) * 1.0 = -2.0
215+
EXPECT_THAT(m.GetOutput(),
216+
ElementsAreArray(ArrayFloatNear({1.0, 0.5, -1.0, -2.0})));
217+
}
218+
188219
} // namespace
189220
} // namespace tflite

tflite/kernels/register_ref.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -380,7 +380,7 @@ BuiltinRefOpResolver::BuiltinRefOpResolver() {
380380
/* max_version = */ 8);
381381
AddBuiltin(BuiltinOperator_DEQUANTIZE, Register_DEQUANTIZE_REF(),
382382
/* min_version = */ 1,
383-
/* max_version = */ 6);
383+
/* max_version = */ 7);
384384
AddBuiltin(BuiltinOperator_PRELU, Register_PRELU_REF());
385385
AddBuiltin(BuiltinOperator_MAXIMUM, Register_MAXIMUM_REF(),
386386
/* min_version = */ 1,

0 commit comments

Comments (0)