fix: Use correct integer type for index bound vector creation (#405)

xiaoxmeng · meta-codesync[bot] · commit b3b68ad2c0b1 · 2026-01-04T00:19:36.000-08:00
Summary: Pull Request resolved: #405 This diff adds support for creating integer bound vectors with the correct C++ type based on the column type (TINYINT, SMALLINT, INTEGER, BIGINT) instead of always using int64_t. Previously, `kBigintRange` filter conversion always created int64_t vectors regardless of the actual column type. This caused type mismatches when the index column was a smaller integer type. Added the `INTEGER_TYPE_DISPATCH` macro to dispatch to the correct integer type at runtime. Reviewed By: HuamengJiang Differential Revision: D90067705 fbshipit-source-id: c93104dafa890340b0558a110d66993f8bd3c5ce
diff --git a/dwio/nimble/index/IndexFilter.cpp b/dwio/nimble/index/IndexFilter.cpp
@@ -26,6 +26,38 @@ using namespace velox::common;
 
 namespace {
 
+// Dispatch macro for integer types (TINYINT, SMALLINT, INTEGER, BIGINT).
+#define INTEGER_TYPE_DISPATCH(FUNC, typeKind, ...)                       \
+  [&]() {                                                                \
+    switch (typeKind) {                                                  \
+      case TypeKind::TINYINT:                                            \
+        return FUNC<TypeKind::TINYINT>(__VA_ARGS__);                     \
+      case TypeKind::SMALLINT:                                           \
+        return FUNC<TypeKind::SMALLINT>(__VA_ARGS__);                    \
+      case TypeKind::INTEGER:                                            \
+        return FUNC<TypeKind::INTEGER>(__VA_ARGS__);                     \
+      case TypeKind::BIGINT:                                             \
+        return FUNC<TypeKind::BIGINT>(__VA_ARGS__);                      \
+      default:                                                           \
+        NIMBLE_UNREACHABLE(                                              \
+            "Unsupported integer type: {}", static_cast<int>(typeKind)); \
+    }                                                                    \
+  }()
+
+template <TypeKind Kind>
+std::pair<VectorPtr, VectorPtr> createIntegerBoundVectors(
+    const TypePtr& columnType,
+    int64_t lower,
+    int64_t upper,
+    memory::MemoryPool* pool) {
+  using T = typename TypeTraits<Kind>::NativeType;
+  auto lowerVector = BaseVector::create<FlatVector<T>>(columnType, 1, pool);
+  auto upperVector = BaseVector::create<FlatVector<T>>(columnType, 1, pool);
+  lowerVector->set(0, static_cast<T>(lower));
+  upperVector->set(0, static_cast<T>(upper));
+  return {std::move(lowerVector), std::move(upperVector)};
+}
+
 // Information extracted from a filter for building index bounds.
 struct FilterBoundInfo {
   // Whether the filter is convertible to index bounds.
@@ -153,13 +185,13 @@ void addColumnBound(
     const std::string& columnName,
     const TypePtr& columnType,
     const core::SortOrder& sortOrder,
-    memory::MemoryPool* pool,
     std::vector<std::string>& columnNames,
     std::vector<TypePtr>& columnTypes,
     std::vector<VectorPtr>& lowerVectors,
     std::vector<VectorPtr>& upperVectors,
     bool& lowerInclusive,
-    bool& upperInclusive) {
+    bool& upperInclusive,
+    memory::MemoryPool* pool) {
   columnNames.push_back(columnName);
   columnTypes.push_back(columnType);
 
@@ -180,12 +212,13 @@ void addColumnBound(
   switch (filter.kind()) {
     case FilterKind::kBigintRange: {
       const auto& range = *filter.as<BigintRange>();
-      auto lowerVector =
-          BaseVector::create<FlatVector<int64_t>>(columnType, 1, pool);
-      auto upperVector =
-          BaseVector::create<FlatVector<int64_t>>(columnType, 1, pool);
-      lowerVector->set(0, range.lower());
-      upperVector->set(0, range.upper());
+      auto [lowerVector, upperVector] = INTEGER_TYPE_DISPATCH(
+          createIntegerBoundVectors,
+          columnType->kind(),
+          columnType,
+          range.lower(),
+          range.upper(),
+          pool);
       addVectors(std::move(lowerVector), std::move(upperVector));
       break;
     }
@@ -325,8 +358,7 @@ void addColumnBound(
     }
 
     default:
-      NIMBLE_UNREACHABLE(
-          "Unsupported filter kind: {}", static_cast<int>(filter.kind()));
+      NIMBLE_UNREACHABLE("Unsupported filter kind: {}", filter.kind());
   }
 }
 
@@ -386,13 +418,13 @@ std::optional<serializer::IndexBounds> convertFilterToIndexBounds(
         columnName,
         columnType,
         sortOrder,
-        pool,
         boundColumnNames,
         boundColumnTypes,
         lowerVectors,
         upperVectors,
         lowerInclusive,
-        upperInclusive);
+        upperInclusive,
+        pool);
 
     // If this is a range scan (not point lookup), we must stop here.
     // Cannot add more columns after a range scan.
diff --git a/dwio/nimble/index/tests/IndexFilterTest.cpp b/dwio/nimble/index/tests/IndexFilterTest.cpp
@@ -463,6 +463,74 @@ TEST_P(IndexFilterTestWithSortOrder, bigintFilter) {
   }
 }
 
+// Tests for all integer types (TINYINT, SMALLINT, INTEGER, BIGINT) to verify
+// the INTEGER_TYPE_DISPATCH macro works correctly with each of them.
+TEST_P(IndexFilterTestWithSortOrder, integerTypesFilter) {
+  // Templated lambda that tests a single integer type with a range filter.
+  auto testIntegerType =
+      [this]<typename T>(const TypePtr& type, int64_t lower, int64_t upper) {
+        const auto rowType = ROW({"a"}, {type});
+        std::unordered_map<std::string, std::unique_ptr<Filter>> filters;
+        filters["a"] = std::make_unique<BigintRange>(lower, upper, false);
+        auto scanSpec = createScanSpec(rowType, filters);
+
+        auto result = convertFilterToIndexBounds(
+            {"a"}, sortOrders(1), rowType, *scanSpec, pool_.get());
+        ASSERT_TRUE(result.has_value());
+
+        const auto& bounds = result.value();
+        EXPECT_EQ(bounds.indexColumns, std::vector<std::string>{"a"});
+        verifyRangeBounds<T>(bounds, "a", lower, upper);
+        EXPECT_EQ(scanSpec->childByName("a")->filter(), nullptr);
+      };
+
+  // Test all integer types.
+  testIntegerType.template operator()<int8_t>(TINYINT(), 10, 20);
+  testIntegerType.template operator()<int16_t>(SMALLINT(), 1000, 2000);
+  testIntegerType.template operator()<int32_t>(INTEGER(), 100000, 200000);
+  testIntegerType.template operator()<int64_t>(BIGINT(), 1000000, 2000000);
+
+  // Templated lambda that tests mixed integer types: point lookup on the first
+  // column plus a range on the second column.
+  auto testMixedIntegerTypes = [this]<typename T1, typename T2>(
+                                   const TypePtr& type1,
+                                   int64_t pointValue,
+                                   const TypePtr& type2,
+                                   int64_t lower,
+                                   int64_t upper) {
+    const auto rowType = ROW({"a", "b"}, {type1, type2});
+    std::unordered_map<std::string, std::unique_ptr<Filter>> filters;
+    filters["a"] = std::make_unique<BigintRange>(pointValue, pointValue, false);
+    filters["b"] = std::make_unique<BigintRange>(lower, upper, false);
+    auto scanSpec = createScanSpec(rowType, filters);
+
+    auto result = convertFilterToIndexBounds(
+        {"a", "b"}, sortOrders(2), rowType, *scanSpec, pool_.get());
+    ASSERT_TRUE(result.has_value());
+
+    const auto& bounds = result.value();
+    EXPECT_EQ(bounds.indexColumns.size(), 2);
+    EXPECT_EQ(bounds.indexColumns[0], "a");
+    EXPECT_EQ(bounds.indexColumns[1], "b");
+
+    // Point lookup on "a".
+    verifyBound<T1>(bounds.lowerBound->bound, "a", pointValue);
+    verifyBound<T1>(bounds.upperBound->bound, "a", pointValue);
+    // Range on "b".
+    verifyRangeBounds<T2>(bounds, "b", lower, upper);
+
+    EXPECT_EQ(scanSpec->childByName("a")->filter(), nullptr);
+    EXPECT_EQ(scanSpec->childByName("b")->filter(), nullptr);
+  };
+
+  // Test mixed integer types: TINYINT point lookup + INTEGER range.
+  testMixedIntegerTypes.template operator()<int8_t, int32_t>(
+      TINYINT(), 42, INTEGER(), 10000, 20000);
+  // Test SMALLINT + BIGINT combination.
+  testMixedIntegerTypes.template operator()<int16_t, int64_t>(
+      SMALLINT(), 100, BIGINT(), 1000000000, 2000000000);
+}
+
 TEST_P(IndexFilterTestWithSortOrder, doubleFilter) {
   const auto rowType = ROW({"a", "b"}, {DOUBLE(), DOUBLE()});