Skip to content

Commit b3b68ad

Browse files
xiaoxmengmeta-codesync[bot]
authored andcommitted
fix: Use correct integer type for index bound vector creation (#405)
Summary: Pull Request resolved: #405 This diff adds support for creating integer bound vectors with the correct C++ type based on the column type (TINYINT, SMALLINT, INTEGER, BIGINT) instead of always using int64_t. Previously, `kBigintRange` filter conversion always created int64_t vectors regardless of the actual column type. This caused type mismatches when the index column was a smaller integer type. Added `INTEGER_TYPE_DISPATCH` macro to dispatch to the correct integer type at runtime Reviewed By: HuamengJiang Differential Revision: D90067705 fbshipit-source-id: c93104dafa890340b0558a110d66993f8bd3c5ce
1 parent b39bfb5 commit b3b68ad

File tree

2 files changed

+112
-12
lines changed

2 files changed

+112
-12
lines changed

dwio/nimble/index/IndexFilter.cpp

Lines changed: 44 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,38 @@ using namespace velox::common;
2626

2727
namespace {
2828

29+
// Dispatch macro for integer types (TINYINT, SMALLINT, INTEGER, BIGINT).
30+
#define INTEGER_TYPE_DISPATCH(FUNC, typeKind, ...) \
31+
[&]() { \
32+
switch (typeKind) { \
33+
case TypeKind::TINYINT: \
34+
return FUNC<TypeKind::TINYINT>(__VA_ARGS__); \
35+
case TypeKind::SMALLINT: \
36+
return FUNC<TypeKind::SMALLINT>(__VA_ARGS__); \
37+
case TypeKind::INTEGER: \
38+
return FUNC<TypeKind::INTEGER>(__VA_ARGS__); \
39+
case TypeKind::BIGINT: \
40+
return FUNC<TypeKind::BIGINT>(__VA_ARGS__); \
41+
default: \
42+
NIMBLE_UNREACHABLE( \
43+
"Unsupported integer type: {}", static_cast<int>(typeKind)); \
44+
} \
45+
}()
46+
47+
template <TypeKind Kind>
48+
std::pair<VectorPtr, VectorPtr> createIntegerBoundVectors(
49+
const TypePtr& columnType,
50+
int64_t lower,
51+
int64_t upper,
52+
memory::MemoryPool* pool) {
53+
using T = typename TypeTraits<Kind>::NativeType;
54+
auto lowerVector = BaseVector::create<FlatVector<T>>(columnType, 1, pool);
55+
auto upperVector = BaseVector::create<FlatVector<T>>(columnType, 1, pool);
56+
lowerVector->set(0, static_cast<T>(lower));
57+
upperVector->set(0, static_cast<T>(upper));
58+
return {std::move(lowerVector), std::move(upperVector)};
59+
}
60+
2961
// Information extracted from a filter for building index bounds.
3062
struct FilterBoundInfo {
3163
// Whether the filter is convertible to index bounds.
@@ -153,13 +185,13 @@ void addColumnBound(
153185
const std::string& columnName,
154186
const TypePtr& columnType,
155187
const core::SortOrder& sortOrder,
156-
memory::MemoryPool* pool,
157188
std::vector<std::string>& columnNames,
158189
std::vector<TypePtr>& columnTypes,
159190
std::vector<VectorPtr>& lowerVectors,
160191
std::vector<VectorPtr>& upperVectors,
161192
bool& lowerInclusive,
162-
bool& upperInclusive) {
193+
bool& upperInclusive,
194+
memory::MemoryPool* pool) {
163195
columnNames.push_back(columnName);
164196
columnTypes.push_back(columnType);
165197

@@ -180,12 +212,13 @@ void addColumnBound(
180212
switch (filter.kind()) {
181213
case FilterKind::kBigintRange: {
182214
const auto& range = *filter.as<BigintRange>();
183-
auto lowerVector =
184-
BaseVector::create<FlatVector<int64_t>>(columnType, 1, pool);
185-
auto upperVector =
186-
BaseVector::create<FlatVector<int64_t>>(columnType, 1, pool);
187-
lowerVector->set(0, range.lower());
188-
upperVector->set(0, range.upper());
215+
auto [lowerVector, upperVector] = INTEGER_TYPE_DISPATCH(
216+
createIntegerBoundVectors,
217+
columnType->kind(),
218+
columnType,
219+
range.lower(),
220+
range.upper(),
221+
pool);
189222
addVectors(std::move(lowerVector), std::move(upperVector));
190223
break;
191224
}
@@ -325,8 +358,7 @@ void addColumnBound(
325358
}
326359

327360
default:
328-
NIMBLE_UNREACHABLE(
329-
"Unsupported filter kind: {}", static_cast<int>(filter.kind()));
361+
NIMBLE_UNREACHABLE("Unsupported filter kind: {}", filter.kind());
330362
}
331363
}
332364

@@ -386,13 +418,13 @@ std::optional<serializer::IndexBounds> convertFilterToIndexBounds(
386418
columnName,
387419
columnType,
388420
sortOrder,
389-
pool,
390421
boundColumnNames,
391422
boundColumnTypes,
392423
lowerVectors,
393424
upperVectors,
394425
lowerInclusive,
395-
upperInclusive);
426+
upperInclusive,
427+
pool);
396428

397429
// If this is a range scan (not point lookup), we must stop here.
398430
// Cannot add more columns after a range scan.

dwio/nimble/index/tests/IndexFilterTest.cpp

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,74 @@ TEST_P(IndexFilterTestWithSortOrder, bigintFilter) {
463463
}
464464
}
465465

466+
// Tests for different integer types (TINYINT, SMALLINT, INTEGER) to verify
467+
// the INTEGER_TYPE_DISPATCH macro works correctly with all integer types.
468+
TEST_P(IndexFilterTestWithSortOrder, integerTypesFilter) {
469+
// Local template function to test a single integer type with range filter.
470+
auto testIntegerType =
471+
[this]<typename T>(const TypePtr& type, int64_t lower, int64_t upper) {
472+
const auto rowType = ROW({"a"}, {type});
473+
std::unordered_map<std::string, std::unique_ptr<Filter>> filters;
474+
filters["a"] = std::make_unique<BigintRange>(lower, upper, false);
475+
auto scanSpec = createScanSpec(rowType, filters);
476+
477+
auto result = convertFilterToIndexBounds(
478+
{"a"}, sortOrders(1), rowType, *scanSpec, pool_.get());
479+
ASSERT_TRUE(result.has_value());
480+
481+
const auto& bounds = result.value();
482+
EXPECT_EQ(bounds.indexColumns, std::vector<std::string>{"a"});
483+
verifyRangeBounds<T>(bounds, "a", lower, upper);
484+
EXPECT_EQ(scanSpec->childByName("a")->filter(), nullptr);
485+
};
486+
487+
// Test all integer types.
488+
testIntegerType.template operator()<int8_t>(TINYINT(), 10, 20);
489+
testIntegerType.template operator()<int16_t>(SMALLINT(), 1000, 2000);
490+
testIntegerType.template operator()<int32_t>(INTEGER(), 100000, 200000);
491+
testIntegerType.template operator()<int64_t>(BIGINT(), 1000000, 2000000);
492+
493+
// Local template function to test mixed integer types: point lookup on first
494+
// column + range on second column.
495+
auto testMixedIntegerTypes = [this]<typename T1, typename T2>(
496+
const TypePtr& type1,
497+
int64_t pointValue,
498+
const TypePtr& type2,
499+
int64_t lower,
500+
int64_t upper) {
501+
const auto rowType = ROW({"a", "b"}, {type1, type2});
502+
std::unordered_map<std::string, std::unique_ptr<Filter>> filters;
503+
filters["a"] = std::make_unique<BigintRange>(pointValue, pointValue, false);
504+
filters["b"] = std::make_unique<BigintRange>(lower, upper, false);
505+
auto scanSpec = createScanSpec(rowType, filters);
506+
507+
auto result = convertFilterToIndexBounds(
508+
{"a", "b"}, sortOrders(2), rowType, *scanSpec, pool_.get());
509+
ASSERT_TRUE(result.has_value());
510+
511+
const auto& bounds = result.value();
512+
EXPECT_EQ(bounds.indexColumns.size(), 2);
513+
EXPECT_EQ(bounds.indexColumns[0], "a");
514+
EXPECT_EQ(bounds.indexColumns[1], "b");
515+
516+
// Point lookup on "a".
517+
verifyBound<T1>(bounds.lowerBound->bound, "a", pointValue);
518+
verifyBound<T1>(bounds.upperBound->bound, "a", pointValue);
519+
// Range on "b".
520+
verifyRangeBounds<T2>(bounds, "b", lower, upper);
521+
522+
EXPECT_EQ(scanSpec->childByName("a")->filter(), nullptr);
523+
EXPECT_EQ(scanSpec->childByName("b")->filter(), nullptr);
524+
};
525+
526+
// Test mixed integer types: TINYINT point lookup + INTEGER range.
527+
testMixedIntegerTypes.template operator()<int8_t, int32_t>(
528+
TINYINT(), 42, INTEGER(), 10000, 20000);
529+
// Test SMALLINT + BIGINT combination.
530+
testMixedIntegerTypes.template operator()<int16_t, int64_t>(
531+
SMALLINT(), 100, BIGINT(), 1000000000, 2000000000);
532+
}
533+
466534
TEST_P(IndexFilterTestWithSortOrder, doubleFilter) {
467535
const auto rowType = ROW({"a", "b"}, {DOUBLE(), DOUBLE()});
468536

0 commit comments

Comments
 (0)