Skip to content

Commit 6995fc5

Browse files
authored
feat(test): add tests for global index (#41)
1 parent 580b53a commit 6995fc5

File tree

6 files changed

+188
-8
lines changed

6 files changed

+188
-8
lines changed

src/paimon/common/global_index/bitmap_topk_global_index_result.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,13 @@ const std::vector<float>& BitmapTopKGlobalIndexResult::GetScores() const {
143143
}
144144

145145
std::string BitmapTopKGlobalIndexResult::ToString() const {
146-
return fmt::format("row ids: {}, scores: {{{}}}", bitmap_.ToString(), fmt::join(scores_, ","));
146+
std::vector<std::string> formatted_scores;
147+
formatted_scores.reserve(scores_.size());
148+
for (const auto& score : scores_) {
149+
formatted_scores.push_back(fmt::format("{:.2f}", score));
150+
}
151+
return fmt::format("row ids: {}, scores: {{{}}}", bitmap_.ToString(),
152+
fmt::join(formatted_scores, ","));
147153
}
148154

149155
} // namespace paimon

src/paimon/common/global_index/bitmap_topk_global_index_result_test.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -143,9 +143,10 @@ TEST_F(BitmapTopKGlobalIndexResultTest, TestAndBitmapResult) {
143143
ASSERT_OK_AND_ASSIGN(auto result, index_result1->And(index_result2));
144144
ASSERT_EQ(result->ToString(), expected_str);
145145
};
146-
check_and_result({1, 2, 3}, {1.1f, 1.2f, 1.3f}, {1, 2, 7}, "row ids: {1,2}, scores: {1.1,1.2}");
146+
check_and_result({1, 2, 3}, {1.1f, 1.2f, 1.3f}, {1, 2, 7},
147+
"row ids: {1,2}, scores: {1.10,1.20}");
147148
check_and_result({1, 2, 3}, {100.1f, 100.2f, 100.3f}, {1, 2, 3},
148-
"row ids: {1,2,3}, scores: {100.1,100.2,100.3}");
149+
"row ids: {1,2,3}, scores: {100.10,100.20,100.30}");
149150
check_and_result({1, 2, 3}, {1.1f, 1.2f, 1.3f}, {100, 200, 300}, "row ids: {}, scores: {}");
150151
check_and_result({1, 2, 3}, {1.1f, 1.2f, 1.3f}, {}, "row ids: {}, scores: {}");
151152
check_and_result({}, {}, {}, "row ids: {}, scores: {}");
@@ -177,14 +178,14 @@ TEST_F(BitmapTopKGlobalIndexResultTest, TestOr) {
177178
ASSERT_EQ(result->ToString(), expected_str);
178179
};
179180
check_or_result({1, 2, 3}, {1.1f, 1.2f, 1.3f}, {100, 200, 300}, {100.1f, 200.1f, 300.1f},
180-
"row ids: {1,2,3,100,200,300}, scores: {1.1,1.2,1.3,100.1,200.1,300.1}");
181+
"row ids: {1,2,3,100,200,300}, scores: {1.10,1.20,1.30,100.10,200.10,300.10}");
181182
check_or_result({1, 2, 3}, {1.1f, 1.2f, 1.3f}, {}, {},
182-
"row ids: {1,2,3}, scores: {1.1,1.2,1.3}");
183+
"row ids: {1,2,3}, scores: {1.10,1.20,1.30}");
183184
check_or_result({}, {}, {}, {}, "row ids: {}, scores: {}");
184185
check_or_result(
185186
{1, 2, 3, RoaringBitmap64::MAX_VALUE}, {1.1f, 1.2f, 1.3f, 1.4f},
186187
{RoaringBitmap32::MAX_VALUE}, {0.12f},
187-
"row ids: {1,2,3,2147483647,9223372036854775807}, scores: {1.1,1.2,1.3,0.12,1.4}");
188+
"row ids: {1,2,3,2147483647,9223372036854775807}, scores: {1.10,1.20,1.30,0.12,1.40}");
188189
}
189190

190191
TEST_F(BitmapTopKGlobalIndexResultTest, TestOrBitmapResult) {
@@ -243,7 +244,7 @@ TEST_F(BitmapTopKGlobalIndexResultTest, TestAddOffset) {
243244
auto index_result = std::make_shared<BitmapTopKGlobalIndexResult>(
244245
RoaringBitmap64::From(ids), std::move(scores));
245246
ASSERT_OK_AND_ASSIGN(auto result_with_offset, index_result->AddOffset(10));
246-
ASSERT_EQ(result_with_offset->ToString(), "row ids: {11,12,13}, scores: {1.1,1.2,1.3}");
247+
ASSERT_EQ(result_with_offset->ToString(), "row ids: {11,12,13}, scores: {1.10,1.20,1.30}");
247248
}
248249
{
249250
std::vector<int64_t> ids = {};

src/paimon/core/global_index/row_range_global_index_scanner_impl.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
#include <vector>
2323

2424
#include "arrow/c/bridge.h"
25+
#include "arrow/c/helpers.h"
26+
#include "paimon/common/utils/scope_guard.h"
2527
#include "paimon/core/global_index/global_index_evaluator_impl.h"
2628
#include "paimon/global_index/global_indexer.h"
2729
#include "paimon/global_index/global_indexer_factory.h"
@@ -106,9 +108,11 @@ Result<std::shared_ptr<GlobalIndexReader>> RowRangeGlobalIndexScannerImpl::Creat
106108
// TODO(xinyu.lxy): c_arrow_schema may contain additional associated fields.
107109
auto arrow_field = DataField::ConvertDataFieldToArrowField(field);
108110
auto arrow_schema = arrow::schema({arrow_field});
111+
109112
ArrowSchema c_arrow_schema;
110113
PAIMON_RETURN_NOT_OK_FROM_ARROW(arrow::ExportSchema(*arrow_schema, &c_arrow_schema));
111114
auto index_io_metas = ToGlobalIndexIOMetas(entries);
115+
ScopeGuard guard([&]() { ArrowSchemaRelease(&c_arrow_schema); });
112116
return indexer->CreateReader(&c_arrow_schema, index_file_manager_, index_io_metas, pool_);
113117
}
114118

src/paimon/global_index/lumina/lumina_global_index_test.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,11 @@ TEST_F(LuminaGlobalIndexTest, TestSimple) {
200200
/*predicate*/ nullptr));
201201
CheckResult(topk_result, {3l, 1l, 2l}, {0.01f, 2.01f, 2.21f});
202202
}
203+
{
204+
// VisitIsNull will return all rows
205+
ASSERT_OK_AND_ASSIGN(auto is_null_result, reader->VisitIsNull());
206+
ASSERT_EQ(is_null_result->ToString(), "{0,1,2,3}");
207+
}
203208
}
204209

205210
TEST_F(LuminaGlobalIndexTest, TestWithFilter) {

src/paimon/testing/utils/io_exception_helper.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,4 +51,12 @@ namespace paimon::test {
5151
} \
5252
} \
5353
}
54+
55+
#define CHECK_HOOK_STATUS_WITHOUT_MESSAGE_CHECK(status) \
56+
{ \
57+
auto __s = (status); \
58+
if (!__s.ok()) { \
59+
continue; \
60+
} \
61+
}
5462
} // namespace paimon::test

test/inte/global_index_test.cpp

Lines changed: 157 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,34 @@ TEST_P(GlobalIndexTest, TestWriteIndex) {
311311
/*partition=*/BinaryRow::EmptyRow(), /*bucket=*/0, /*total_buckets=*/std::nullopt,
312312
expected_data_increment, CompactIncrement({}, {}, {}));
313313
ASSERT_TRUE(expected_commit_message->TEST_Equal(*index_commit_msg_impl));
314+
315+
{
316+
// test invalid write task with a non-registered index type
317+
ASSERT_NOK_WITH_MSG(
318+
GlobalIndexWriteTask::WriteIndex(
319+
table_path, "f0", "invalid",
320+
std::make_shared<IndexedSplitImpl>(split, std::vector<Range>({Range(0, 7)})),
321+
/*options=*/{}, pool_),
322+
"Unknown index type invalid, may not registered");
323+
}
324+
{
325+
// test invalid range mismatch
326+
ASSERT_NOK_WITH_MSG(
327+
GlobalIndexWriteTask::WriteIndex(
328+
table_path, "f0", "bitmap",
329+
std::make_shared<IndexedSplitImpl>(split, std::vector<Range>({Range(0, 8)})),
330+
/*options=*/{}, pool_),
331+
"specified range length 9 mismatch indexed range length 8");
332+
}
333+
{
334+
// test invalid multiple ranges
335+
ASSERT_NOK_WITH_MSG(GlobalIndexWriteTask::WriteIndex(
336+
table_path, "f0", "bitmap",
337+
std::make_shared<IndexedSplitImpl>(
338+
split, std::vector<Range>({Range(0, 6), Range(7, 7)})),
339+
/*options=*/{}, pool_),
340+
"GlobalIndexWriteTask only supports a single contiguous range.");
341+
}
314342
}
315343

316344
TEST_P(GlobalIndexTest, TestWriteIndexWithPartition) {
@@ -641,6 +669,13 @@ TEST_P(GlobalIndexTest, TestScanIndexWithRange) {
641669
index_reader->VisitEqual(Literal(FieldType::STRING, "Alice", 5)));
642670
ASSERT_EQ(index_result->ToString(), "{0,7}");
643671

672+
{
673+
// test non-existent index type
674+
ASSERT_OK_AND_ASSIGN(auto non_exist_index_reader,
675+
range_scanner->CreateReader("f0", "non-exist"));
676+
ASSERT_FALSE(non_exist_index_reader);
677+
}
678+
644679
// test evaluator
645680
ASSERT_OK_AND_ASSIGN(auto evaluator, scanner_impl->CreateIndexEvaluator());
646681
auto predicate =
@@ -952,6 +987,15 @@ TEST_P(GlobalIndexTest, TestWriteCommitScanReadIndexWithPartition) {
952987
ASSERT_NOK_WITH_MSG(global_index_scan->CreateRangeScan(Range(0, 8)),
953988
"input range contain multiple partitions, fail to create range scan");
954989
}
990+
{
991+
// test invalid partition input
992+
ASSERT_NOK_WITH_MSG(
993+
GlobalIndexScan::Create(
994+
table_path, /*snapshot_id=*/std::nullopt,
995+
/*partitions=*/std::vector<std::map<std::string, std::string>>(), lumina_options,
996+
/*file_system=*/nullptr, pool_),
997+
"invalid input partition, supposed to be null or at least one partition");
998+
}
955999
}
9561000

9571001
TEST_P(GlobalIndexTest, TestWriteCommitScanReadIndexWithScore) {
@@ -1675,13 +1719,125 @@ TEST_P(GlobalIndexTest, TestScanIndexWithTwoIndexes) {
16751719
std::vector<float> query = {11.0f, 11.0f, 11.0f, 11.0f};
16761720
ASSERT_OK_AND_ASSIGN(auto topk_result, index_readers[0]->VisitTopK(1, query, /*filter=*/nullptr,
16771721
/*predicate*/ nullptr));
1678-
ASSERT_EQ(topk_result->ToString(), "row ids: {7}, scores: {0}");
1722+
ASSERT_EQ(topk_result->ToString(), "row ids: {7}, scores: {0.00}");
16791723

16801724
// query f2
16811725
ASSERT_OK_AND_ASSIGN(index_readers, range_scanner->CreateReaders("f2"));
16821726
ASSERT_EQ(index_readers.size(), 0);
16831727
}
16841728

1729+
TEST_P(GlobalIndexTest, TestIOException) {
1730+
if (GetParam() == "lance") {
1731+
return;
1732+
}
1733+
arrow::FieldVector fields = {
1734+
arrow::field("f0", arrow::utf8()), arrow::field("f1", arrow::list(arrow::float32())),
1735+
arrow::field("f2", arrow::int32()), arrow::field("f3", arrow::float64())};
1736+
1737+
auto schema = arrow::schema(fields);
1738+
std::vector<std::string> write_cols = schema->field_names();
1739+
auto src_array = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields), R"([
1740+
["Alice", [0.0, 0.0, 0.0, 0.0], 10, 11.1],
1741+
["Bob", [0.0, 1.0, 0.0, 1.0], 10, 12.1],
1742+
["Emily", [1.0, 0.0, 1.0, 0.0], 10, 13.1],
1743+
["Tony", [1.0, 1.0, 1.0, 1.0], 10, 14.1]
1744+
])")
1745+
.ValueOrDie();
1746+
1747+
std::map<std::string, std::string> options = {{Options::MANIFEST_FORMAT, "orc"},
1748+
{Options::FILE_FORMAT, GetParam()},
1749+
{Options::FILE_SYSTEM, "local"},
1750+
{Options::ROW_TRACKING_ENABLED, "true"},
1751+
{Options::DATA_EVOLUTION_ENABLED, "true"}};
1752+
std::map<std::string, std::string> lumina_options = {
1753+
{"lumina.dimension", "4"},
1754+
{"lumina.indextype", "bruteforce"},
1755+
{"lumina.distance.metric", "l2"},
1756+
{"lumina.encoding.type", "encoding.rawf32"},
1757+
{"lumina.search.threadcount", "10"}};
1758+
std::string table_path;
1759+
bool write_run_complete = false;
1760+
auto io_hook = IOHook::GetInstance();
1761+
for (size_t i = 0; i < 2000; i += paimon::test::RandomNumber(20, 30)) {
1762+
ScopeGuard guard([&io_hook]() { io_hook->Clear(); });
1763+
dir_ = UniqueTestDirectory::Create("local");
1764+
// create table and write data
1765+
CreateTable(/*partition_keys=*/{}, schema, options);
1766+
table_path = PathUtil::JoinPath(dir_->Str(), "foo.db/bar");
1767+
ASSERT_OK_AND_ASSIGN(auto commit_msgs, WriteArray(table_path, write_cols, src_array));
1768+
ASSERT_OK(Commit(table_path, commit_msgs));
1769+
1770+
io_hook->Reset(i, IOHook::Mode::RETURN_ERROR);
1771+
// write bitmap index
1772+
auto bitmap_index_write_status =
1773+
WriteIndex(table_path, /*partition_filters=*/{}, "f0", "bitmap",
1774+
/*options=*/{}, Range(0, 3));
1775+
CHECK_HOOK_STATUS(bitmap_index_write_status, i);
1776+
// write lumina index
1777+
auto lumina_index_write_status =
1778+
WriteIndex(table_path, /*partition_filters=*/{}, "f1", "lumina",
1779+
/*options=*/lumina_options, Range(0, 3));
1780+
CHECK_HOOK_STATUS_WITHOUT_MESSAGE_CHECK(lumina_index_write_status);
1781+
write_run_complete = true;
1782+
break;
1783+
}
1784+
ASSERT_TRUE(write_run_complete);
1785+
1786+
// read for bitmap
1787+
bool read_run_complete = false;
1788+
for (size_t i = 0; i < 2000; i += paimon::test::RandomNumber(20, 30)) {
1789+
ScopeGuard guard([&io_hook]() { io_hook->Clear(); });
1790+
io_hook->Reset(i, IOHook::Mode::RETURN_ERROR);
1791+
auto predicate =
1792+
PredicateBuilder::Equal(/*field_index=*/0, /*field_name=*/"f0", FieldType::STRING,
1793+
Literal(FieldType::STRING, "Alice", 5));
1794+
auto result_fields = fields;
1795+
result_fields.insert(result_fields.begin(), SpecialFields::ValueKind().ArrowField());
1796+
auto expected_array =
1797+
arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(result_fields), R"([
1798+
[0, "Alice", [0.0, 0.0, 0.0, 0.0], 10, 11.1]
1799+
])")
1800+
.ValueOrDie();
1801+
1802+
auto plan_result = ScanGlobalIndexAndData(table_path, predicate);
1803+
CHECK_HOOK_STATUS(plan_result.status(), i);
1804+
auto plan = std::move(plan_result).value();
1805+
auto read_status = ReadData(table_path, write_cols, expected_array, predicate, plan);
1806+
CHECK_HOOK_STATUS(read_status, i);
1807+
read_run_complete = true;
1808+
break;
1809+
}
1810+
ASSERT_TRUE(read_run_complete);
1811+
1812+
// read for lumina
1813+
read_run_complete = false;
1814+
for (size_t i = 0; i < 2000; i += paimon::test::RandomNumber(20, 30)) {
1815+
ScopeGuard guard([&io_hook]() { io_hook->Clear(); });
1816+
io_hook->Reset(i, IOHook::Mode::RETURN_ERROR);
1817+
auto global_index_scan_result =
1818+
GlobalIndexScan::Create(table_path, /*snapshot_id=*/std::nullopt,
1819+
/*partitions=*/std::nullopt, lumina_options,
1820+
/*file_system=*/nullptr, pool_);
1821+
CHECK_HOOK_STATUS(global_index_scan_result.status(), i);
1822+
auto global_index_scan = std::move(global_index_scan_result).value();
1823+
auto range_scanner_result = global_index_scan->CreateRangeScan(Range(0, 3));
1824+
CHECK_HOOK_STATUS(range_scanner_result.status(), i);
1825+
auto range_scanner = std::move(range_scanner_result).value();
1826+
auto lumina_reader_result = range_scanner->CreateReader("f1", "lumina");
1827+
CHECK_HOOK_STATUS_WITHOUT_MESSAGE_CHECK(lumina_reader_result.status());
1828+
auto lumina_reader = std::move(lumina_reader_result).value();
1829+
1830+
std::vector<float> query = {1.0f, 1.0f, 1.0f, 1.1f};
1831+
auto topk_result = lumina_reader->VisitTopK(1, query, /*filter=*/nullptr,
1832+
/*predicate*/ nullptr);
1833+
CHECK_HOOK_STATUS_WITHOUT_MESSAGE_CHECK(topk_result.status());
1834+
ASSERT_EQ(topk_result.value()->ToString(), "row ids: {3}, scores: {0.01}");
1835+
read_run_complete = true;
1836+
break;
1837+
}
1838+
ASSERT_TRUE(read_run_complete);
1839+
}
1840+
16851841
std::vector<std::string> GetTestValuesForGlobalIndexTest() {
16861842
std::vector<std::string> values;
16871843
values.emplace_back("parquet");

0 commit comments

Comments
 (0)