Skip to content

Commit 691e00b

Browse files
committed
fix
1 parent 83040e8 commit 691e00b

File tree

5 files changed

+32
-7
lines changed

5 files changed

+32
-7
lines changed

src/paimon/core/global_index/indexed_split_impl.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -91,7 +91,7 @@ class IndexedSplitImpl : public IndexedSplit {
9191
Status Validate() const {
9292
if ((row_ranges_.empty() && !data_split_->DataFiles().empty()) ||
9393
(!row_ranges_.empty() && data_split_->DataFiles().empty())) {
94-
return Status::Invalid("invalid IndexedSplit: row ranges mismatch data files");
94+
return Status::Invalid("Invalid IndexedSplit: row ranges mismatch data files.");
9595
}
9696
if (!scores_.empty()) {
9797
size_t row_count = 0;

src/paimon/core/global_index/indexed_split_test.cpp

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -169,5 +169,24 @@ TEST(IndexedSplitTest, TestValidate) {
169169
ASSERT_NOK_WITH_MSG(split.Validate(),
170170
"Scores length does not match row ranges in indexed split.");
171171
}
172+
{
173+
std::vector<Range> row_ranges = {};
174+
IndexedSplitImpl split(data_split, row_ranges);
175+
ASSERT_NOK_WITH_MSG(split.Validate(),
176+
"Invalid IndexedSplit: row ranges mismatch data files.");
177+
}
178+
{
179+
std::vector<Range> row_ranges = {Range(10, 12)};
180+
DataSplitImpl::Builder empty_builder(
181+
/*partition=*/BinaryRow::EmptyRow(),
182+
/*bucket=*/0, /*bucket_path=*/
183+
"data/test_table/bucket-0", std::vector<std::shared_ptr<DataFileMeta>>({}));
184+
auto empty_data_split = std::dynamic_pointer_cast<DataSplitImpl>(
185+
empty_builder.WithSnapshot(1).IsStreaming(false).RawConvertible(true).Build().value());
186+
187+
IndexedSplitImpl split(empty_data_split, row_ranges);
188+
ASSERT_NOK_WITH_MSG(split.Validate(),
189+
"Invalid IndexedSplit: row ranges mismatch data files.");
190+
}
172191
}
173192
} // namespace paimon::test

src/paimon/core/schema/table_schema.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -200,6 +200,7 @@ Result<DataField> TableSchema::GetField(int32_t field_id) const {
200200
Result<std::vector<DataField>> TableSchema::GetFields(
201201
const std::vector<std::string>& field_names) const {
202202
std::vector<DataField> data_fields;
203+
data_fields.reserve(field_names.size());
203204
for (const auto& name : field_names) {
204205
PAIMON_ASSIGN_OR_RAISE(DataField field, GetField(name));
205206
data_fields.emplace_back(field);

src/paimon/core/table/source/data_evolution_batch_scan.cpp

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -37,22 +37,25 @@ DataEvolutionBatchScan::DataEvolutionBatchScan(
3737

3838
Result<std::shared_ptr<Plan>> DataEvolutionBatchScan::CreatePlan() {
3939
std::optional<std::vector<Range>> row_ranges;
40-
if (!global_index_result_) {
40+
std::shared_ptr<GlobalIndexResult> final_global_index_result = global_index_result_;
41+
if (!final_global_index_result) {
4142
PAIMON_ASSIGN_OR_RAISE(std::optional<std::shared_ptr<GlobalIndexResult>> index_result,
4243
EvalGlobalIndex());
4344
if (index_result) {
45+
final_global_index_result = index_result.value();
4446
PAIMON_ASSIGN_OR_RAISE(row_ranges, index_result.value()->ToRanges());
4547
}
4648
} else {
47-
PAIMON_ASSIGN_OR_RAISE(row_ranges, global_index_result_->ToRanges());
49+
PAIMON_ASSIGN_OR_RAISE(row_ranges, final_global_index_result->ToRanges());
4850
}
4951
if (!row_ranges) {
5052
return batch_scan_->CreatePlan();
5153
}
5254
batch_scan_->WithRowRanges(row_ranges.value());
5355
PAIMON_ASSIGN_OR_RAISE(std::shared_ptr<Plan> data_plan, batch_scan_->CreatePlan());
5456
std::map<int64_t, float> id_to_score;
55-
if (auto topk_result = std::dynamic_pointer_cast<TopKGlobalIndexResult>(global_index_result_)) {
57+
if (auto topk_result =
58+
std::dynamic_pointer_cast<TopKGlobalIndexResult>(final_global_index_result)) {
5659
PAIMON_ASSIGN_OR_RAISE(std::unique_ptr<TopKGlobalIndexResult::TopKIterator> topk_iter,
5760
topk_result->CreateTopKIterator());
5861
while (topk_iter->HasNext()) {
@@ -68,8 +71,10 @@ Result<std::shared_ptr<Plan>> DataEvolutionBatchScan::WrapToIndexedSplits(
6871
const std::map<int64_t, float>& id_to_score) const {
6972
std::vector<Range> sorted_row_ranges =
7073
Range::SortAndMergeOverlap(row_ranges, /*adjacent=*/true);
74+
auto data_splits = data_plan->Splits();
7175
std::vector<std::shared_ptr<Split>> indexed_splits;
72-
for (const auto& split : data_plan->Splits()) {
76+
indexed_splits.reserve(data_splits.size());
77+
for (const auto& split : data_splits) {
7378
auto data_split = std::dynamic_pointer_cast<DataSplitImpl>(split);
7479
if (!data_split) {
7580
return Status::Invalid("Cannot cast split to DataSplit when create IndexedSplit");

test/inte/global_index_test.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1505,7 +1505,7 @@ TEST_P(GlobalIndexTest, TestDataEvolutionBatchScanWithPartitionWithTwoFields) {
15051505

15061506
auto src_array1 = std::dynamic_pointer_cast<arrow::StructArray>(
15071507
arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields_), R"([
1508-
["Alice", 10, 1, 11.1],
1508+
["Alice", 10, 1, 11.1],
15091509
["Bob", 10, 1, 12.1],
15101510
["Bob", 10, 1, 16.1]
15111511
])")
@@ -1575,7 +1575,7 @@ TEST_P(GlobalIndexTest, TestDataEvolutionBatchScanWithPartitionWithTwoFields) {
15751575
ASSERT_OK_AND_ASSIGN(auto plan, ScanGlobalIndexAndData(table_path, predicate));
15761576
auto expected_array = std::dynamic_pointer_cast<arrow::StructArray>(
15771577
arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(result_fields), R"([
1578-
[0, "Alice", 10, 1, 11.1],
1578+
[0, "Alice", 10, 1, 11.1],
15791579
[0, "Bob", 10, 1, 12.1],
15801580
[0, "Bob", 10, 1, 16.1],
15811581
[0, "Lucy", 20, 1, 15.1]

0 commit comments

Comments
 (0)