|
| 1 | +/* |
| 2 | + * Copyright 2024-present Alibaba Inc. |
| 3 | + * |
| 4 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | + * you may not use this file except in compliance with the License. |
| 6 | + * You may obtain a copy of the License at |
| 7 | + * |
| 8 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | + * |
| 10 | + * Unless required by applicable law or agreed to in writing, software |
| 11 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | + * See the License for the specific language governing permissions and |
| 14 | + * limitations under the License. |
| 15 | + */ |
| 16 | + |
| 17 | +#include "paimon/common/global_index/complete_index_score_batch_reader.h" |
| 18 | + |
| 19 | +#include <cstddef> |
| 20 | + |
| 21 | +#include "arrow/api.h" |
| 22 | +#include "arrow/array/array_base.h" |
| 23 | +#include "arrow/array/array_nested.h" |
| 24 | +#include "arrow/array/util.h" |
| 25 | +#include "arrow/c/abi.h" |
| 26 | +#include "arrow/c/bridge.h" |
| 27 | +#include "arrow/scalar.h" |
| 28 | +#include "paimon/common/reader/reader_utils.h" |
| 29 | +#include "paimon/common/table/special_fields.h" |
| 30 | +#include "paimon/common/types/row_kind.h" |
| 31 | +#include "paimon/common/utils/arrow/mem_utils.h" |
| 32 | +#include "paimon/common/utils/arrow/status_utils.h" |
| 33 | +#include "paimon/status.h" |
| 34 | +namespace paimon { |
| 35 | +CompleteIndexScoreBatchReader::CompleteIndexScoreBatchReader( |
| 36 | + std::unique_ptr<BatchReader>&& reader, const std::vector<float>& scores, |
| 37 | + const std::shared_ptr<MemoryPool>& pool) |
| 38 | + : arrow_pool_(GetArrowPool(pool)), reader_(std::move(reader)), scores_(scores) {} |
| 39 | + |
| 40 | +Result<BatchReader::ReadBatch> CompleteIndexScoreBatchReader::NextBatch() { |
| 41 | + PAIMON_ASSIGN_OR_RAISE(BatchReader::ReadBatchWithBitmap batch_with_bitmap, |
| 42 | + NextBatchWithBitmap()); |
| 43 | + return ReaderUtils::ApplyBitmapToReadBatch(std::move(batch_with_bitmap), arrow_pool_.get()); |
| 44 | +} |
| 45 | + |
| 46 | +void CompleteIndexScoreBatchReader::UpdateScoreFieldIndex(const arrow::StructType* struct_type) { |
| 47 | + if (index_score_field_idx_ != -1) { |
| 48 | + return; |
| 49 | + } |
| 50 | + index_score_field_idx_ = struct_type->GetFieldIndex(SpecialFields::IndexScore().Name()); |
| 51 | + field_names_with_score_.reserve(struct_type->num_fields()); |
| 52 | + for (const auto& field : struct_type->fields()) { |
| 53 | + field_names_with_score_.push_back(field->name()); |
| 54 | + } |
| 55 | +} |
| 56 | +Result<BatchReader::ReadBatchWithBitmap> CompleteIndexScoreBatchReader::NextBatchWithBitmap() { |
| 57 | + PAIMON_ASSIGN_OR_RAISE(BatchReader::ReadBatchWithBitmap batch_with_bitmap, |
| 58 | + reader_->NextBatchWithBitmap()); |
| 59 | + if (BatchReader::IsEofBatch(batch_with_bitmap)) { |
| 60 | + return batch_with_bitmap; |
| 61 | + } |
| 62 | + if (scores_.empty()) { |
| 63 | + // Indicates score field all null. |
| 64 | + return batch_with_bitmap; |
| 65 | + } |
| 66 | + |
| 67 | + auto& [batch, bitmap] = batch_with_bitmap; |
| 68 | + auto& [c_array, c_schema] = batch; |
| 69 | + PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(std::shared_ptr<arrow::Array> arrow_array, |
| 70 | + arrow::ImportArray(c_array.get(), c_schema.get())); |
| 71 | + auto struct_array = std::dynamic_pointer_cast<arrow::StructArray>(arrow_array); |
| 72 | + if (!struct_array) { |
| 73 | + return Status::Invalid("cannot cast array to StructArray in CompleteIndexScoreBatchReader"); |
| 74 | + } |
| 75 | + auto struct_type = struct_array->struct_type(); |
| 76 | + UpdateScoreFieldIndex(struct_type); |
| 77 | + |
| 78 | + // prepare index score array |
| 79 | + std::unique_ptr<arrow::ArrayBuilder> index_score_builder; |
| 80 | + PAIMON_RETURN_NOT_OK_FROM_ARROW(arrow::MakeBuilder( |
| 81 | + arrow_pool_.get(), SpecialFields::IndexScore().Type(), &index_score_builder)); |
| 82 | + auto typed_builder = dynamic_cast<arrow::FloatBuilder*>(index_score_builder.get()); |
| 83 | + assert(typed_builder); |
| 84 | + PAIMON_RETURN_NOT_OK_FROM_ARROW(typed_builder->Reserve(struct_array->length())); |
| 85 | + bool all_not_null = (struct_array->length() == bitmap.Cardinality()); |
| 86 | + for (int64_t i = 0; i < struct_array->length(); i++) { |
| 87 | + if (all_not_null || bitmap.Contains(i)) { |
| 88 | + PAIMON_RETURN_NOT_OK_FROM_ARROW(typed_builder->Append(scores_[score_cursor_++])); |
| 89 | + } else { |
| 90 | + PAIMON_RETURN_NOT_OK_FROM_ARROW(typed_builder->AppendNull()); |
| 91 | + } |
| 92 | + } |
| 93 | + std::shared_ptr<arrow::Array> index_score_array; |
| 94 | + PAIMON_RETURN_NOT_OK_FROM_ARROW(typed_builder->Finish(&index_score_array)); |
| 95 | + // update index score array to struct array |
| 96 | + arrow::ArrayVector array_vec = struct_array->fields(); |
| 97 | + array_vec[index_score_field_idx_] = index_score_array; |
| 98 | + PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(std::shared_ptr<arrow::StructArray> array_with_score, |
| 99 | + arrow::StructArray::Make(array_vec, field_names_with_score_)); |
| 100 | + PAIMON_RETURN_NOT_OK_FROM_ARROW( |
| 101 | + arrow::ExportArray(*array_with_score, c_array.get(), c_schema.get())); |
| 102 | + return batch_with_bitmap; |
| 103 | +} |
| 104 | +} // namespace paimon |
0 commit comments