Skip to content

Commit 92203ea

Browse files
authored
branch-3.1:[External](parquet) pass non predicates column's offset index to RowGroupReader #55795 (#57409)
bp #55795
1 parent 6e35555 commit 92203ea

File tree

1 file changed

+17
-5
lines changed

1 file changed

+17
-5
lines changed

be/src/vec/exec/format/parquet/vparquet_reader.cpp

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -809,17 +809,32 @@ Status ParquetReader::_process_page_index(const tparquet::RowGroup& row_group,
809809
for (size_t idx = 0; idx < _read_table_columns.size(); idx++) {
810810
const auto& read_table_col = _read_table_columns[idx];
811811
const auto& read_file_col = _read_file_columns[idx];
812-
auto conjunct_iter = _colname_to_value_range->find(read_table_col);
813-
if (_colname_to_value_range->end() == conjunct_iter) {
812+
if (!_colname_to_slot_id->contains(read_table_col)) {
813+
// Equality deletes may add a column to read_table_col, but such a column has no slot_id.
814814
continue;
815815
}
816+
816817
int parquet_col_id =
817818
_file_metadata->schema().get_column(read_file_col)->physical_column_index;
818819
if (parquet_col_id < 0) {
819820
// Complex type: page index is not supported yet.
820821
continue;
821822
}
823+
822824
auto& chunk = row_group.columns[parquet_col_id];
825+
if (chunk.offset_index_length == 0) {
826+
continue;
827+
}
828+
829+
tparquet::OffsetIndex offset_index;
830+
RETURN_IF_ERROR(page_index.parse_offset_index(chunk, off_index_buff.data(), &offset_index));
831+
_col_offsets[parquet_col_id] = offset_index;
832+
833+
auto conjunct_iter = _colname_to_value_range->find(read_table_col);
834+
if (_colname_to_value_range->end() == conjunct_iter) {
835+
continue;
836+
}
837+
823838
if (chunk.column_index_offset == 0 && chunk.column_index_length == 0) {
824839
continue;
825840
}
@@ -837,16 +852,13 @@ Status ParquetReader::_process_page_index(const tparquet::RowGroup& row_group,
837852
if (skipped_page_range.empty()) {
838853
continue;
839854
}
840-
tparquet::OffsetIndex offset_index;
841-
RETURN_IF_ERROR(page_index.parse_offset_index(chunk, off_index_buff.data(), &offset_index));
842855
for (int page_id : skipped_page_range) {
843856
RowRange skipped_row_range;
844857
RETURN_IF_ERROR(page_index.create_skipped_row_range(offset_index, row_group.num_rows,
845858
page_id, &skipped_row_range));
846859
// use the union row range
847860
skipped_row_ranges.emplace_back(skipped_row_range);
848861
}
849-
_col_offsets[parquet_col_id] = offset_index;
850862
}
851863
if (skipped_row_ranges.empty()) {
852864
read_whole_row_group();

0 commit comments

Comments
 (0)