Skip to content

Commit 55788d7

Browse files
github-actions[bot], luwei16
authored and committed
branch-4.0: [improve](log) Add segment file info when bitshuffle page corruption detected apache#60547 (apache#60689)
Cherry-picked from apache#60547
Co-authored-by: Luwei <814383175@qq.com>
1 parent 68db1eb commit 55788d7

File tree

3 files changed

+18
-6
lines changed

3 files changed

+18
-6
lines changed

be/src/olap/rowset/segment_v2/bitshuffle_page.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -282,13 +282,15 @@ inline Status parse_bit_shuffle_header(const Slice& data, size_t& num_elements,
282282
num_elements = decode_fixed32_le((const uint8_t*)&data[0]);
283283
compressed_size = decode_fixed32_le((const uint8_t*)&data[4]);
284284
num_element_after_padding = decode_fixed32_le((const uint8_t*)&data[8]);
285+
size_of_element = decode_fixed32_le((const uint8_t*)&data[12]);
285286
if (num_element_after_padding != ALIGN_UP(num_elements, 8)) {
286287
return Status::InternalError(
287288
"num of element information corrupted,"
288-
" _num_element_after_padding:{}, _num_elements:{}",
289-
num_element_after_padding, num_elements);
289+
" _num_element_after_padding:{}, _num_elements:{}, expected_padding:{},"
290+
" compressed_size:{}, size_of_element:{}, data_size:{}",
291+
num_element_after_padding, num_elements, ALIGN_UP(num_elements, 8), compressed_size,
292+
size_of_element, data.size);
290293
}
291-
size_of_element = decode_fixed32_le((const uint8_t*)&data[12]);
292294
switch (size_of_element) {
293295
case 1:
294296
case 2:

be/src/olap/rowset/segment_v2/column_reader.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1981,9 +1981,14 @@ Status FileColumnIterator::_read_data_page(const OrdinalPageIndexIterator& iter)
19811981
RETURN_IF_ERROR(
19821982
_reader->read_page(_opts, iter.page(), &handle, &page_body, &footer, _compress_codec));
19831983
// parse data page
1984-
RETURN_IF_ERROR(ParsedPage::create(std::move(handle), page_body, footer.data_page_footer(),
1985-
_reader->encoding_info(), iter.page(), iter.page_index(),
1986-
&_page));
1984+
auto st = ParsedPage::create(std::move(handle), page_body, footer.data_page_footer(),
1985+
_reader->encoding_info(), iter.page(), iter.page_index(), &_page);
1986+
if (!st.ok()) {
1987+
LOG(WARNING) << "failed to create ParsedPage, file=" << _opts.file_reader->path().native()
1988+
<< ", page_offset=" << iter.page().offset << ", page_size=" << iter.page().size
1989+
<< ", page_index=" << iter.page_index() << ", error=" << st;
1990+
return st;
1991+
}
19871992

19881993
// dictionary page is read when the first data page that uses it is read,
19891994
// this is to optimize the memory usage: when there is no query on one column, we could

be/src/olap/rowset/segment_v2/indexed_column_reader.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,11 @@ Status IndexedColumnIterator::_read_data_page(const PagePointer& pp) {
165165
opts.need_check_bitmap = false;
166166
status = ParsedPage::create(std::move(handle), body, footer.data_page_footer(),
167167
_reader->encoding_info(), pp, 0, &_data_page, opts);
168+
if (!status.ok()) {
169+
LOG(WARNING) << "failed to create ParsedPage in IndexedColumnIterator, file="
170+
<< _reader->_file_reader->path().native() << ", page_offset=" << pp.offset
171+
<< ", page_size=" << pp.size << ", error=" << status;
172+
}
168173
DCHECK(_reader->_meta.ordinal_index_meta().is_root_data_page()
169174
? _reader->_meta.num_values() == _data_page.num_rows
170175
: true);

0 commit comments

Comments
 (0)