Skip to content

Commit 3cb0f62

Browse files
committed
fix
1 parent 635c5b4 commit 3cb0f62

39 files changed

+403
-881
lines changed

be/src/olap/rowset/segment_v2/variant/binary_column_reader.cpp

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include <algorithm>
2121
#include <tuple>
2222

23+
#include "olap/rowset/segment_v2/segment.h"
2324
#include "vec/columns/column_array.h"
2425
#include "vec/columns/column_map.h"
2526
#include "vec/columns/column_string.h"
@@ -32,6 +33,49 @@ namespace doris::segment_v2 {
3233

3334
#include "common/compile_check_begin.h"
3435

36+
// Creates the iterator for a reader that has no binary data. It describes a placeholder
// MAP column named "binary_column" with two STRING sub-columns and hands that description
// to Segment::new_default_iterator, which fills in *iter.
// Any error from Segment::new_default_iterator is propagated through RETURN_IF_ERROR;
// otherwise Status::OK() is returned.
Status DummyBinaryColumnReader::new_binary_column_iterator(ColumnIteratorUPtr* iter) const {
37+
// Function-local static: the column description is built by the immediately invoked
// lambda and the same object is reused on later calls.
static const TabletColumn binary_column = []() {
38+
TabletColumn binary_column;
39+
binary_column.set_name("binary_column");
40+
binary_column.set_type(FieldType::OLAP_FIELD_TYPE_MAP);
41+
binary_column.set_default_value("NULL");
42+
TabletColumn child_tcolumn;
43+
child_tcolumn.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
44+
// The same STRING child is added twice (presumably the map's key and value types).
binary_column.add_sub_column(child_tcolumn);
45+
binary_column.add_sub_column(child_tcolumn);
46+
binary_column.set_is_nullable(false);
47+
return binary_column;
48+
}();
49+
// NOTE(review): presumably yields an iterator that produces default values for every
// row -- confirm against Segment::new_default_iterator.
RETURN_IF_ERROR(Segment::new_default_iterator(binary_column, iter));
50+
return Status::OK();
51+
}
52+
53+
// Not supported by the dummy reader: unconditionally throws doris::Exception with
// ErrorCode::NOT_IMPLEMENTED_ERROR. Both arguments are ignored and no Status is ever
// returned, so callers see the failure as an exception rather than an error Status.
Status DummyBinaryColumnReader::add_binary_column_reader(std::shared_ptr<ColumnReader> reader,
54+
uint32_t index) {
55+
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
56+
"DummyBinaryColumnReader does not support add_binary_column_reader");
57+
}
58+
59+
// Not supported by the dummy reader: unconditionally throws doris::Exception with
// ErrorCode::NOT_IMPLEMENTED_ERROR. `relative_path` is ignored and no pair is returned.
std::pair<std::shared_ptr<ColumnReader>, std::string>
60+
DummyBinaryColumnReader::select_reader_and_cache_key(const std::string& relative_path) const {
61+
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
62+
"DummyBinaryColumnReader does not support select_reader_and_cache_key");
63+
}
64+
65+
// Not supported by the dummy reader: unconditionally throws doris::Exception with
// ErrorCode::NOT_IMPLEMENTED_ERROR. `index` is ignored and no reader is returned.
std::shared_ptr<ColumnReader> DummyBinaryColumnReader::select_reader(uint32_t index) const {
66+
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
67+
"DummyBinaryColumnReader does not support select_reader");
68+
}
69+
70+
// Not supported by the dummy reader: unconditionally throws doris::Exception with
// ErrorCode::NOT_IMPLEMENTED_ERROR instead of returning a bucket count.
uint32_t DummyBinaryColumnReader::num_buckets() const {
71+
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
72+
"DummyBinaryColumnReader does not support num_buckets");
73+
}
74+
75+
// Identifies this reader kind: always returns BinaryColumnType::DUMMY.
BinaryColumnType DummyBinaryColumnReader::get_type() const {
76+
return BinaryColumnType::DUMMY;
77+
}
78+
3579
Status SingleSparseColumnReader::add_binary_column_reader(std::shared_ptr<ColumnReader> reader,
3680
uint32_t /*index*/) {
3781
if (_single_reader) {

be/src/olap/rowset/segment_v2/variant/binary_column_reader.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ enum class BinaryColumnType {
3636
SINGLE_SPARSE = 0,
3737
MULTIPLE_SPARSE = 1,
3838
MULTIPLE_DOC_VALUE = 2,
39+
DUMMY = 3,
3940
};
4041

4142
// Combine multiple bucket binary iterators into one logical iterator.
@@ -60,6 +61,19 @@ class BinaryColumnReader {
6061
virtual BinaryColumnType get_type() const = 0;
6162
};
6263

64+
// Dummy binary column reader for a variant column that has no binary data,
65+
// for example a variant column written by an old version without any binary data.
66+
class DummyBinaryColumnReader : public BinaryColumnReader {
67+
public:
68+
// Fills *iter via Segment::new_default_iterator using a placeholder MAP column
// description; this is the only operation the dummy reader actually supports.
Status new_binary_column_iterator(ColumnIteratorUPtr* iter) const override;
69+
// The next four members are unsupported: each definition unconditionally throws
// doris::Exception with ErrorCode::NOT_IMPLEMENTED_ERROR.
Status add_binary_column_reader(std::shared_ptr<ColumnReader> reader, uint32_t index) override;
70+
std::pair<std::shared_ptr<ColumnReader>, std::string> select_reader_and_cache_key(
71+
const std::string& relative_path) const override;
72+
std::shared_ptr<ColumnReader> select_reader(uint32_t index) const override;
73+
uint32_t num_buckets() const override;
74+
// Always returns BinaryColumnType::DUMMY.
BinaryColumnType get_type() const override;
75+
};
76+
6377
class SingleSparseColumnReader : public BinaryColumnReader {
6478
public:
6579
Status add_binary_column_reader(std::shared_ptr<ColumnReader> reader, uint32_t index) override;

be/src/olap/rowset/segment_v2/variant/hierarchical_data_iterator.cpp

Lines changed: 11 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,9 @@ Status HierarchicalDataIterator::create(ColumnIteratorUPtr* reader, int32_t col_
4949
ColumnReaderCache* column_reader_cache,
5050
OlapReaderStatistics* stats, ReadType read_type) {
5151
// Non-leaf nodes need to be merged with the root
52-
std::unique_ptr<HierarchicalDataIterator> stream_iter(new HierarchicalDataIterator(path));
53-
if (node != nullptr && read_type != ReadType::MERGE_ROOT_DOC) {
52+
std::unique_ptr<HierarchicalDataIterator> stream_iter(
53+
new HierarchicalDataIterator(path, read_type));
54+
if (node != nullptr && read_type == ReadType::SUBCOLUMNS_AND_SPARSE) {
5455
std::vector<const SubcolumnColumnMetaInfo::Node*> leaves;
5556
vectorized::PathsInData leaves_paths;
5657
SubcolumnColumnMetaInfo::get_leaves_of_node(node, leaves, leaves_paths);
@@ -68,7 +69,6 @@ Status HierarchicalDataIterator::create(ColumnIteratorUPtr* reader, int32_t col_
6869
// need read from sparse column if not null
6970
stream_iter->_binary_column_reader = std::move(binary_column_reader);
7071
stream_iter->_stats = stats;
71-
stream_iter->_read_type = read_type;
7272
*reader = std::move(stream_iter);
7373

7474
return Status::OK();
@@ -84,7 +84,7 @@ Status HierarchicalDataIterator::init(const ColumnIteratorOptions& opts) {
8484
RETURN_IF_ERROR(_root_reader->iterator->init(opts));
8585
_root_reader->inited = true;
8686
}
87-
if (_binary_column_reader && !_binary_column_reader->inited) {
87+
if (!_binary_column_reader->inited) {
8888
RETURN_IF_ERROR(_binary_column_reader->iterator->init(opts));
8989
_binary_column_reader->inited = true;
9090
}
@@ -100,10 +100,8 @@ Status HierarchicalDataIterator::seek_to_ordinal(ordinal_t ord) {
100100
DCHECK(_root_reader->inited);
101101
RETURN_IF_ERROR(_root_reader->iterator->seek_to_ordinal(ord));
102102
}
103-
if (_binary_column_reader) {
104-
DCHECK(_binary_column_reader->inited);
105-
RETURN_IF_ERROR(_binary_column_reader->iterator->seek_to_ordinal(ord));
106-
}
103+
DCHECK(_binary_column_reader->inited);
104+
RETURN_IF_ERROR(_binary_column_reader->iterator->seek_to_ordinal(ord));
107105
return Status::OK();
108106
}
109107

@@ -319,18 +317,12 @@ Status HierarchicalDataIterator::_process_sparse_column(
319317
vectorized::ColumnVariant& container_variant, size_t nrows) {
320318
using namespace vectorized;
321319
container_variant.clear_sparse_column();
322-
if (!_binary_column_reader) {
323-
container_variant.get_sparse_column()->assume_mutable()->resize(nrows);
324-
container_variant.get_doc_value_column()->assume_mutable()->resize(nrows);
325-
ENABLE_CHECK_CONSISTENCY(&container_variant);
326-
return Status::OK();
327-
}
328320
// process sparse column
329321
if (_path.get_parts().empty()) {
330-
if (_read_type == ReadType::MERGE_ROOT_SPARSE) {
322+
if (_read_type == ReadType::SUBCOLUMNS_AND_SPARSE) {
331323
container_variant.set_sparse_column(_binary_column_reader->column->get_ptr());
332324
container_variant.get_doc_value_column()->assume_mutable()->resize(nrows);
333-
} else if (_read_type == ReadType::MERGE_ROOT_DOC) {
325+
} else if (_read_type == ReadType::DOC_VALUE_COLUMN) {
334326
container_variant.set_doc_value_column(_binary_column_reader->column->get_ptr());
335327
container_variant.get_sparse_column()->assume_mutable()->resize(nrows);
336328
} else {
@@ -402,7 +394,8 @@ Status HierarchicalDataIterator::_process_sparse_column(
402394
lower_bound_index);
403395
}
404396
// Case 2: subcolumn not created yet and we still have quota → create it and insert.
405-
else if (subcolumns_from_sparse_column.size() < count) {
397+
else if (subcolumns_from_sparse_column.size() < count ||
398+
container_variant.max_subcolumns_count() == 0) {
406399
// Initialize subcolumn with current logical row index i to align sizes.
407400
ColumnVariant::Subcolumn subcolumn(/*size*/ i, /*is_nullable*/ true,
408401
false);
@@ -477,9 +470,7 @@ Status HierarchicalDataIterator::_init_null_map_and_clear_columns(
477470
return Status::OK();
478471
}));
479472
container->clear();
480-
if (_binary_column_reader) {
481-
_binary_column_reader->column->clear();
482-
}
473+
_binary_column_reader->column->clear();
483474
if (_root_reader) {
484475
if (_root_reader->column->is_nullable()) {
485476
// fill nullmap

be/src/olap/rowset/segment_v2/variant/hierarchical_data_iterator.h

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -66,13 +66,10 @@ using PathsWithColumnAndType = std::vector<PathWithColumnAndType>;
6666
// Reader for hierarchical data for variant, merge with root(sparse encoded columns)
6767
class HierarchicalDataIterator : public ColumnIterator {
6868
public:
69-
// Currently three types of read, merge sparse columns with root columns, merge doc columns with root columns, or read directly
7069
enum class ReadType {
71-
MERGE_ROOT_SPARSE = 0,
72-
MERGE_ROOT_DOC = 1,
73-
READ_DIRECT = 2,
70+
SUBCOLUMNS_AND_SPARSE = 0,
71+
DOC_VALUE_COLUMN = 1,
7472
};
75-
7673
static Status create(ColumnIteratorUPtr* reader, int32_t col_uid, vectorized::PathInData path,
7774
const SubcolumnColumnMetaInfo::Node* target_node,
7875
std::unique_ptr<SubstreamIterator>&& sparse_reader,
@@ -101,9 +98,9 @@ class HierarchicalDataIterator : public ColumnIterator {
10198
size_t _rows_read = 0;
10299
vectorized::PathInData _path;
103100
OlapReaderStatistics* _stats = nullptr;
104-
ReadType _read_type = ReadType::READ_DIRECT;
105-
106-
HierarchicalDataIterator(const vectorized::PathInData& path) : _path(path) {}
101+
ReadType _read_type = ReadType::SUBCOLUMNS_AND_SPARSE;
102+
HierarchicalDataIterator(const vectorized::PathInData& path, ReadType read_type)
103+
: _path(path), _read_type(read_type) {}
107104

108105
template <typename NodeFunction>
109106
Status tranverse(NodeFunction&& node_func) {

0 commit comments

Comments
 (0)