Skip to content

Commit 1a385de

Browse files
committed
add be and fe
1 parent 31df3a1 commit 1a385de

File tree

290 files changed

+12343
-878
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

290 files changed

+12343
-878
lines changed

be/src/common/consts.h

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,6 @@ const std::string ROW_STORE_COL = "__DORIS_ROW_STORE_COL__";
3030
const std::string DYNAMIC_COLUMN_NAME = "__DORIS_DYNAMIC_COL__";
3131
const std::string PARTIAL_UPDATE_AUTO_INC_COL = "__PARTIAL_UPDATE_AUTO_INC_COLUMN__";
3232
const std::string VIRTUAL_COLUMN_PREFIX = "__DORIS_VIRTUAL_COL__";
33-
const std::string SPARSE_COLUMN_PATH = "__DORIS_VARIANT_SPARSE__";
3433

3534
/// The maximum precision representable by a 4-byte decimal (Decimal4Value)
3635
constexpr int MAX_DECIMAL32_PRECISION = 9;

be/src/olap/olap_common.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -448,6 +448,8 @@ struct OlapReaderStatistics {
448448
int64_t variant_subtree_leaf_iter_count = 0;
449449
int64_t variant_subtree_hierarchical_iter_count = 0;
450450
int64_t variant_subtree_sparse_iter_count = 0;
451+
int64_t variant_subtree_doc_snapshot_extract_iter_count = 0;
452+
int64_t variant_subtree_doc_snapshot_all_iter_count = 0;
451453
};
452454

453455
using ColumnId = uint32_t;

be/src/olap/rowset/segment_creator.cpp

Lines changed: 0 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -66,10 +66,6 @@ Status SegmentFlusher::flush_single_block(const vectorized::Block* block, int32_
6666
return Status::OK();
6767
}
6868
vectorized::Block flush_block(*block);
69-
if (_context.write_type != DataWriteType::TYPE_COMPACTION &&
70-
_context.tablet_schema->num_variant_columns() > 0) {
71-
RETURN_IF_ERROR(_parse_variant_columns(flush_block));
72-
}
7369
bool no_compression = flush_block.bytes() <= config::segment_compression_threshold_kb * 1024;
7470
if (config::enable_vertical_segment_writer) {
7571
std::unique_ptr<segment_v2::VerticalSegmentWriter> writer;
@@ -85,31 +81,6 @@ Status SegmentFlusher::flush_single_block(const vectorized::Block* block, int32_
8581
return Status::OK();
8682
}
8783

88-
Status SegmentFlusher::_internal_parse_variant_columns(vectorized::Block& block) {
89-
size_t num_rows = block.rows();
90-
if (num_rows == 0) {
91-
return Status::OK();
92-
}
93-
94-
std::vector<int> variant_column_pos;
95-
for (int i = 0; i < block.columns(); ++i) {
96-
const auto& entry = block.get_by_position(i);
97-
if (entry.type->get_primitive_type() == TYPE_VARIANT) {
98-
variant_column_pos.push_back(i);
99-
}
100-
}
101-
102-
if (variant_column_pos.empty()) {
103-
return Status::OK();
104-
}
105-
106-
vectorized::ParseConfig config;
107-
config.enable_flatten_nested = _context.tablet_schema->variant_flatten_nested();
108-
RETURN_IF_ERROR(
109-
vectorized::schema_util::parse_variant_columns(block, variant_column_pos, config));
110-
return Status::OK();
111-
}
112-
11384
Status SegmentFlusher::close() {
11485
return _seg_files.close();
11586
}

be/src/olap/rowset/segment_creator.h

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -139,11 +139,6 @@ class SegmentFlusher {
139139
Status create_writer(std::unique_ptr<SegmentFlusher::Writer>& writer, uint32_t segment_id);
140140

141141
private:
142-
// This method will catch exception when allocate memory failed
143-
Status _parse_variant_columns(vectorized::Block& block) {
144-
RETURN_IF_CATCH_EXCEPTION({ return _internal_parse_variant_columns(block); });
145-
}
146-
Status _internal_parse_variant_columns(vectorized::Block& block);
147142
Status _add_rows(std::unique_ptr<segment_v2::SegmentWriter>& segment_writer,
148143
const vectorized::Block* block, size_t row_offset, size_t row_num);
149144
Status _add_rows(std::unique_ptr<segment_v2::VerticalSegmentWriter>& segment_writer,

be/src/olap/rowset/segment_v2/column_writer.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -295,6 +295,11 @@ Status ColumnWriter::create_variant_writer(const ColumnWriterOptions& opts,
295295
const TabletColumn* column, io::FileWriter* file_writer,
296296
std::unique_ptr<ColumnWriter>* writer) {
297297
if (column->is_extracted_column()) {
298+
if (column->name().find(DOC_SNAPSHOT_COLUMN_PATH) != std::string::npos) {
299+
*writer = std::make_unique<VariantCompactionDocSnapshotWriter>(
300+
opts, column, std::unique_ptr<Field>(FieldFactory::create(*column)));
301+
return Status::OK();
302+
}
298303
VLOG_DEBUG << "gen subwriter for " << column->path_info_ptr()->get_path();
299304
*writer = std::make_unique<VariantSubcolumnWriter>(
300305
opts, column, std::unique_ptr<Field>(FieldFactory::create(*column)));

be/src/olap/rowset/segment_v2/external_col_meta_util.cpp

Lines changed: 1 addition & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -172,27 +172,7 @@ Status ExternalColMetaUtil::write_external_column_meta(
172172
// 2) write pointers via proto fields
173173
footer->set_col_meta_region_start(meta_region_start);
174174

175-
// 3) clear inline columns to enable true on-demand meta loading
176-
// Note: footer->columns has already been pruned to only Top Level Columns in externalize_from_footer
177-
// But for full externalization, we might want to clear them all or keep only necessary info?
178-
// The original logic was footer->clear_columns().
179-
// If we clear columns, the Reader needs to know how to reconstruct the schema.
180-
// Currently, SegmentFooterPB.columns is used as the schema source if present.
181-
// If we clear it, Reader must rely on External Meta.
182-
// However, the Reader typically reads footer first. If columns is empty, it assumes V3 and reads external.
183-
// So yes, we should clear it.
184-
// But wait, in externalize_from_footer we carefully put Top Level columns back into footer->columns.
185-
// Why? Because in previous logic, we might want to keep roots in footer?
186-
// The previous logic: "replace Footer.columns with only the kept top-level columns".
187-
// BUT then `write_external_column_meta` calls `footer->clear_columns()` at the end!
188-
// So `footer->columns` will be empty anyway.
189-
// The only reason to reconstruct `footer->columns` in `externalize_from_footer` is if `write_external_column_meta` logic depended on it.
190-
// In my updated `write_external_column_meta`, I iterate over `all_metas` which is returned by `externalize_from_footer`.
191-
// So I don't strictly need `footer->columns` to be correct in between.
192-
// However, strictly following protocol: `externalize_from_footer` modifies footer to reflect "logical" columns (Top Level).
193-
// And then `write_external_column_meta` finalizes it by clearing them and setting pointers.
194-
195-
footer->clear_columns();
175+
// Note: footer->columns has already been pruned in externalize_from_footer
196176
return Status::OK();
197177
}
198178

be/src/olap/rowset/segment_v2/segment.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -650,7 +650,7 @@ Status Segment::new_default_iterator(const TabletColumn& tablet_column,
650650
Status Segment::new_column_iterator(const TabletColumn& tablet_column,
651651
std::unique_ptr<ColumnIterator>* iter,
652652
const StorageReadOptions* opt,
653-
const std::unordered_map<int32_t, PathToSparseColumnCacheUPtr>*
653+
const std::unordered_map<int32_t, PathToBinaryColumnCacheUPtr>*
654654
variant_sparse_column_cache) {
655655
if (opt->runtime_state != nullptr) {
656656
_be_exec_version = opt->runtime_state->be_exec_version();
@@ -675,7 +675,7 @@ Status Segment::new_column_iterator(const TabletColumn& tablet_column,
675675
}
676676
if (reader->get_meta_type() == FieldType::OLAP_FIELD_TYPE_VARIANT) {
677677
// if sparse_column_cache_ptr is nullptr, means the sparse column cache is not used
678-
PathToSparseColumnCache* sparse_column_cache_ptr = nullptr;
678+
PathToBinaryColumnCache* sparse_column_cache_ptr = nullptr;
679679
if (variant_sparse_column_cache) {
680680
auto it = variant_sparse_column_cache->find(unique_id);
681681
if (it != variant_sparse_column_cache->end()) {

be/src/olap/rowset/segment_v2/segment.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -115,7 +115,7 @@ class Segment : public std::enable_shared_from_this<Segment>, public MetadataAdd
115115
// if variant_sparse_column_cache is nullptr, means the sparse column cache is not used
116116
Status new_column_iterator(const TabletColumn& tablet_column,
117117
std::unique_ptr<ColumnIterator>* iter, const StorageReadOptions* opt,
118-
const std::unordered_map<int32_t, PathToSparseColumnCacheUPtr>*
118+
const std::unordered_map<int32_t, PathToBinaryColumnCacheUPtr>*
119119
variant_sparse_column_cache = nullptr);
120120

121121
Status new_index_iterator(const TabletColumn& tablet_column, const TabletIndex* index_meta,

be/src/olap/rowset/segment_v2/segment_iterator.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -411,7 +411,7 @@ Status SegmentIterator::_init_impl(const StorageReadOptions& opts) {
411411
if (int32_t uid = col->get_unique_id(); !_variant_sparse_column_cache.contains(uid)) {
412412
DCHECK(uid >= 0);
413413
_variant_sparse_column_cache.emplace(uid,
414-
std::make_unique<PathToSparseColumnCache>());
414+
std::make_unique<PathToBinaryColumnCache>());
415415
}
416416
}
417417
}

be/src/olap/rowset/segment_v2/segment_iterator.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -516,7 +516,7 @@ class SegmentIterator : public RowwiseIterator {
516516
IndexQueryContextPtr _index_query_context;
517517

518518
// key is column uid, value is the sparse column cache
519-
std::unordered_map<int32_t, PathToSparseColumnCacheUPtr> _variant_sparse_column_cache;
519+
std::unordered_map<int32_t, PathToBinaryColumnCacheUPtr> _variant_sparse_column_cache;
520520

521521
bool _find_condition_cache = false;
522522
std::shared_ptr<std::vector<bool>> _condition_cache;

0 commit comments

Comments
 (0)