Skip to content

Commit 4a2c38c

Browse files
authored
[refactor](olap) Reduce header dependencies on column_reader.h (#59324)
### What problem does this PR solve?

This PR refactors header dependencies to minimize direct includes of `olap/rowset/segment_v2/column_reader.h`.

Previously, a large number of headers included `column_reader.h` either directly or indirectly, which caused widespread recompilation whenever `ColumnReader` or `ColumnIterator` was modified. This made iterating on the column reader implementation expensive and slowed down development.

In this change, we:

- Remove unnecessary `#include "column_reader.h"` from header files
- Replace includes with forward declarations where possible
- Move required includes to corresponding source files

As a result, changes to `column_reader.h` no longer trigger recompilation of a large set of unrelated source files, significantly reducing build time and improving developer productivity.

This refactor does not introduce any functional changes.

Related PR: #xxx

Problem Summary:

### Release note

None

### Check List (For Author)

- Test <!-- At least one of them must be included. -->
    - [ ] Regression test
    - [ ] Unit Test
    - [ ] Manual test (add detailed scripts or steps below)
    - [ ] No need to test or manual test. Explain why:
        - [ ] This is a refactor/code format and no logic has been changed.
        - [ ] Previous test can cover this change.
        - [ ] No code files have been changed.
        - [ ] Other reason <!-- Add your reason? -->

- Behavior changed:
    - [ ] No.
    - [ ] Yes. <!-- Explain the behavior change -->

- Does this need documentation?
    - [ ] No.
    - [ ] Yes. <!-- Add document PR link here. eg: apache/doris-website#1214 -->

### Check List (For Reviewer who merge this PR)

- [ ] Confirm the release note
- [ ] Confirm test cases
- [ ] Confirm document
- [ ] Add branch pick label <!-- Add branch pick label that this PR should merge into -->
1 parent 9f2c6ed commit 4a2c38c

24 files changed

+93
-74
lines changed

be/src/exec/rowid_fetcher.cpp

Lines changed: 1 addition & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -38,27 +38,22 @@
3838
#include <vector>
3939

4040
#include "bthread/countdown_event.h"
41-
#include "cloud/cloud_storage_engine.h"
42-
#include "cloud/cloud_tablet.h"
43-
#include "cloud/cloud_tablet_mgr.h"
44-
#include "cloud/config.h"
4541
#include "common/config.h"
4642
#include "common/consts.h"
4743
#include "common/exception.h"
4844
#include "common/signal_handler.h"
4945
#include "exec/tablet_info.h" // DorisNodesInfo
5046
#include "olap/olap_common.h"
5147
#include "olap/rowset/beta_rowset.h"
48+
#include "olap/rowset/segment_v2/column_reader.h"
5249
#include "olap/storage_engine.h"
5350
#include "olap/tablet_fwd.h"
54-
#include "olap/tablet_manager.h"
5551
#include "olap/tablet_schema.h"
5652
#include "olap/utils.h"
5753
#include "runtime/descriptors.h"
5854
#include "runtime/exec_env.h" // ExecEnv
5955
#include "runtime/fragment_mgr.h" // FragmentMgr
6056
#include "runtime/runtime_state.h" // RuntimeState
61-
#include "runtime/types.h"
6257
#include "runtime/workload_group/workload_group_manager.h"
6358
#include "semaphore"
6459
#include "util/brpc_client_cache.h" // BrpcClientCache
@@ -69,13 +64,11 @@
6964
#include "vec/common/assert_cast.h"
7065
#include "vec/common/string_ref.h"
7166
#include "vec/core/block.h" // Block
72-
#include "vec/data_types/data_type_factory.hpp"
7367
#include "vec/data_types/data_type_struct.h"
7468
#include "vec/data_types/serde/data_type_serde.h"
7569
#include "vec/exec/format/orc/vorc_reader.h"
7670
#include "vec/exec/format/parquet/vparquet_reader.h"
7771
#include "vec/exec/scan/file_scanner.h"
78-
#include "vec/functions/function_helpers.h"
7972
#include "vec/jsonb/serialize.h"
8073

8174
namespace doris {

be/src/olap/base_tablet.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,7 @@
4545
#include "olap/rowset/rowset.h"
4646
#include "olap/rowset/rowset_fwd.h"
4747
#include "olap/rowset/rowset_reader.h"
48+
#include "olap/rowset/segment_v2/column_reader.h"
4849
#include "olap/tablet_fwd.h"
4950
#include "olap/txn_manager.h"
5051
#include "service/point_query_executor.h"

be/src/olap/delete_bitmap_calculator.cpp

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,10 +22,22 @@
2222
#include "common/cast_set.h"
2323
#include "common/status.h"
2424
#include "olap/primary_key_index.h"
25+
#include "olap/rowset/segment_v2/indexed_column_reader.h"
26+
#include "olap/tablet_meta.h"
2527
#include "vec/data_types/data_type_factory.hpp"
2628

2729
namespace doris {
2830
#include "common/compile_check_begin.h"
31+
32+
MergeIndexDeleteBitmapCalculatorContext::MergeIndexDeleteBitmapCalculatorContext(
33+
std::unique_ptr<segment_v2::IndexedColumnIterator> iter, vectorized::DataTypePtr index_type,
34+
int32_t segment_id, size_t num_rows, size_t batch_max_size)
35+
: _iter(std::move(iter)),
36+
_index_type(std::move(index_type)),
37+
_num_rows(num_rows),
38+
_max_batch_size(batch_max_size),
39+
_segment_id(segment_id) {}
40+
2941
Status MergeIndexDeleteBitmapCalculatorContext::get_current_key(Slice& slice) {
3042
if (_cur_row_id >= _num_rows) {
3143
return Status::EndOfFile("Reach the end of file");

be/src/olap/delete_bitmap_calculator.h

Lines changed: 8 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -27,23 +27,19 @@
2727

2828
#include "common/config.h"
2929
#include "common/status.h"
30-
#include "olap/base_tablet.h"
31-
#include "olap/binlog_config.h"
32-
#include "olap/data_dir.h"
3330
#include "olap/key_coder.h"
3431
#include "olap/olap_common.h"
35-
#include "olap/rowset/rowset.h"
36-
#include "olap/rowset/rowset_meta.h"
37-
#include "olap/rowset/rowset_reader.h"
3832
#include "olap/rowset/segment_v2/segment.h"
39-
#include "olap/tablet_meta.h"
40-
#include "olap/tablet_schema.h"
41-
#include "olap/version_graph.h"
42-
#include "util/metrics.h"
43-
#include "util/once.h"
4433
#include "util/slice.h"
4534

4635
namespace doris {
36+
namespace segment_v2 {
37+
class IndexedColumnIterator;
38+
class Segment;
39+
40+
} // namespace segment_v2
41+
42+
using SegmentSharedPtr = std::shared_ptr<segment_v2::Segment>;
4743

4844
class MergeIndexDeleteBitmapCalculatorContext {
4945
public:
@@ -62,12 +58,7 @@ class MergeIndexDeleteBitmapCalculatorContext {
6258

6359
MergeIndexDeleteBitmapCalculatorContext(std::unique_ptr<segment_v2::IndexedColumnIterator> iter,
6460
vectorized::DataTypePtr index_type, int32_t segment_id,
65-
size_t num_rows, size_t batch_max_size = 1024)
66-
: _iter(std::move(iter)),
67-
_index_type(index_type),
68-
_num_rows(num_rows),
69-
_max_batch_size(batch_max_size),
70-
_segment_id(segment_id) {}
61+
size_t num_rows, size_t batch_max_size = 1024);
7162
Status get_current_key(Slice& slice);
7263
Status advance();
7364
Status seek_at_or_after(Slice const& key);

be/src/olap/rowset/segment_v2/column_reader.h

Lines changed: 6 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -24,17 +24,14 @@
2424
#include <cstddef> // for size_t
2525
#include <cstdint> // for uint32_t
2626
#include <memory> // for unique_ptr
27-
#include <mutex>
2827
#include <string>
2928
#include <utility>
3029
#include <vector>
3130

3231
#include "common/config.h"
33-
#include "common/exception.h"
3432
#include "common/logging.h"
3533
#include "common/status.h" // for Status
3634
#include "io/fs/file_reader_writer_fwd.h"
37-
#include "io/fs/file_system.h"
3835
#include "io/io_common.h"
3936
#include "olap/olap_common.h"
4037
#include "olap/rowset/segment_v2/common.h"
@@ -50,9 +47,7 @@
5047
#include "util/once.h"
5148
#include "vec/columns/column.h"
5249
#include "vec/columns/column_array.h" // ColumnArray
53-
#include "vec/columns/subcolumn_tree.h"
5450
#include "vec/data_types/data_type.h"
55-
#include "vec/json/path_in_data.h"
5651

5752
namespace doris {
5853
#include "common/compile_check_begin.h"
@@ -662,7 +657,7 @@ class RowIdColumnIterator : public ColumnIterator {
662657

663658
Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst, bool* has_null) override {
664659
for (size_t i = 0; i < *n; ++i) {
665-
rowid_t row_id = cast_set<uint32_t>(_current_rowid + i);
660+
const auto row_id = cast_set<uint32_t>(_current_rowid + i);
666661
GlobalRowLoacation location(_tablet_id, _rowset_id, _segment_id, row_id);
667662
dst->insert_data(reinterpret_cast<const char*>(&location), sizeof(GlobalRowLoacation));
668663
}
@@ -722,14 +717,12 @@ class RowIdColumnIteratorV2 : public ColumnIterator {
722717
// This iterator is used to read default value column
723718
class DefaultValueColumnIterator : public ColumnIterator {
724719
public:
725-
DefaultValueColumnIterator(bool has_default_value, const std::string& default_value,
726-
bool is_nullable, TypeInfoPtr type_info, int precision, int scale)
720+
DefaultValueColumnIterator(bool has_default_value, std::string default_value, bool is_nullable,
721+
TypeInfoPtr type_info, int precision, int scale)
727722
: _has_default_value(has_default_value),
728-
_default_value(default_value),
723+
_default_value(std::move(default_value)),
729724
_is_nullable(is_nullable),
730725
_type_info(std::move(type_info)),
731-
_is_default_value_null(false),
732-
_type_size(0),
733726
_precision(precision),
734727
_scale(scale) {}
735728

@@ -766,8 +759,8 @@ class DefaultValueColumnIterator : public ColumnIterator {
766759
std::string _default_value;
767760
bool _is_nullable;
768761
TypeInfoPtr _type_info;
769-
bool _is_default_value_null;
770-
size_t _type_size;
762+
bool _is_default_value_null {false};
763+
size_t _type_size {0};
771764
int _precision;
772765
int _scale;
773766
std::vector<char> _mem_value;

be/src/olap/rowset/segment_v2/column_reader_cache.h

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -18,10 +18,13 @@
1818

1919
#include "agent/be_exec_version_manager.h"
2020
#include "io/fs/file_reader.h"
21-
#include "olap/rowset/segment_v2/column_reader.h"
21+
#include "olap/rowset/segment_v2/stream_reader.h"
22+
#include "olap/tablet_fwd.h"
23+
#include "vec/json/path_in_data.h"
2224

2325
namespace doris::segment_v2 {
2426

27+
class ColumnReader;
2528
class ColumnReaderCache;
2629
class ColumnMetaAccessor;
2730

@@ -31,7 +34,7 @@ using ColumnReaderCacheKey = std::pair<int32_t, vectorized::PathInData>;
3134
// This node holds the cached ColumnReader and its key.
3235
struct CacheNode {
3336
ColumnReaderCacheKey key; // key: (column uid, column path)
34-
std::shared_ptr<segment_v2::ColumnReader> reader;
37+
std::shared_ptr<ColumnReader> reader;
3538
std::chrono::steady_clock::time_point last_access; // optional if needed
3639
};
3740

be/src/olap/rowset/segment_v2/segment.h

Lines changed: 12 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,6 @@
2727
#include <memory> // for unique_ptr
2828
#include <string>
2929
#include <unordered_map>
30-
#include <vector>
3130

3231
#include "agent/be_exec_version_manager.h"
3332
#include "common/status.h" // Status
@@ -37,21 +36,14 @@
3736
#include "olap/field.h"
3837
#include "olap/olap_common.h"
3938
#include "olap/page_cache.h"
40-
#include "olap/rowset/segment_v2/column_reader.h" // ColumnReader
4139
#include "olap/rowset/segment_v2/page_handle.h"
42-
#include "olap/rowset/segment_v2/variant/variant_column_reader.h"
4340
#include "olap/schema.h"
4441
#include "olap/tablet_schema.h"
45-
#include "runtime/define_primitive_type.h"
4642
#include "runtime/descriptors.h"
47-
#include "runtime/primitive_type.h"
4843
#include "util/once.h"
4944
#include "util/slice.h"
5045
#include "vec/columns/column.h"
5146
#include "vec/data_types/data_type.h"
52-
#include "vec/data_types/data_type_nullable.h"
53-
#include "vec/json/path_in_data.h"
54-
5547
namespace doris {
5648
namespace vectorized {
5749
class IDataType;
@@ -70,10 +62,21 @@ class Segment;
7062
class InvertedIndexIterator;
7163
class IndexFileReader;
7264
class IndexIterator;
65+
class ColumnReader;
66+
class ColumnIterator;
7367
class ColumnReaderCache;
7468
class ColumnMetaAccessor;
7569

7670
using SegmentSharedPtr = std::shared_ptr<Segment>;
71+
72+
struct SparseColumnCache;
73+
using SparseColumnCacheSPtr = std::shared_ptr<SparseColumnCache>;
74+
75+
// key is column path, value is the sparse column cache
76+
// now column path is only SPARSE_COLUMN_PATH, in the future, we can add more sparse column paths
77+
using PathToSparseColumnCache = std::unordered_map<std::string, SparseColumnCacheSPtr>;
78+
using PathToSparseColumnCacheUPtr = std::unique_ptr<PathToSparseColumnCache>;
79+
7780
// A Segment is used to represent a segment in memory format. When segment is
7881
// generated, it won't be modified, so this struct aimed to help read operation.
7982
// It will prepare all ColumnReader to create ColumnIterator as needed.
@@ -95,7 +98,7 @@ class Segment : public std::enable_shared_from_this<Segment>, public MetadataAdd
9598
return file_cache_key(_rowset_id.to_string(), _segment_id);
9699
}
97100

98-
~Segment();
101+
~Segment() override;
99102

100103
int64_t get_metadata_size() const override;
101104
void update_metadata_size();
be/src/olap/rowset/segment_v2/stream_reader.cpp

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,27 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#include "stream_reader.h"
19+
20+
#include "olap/rowset/segment_v2/column_reader.h"
21+
22+
namespace doris::segment_v2 {
23+
SubstreamIterator::SubstreamIterator(vectorized::MutableColumnPtr&& col,
24+
std::unique_ptr<ColumnIterator>&& it,
25+
std::shared_ptr<const vectorized::IDataType> t)
26+
: column(std::move(col)), iterator(std::move(it)), type(std::move(t)) {}
27+
} // namespace doris::segment_v2

be/src/olap/rowset/segment_v2/stream_reader.h

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,6 @@
1919

2020
#include <memory>
2121

22-
#include "olap/rowset/segment_v2/column_reader.h"
2322
#include "vec/columns/column.h"
2423
#include "vec/columns/subcolumn_tree.h"
2524
#include "vec/data_types/data_type.h"
@@ -43,8 +42,7 @@ struct SubstreamIterator {
4342
size_t rows_read = 0;
4443
SubstreamIterator() = default;
4544
SubstreamIterator(vectorized::MutableColumnPtr&& col, std::unique_ptr<ColumnIterator>&& it,
46-
std::shared_ptr<const vectorized::IDataType> t)
47-
: column(std::move(col)), iterator(std::move(it)), type(t) {}
45+
std::shared_ptr<const vectorized::IDataType> t);
4846
};
4947

5048
// path -> SubstreamIterator

be/src/service/point_query_executor.cpp

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,6 @@
2626
#include <google/protobuf/extension_set.h>
2727
#include <stdlib.h>
2828

29-
#include <climits>
3029
#include <memory>
3130
#include <unordered_map>
3231
#include <vector>
@@ -41,8 +40,7 @@
4140
#include "olap/row_cursor.h"
4241
#include "olap/rowset/beta_rowset.h"
4342
#include "olap/rowset/rowset_fwd.h"
44-
#include "olap/storage_engine.h"
45-
#include "olap/tablet_manager.h"
43+
#include "olap/rowset/segment_v2/column_reader.h"
4644
#include "olap/tablet_schema.h"
4745
#include "olap/utils.h"
4846
#include "runtime/descriptors.h"

0 commit comments

Comments
 (0)