Skip to content

Commit 7e261f7

Browse files
authored
[refactor](inverted_index) replace custom memory pool with doris built-in arena (apache#52334)
### What problem does this PR solve?

Replace the custom memory pool implementation (`IKMemoryPool`) with Doris's built-in arena (`vectorized::Arena`) to improve memory-management consistency and reduce code maintenance overhead.
1 parent bf340ad commit 7e261f7

File tree

12 files changed

+42
-138
lines changed

12 files changed

+42
-138
lines changed

be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/core/AnalyzeContext.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,15 @@
1919

2020
namespace doris::segment_v2 {
2121

22-
AnalyzeContext::AnalyzeContext(IKMemoryPool<Cell>& pool, std::shared_ptr<Configuration> config)
22+
AnalyzeContext::AnalyzeContext(vectorized::Arena& arena, std::shared_ptr<Configuration> config)
2323
: segment_buff_(),
2424
typed_runes_(),
2525
buffer_offset_(0),
2626
cursor_(0),
2727
available_(0),
2828
last_useless_char_num_(0),
2929
buffer_locker_(0),
30-
org_lexemes_(pool),
30+
org_lexemes_(arena),
3131
path_map_(),
3232
results_(),
3333
config_(config) {

be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/core/AnalyzeContext.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@
3434
#include "common/logging.h"
3535
#include "olap/rowset/segment_v2/inverted_index/analyzer/ik/cfg/Configuration.h"
3636
#include "olap/rowset/segment_v2/inverted_index/analyzer/ik/dic/Dictionary.h"
37+
#include "vec/common/arena.h"
38+
3739
namespace doris::segment_v2 {
3840

3941
class AnalyzeContext {
@@ -80,7 +82,7 @@ class AnalyzeContext {
8082
SURROGATE_PAIR_SEGMENTER
8183
};
8284
const CharacterUtil::TypedRuneArray& getTypedRuneArray() const { return typed_runes_; }
83-
explicit AnalyzeContext(IKMemoryPool<Cell>& pool, std::shared_ptr<Configuration> config);
85+
explicit AnalyzeContext(vectorized::Arena& arena, std::shared_ptr<Configuration> config);
8486
virtual ~AnalyzeContext();
8587

8688
void reset();

be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/core/IKArbitrator.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ namespace doris::segment_v2 {
2222
void IKArbitrator::process(AnalyzeContext& context, bool use_smart) {
2323
auto org_lexemes = context.getOrgLexemes();
2424
auto org_lexeme = org_lexemes->pollFirst();
25-
LexemePath* cross_path = new LexemePath(pool_);
25+
LexemePath* cross_path = new LexemePath(arena_);
2626

2727
auto process_path = [&](LexemePath* path) {
2828
if (path->size() == 1 || !use_smart) {
@@ -43,7 +43,7 @@ void IKArbitrator::process(AnalyzeContext& context, bool use_smart) {
4343
// Find the next crossPath that does not intersect with crossPath.
4444
process_path(cross_path);
4545
// Add orgLexeme to the new crossPath
46-
cross_path = new LexemePath(pool_);
46+
cross_path = new LexemePath(arena_);
4747
cross_path->addCrossLexeme(*org_lexeme);
4848
}
4949
org_lexeme = org_lexemes->pollFirst();
@@ -56,12 +56,12 @@ void IKArbitrator::process(AnalyzeContext& context, bool use_smart) {
5656
// Performs ambiguity resolution on a given lexeme path.
5757
LexemePath* IKArbitrator::judge(Cell* lexeme_cell, size_t full_text_length) {
5858
// Candidate result path
59-
LexemePath* path_option = new LexemePath(pool_);
59+
LexemePath* path_option = new LexemePath(arena_);
6060

6161
// Traverse crossPath once and return the stack of conflicting Lexemes
6262
std::stack<Cell*, std::vector<Cell*>> lexemeStack;
6363
forwardPath(lexeme_cell, path_option, lexemeStack);
64-
LexemePath* best_path = new LexemePath(*path_option, pool_);
64+
LexemePath* best_path = new LexemePath(*path_option, arena_);
6565

6666
// Process ambiguous words if they exist
6767
while (!lexemeStack.empty()) {
@@ -71,7 +71,7 @@ LexemePath* IKArbitrator::judge(Cell* lexeme_cell, size_t full_text_length) {
7171
forwardPath(c, path_option);
7272
if (*path_option < *best_path) {
7373
delete best_path;
74-
best_path = new LexemePath(*path_option, pool_);
74+
best_path = new LexemePath(*path_option, arena_);
7575
}
7676
}
7777
delete path_option;

be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/core/IKArbitrator.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,12 @@ namespace doris::segment_v2 {
2929

3030
class IKArbitrator {
3131
public:
32-
IKArbitrator(IKMemoryPool<Cell>& pool) : pool_(pool) {}
32+
IKArbitrator(vectorized::Arena& arena) : arena_(arena) {}
3333
// Ambiguity handling
3434
void process(AnalyzeContext& context, bool use_smart);
3535

3636
private:
37-
IKMemoryPool<Cell>& pool_;
37+
vectorized::Arena& arena_;
3838
// Ambiguity identification
3939
LexemePath* judge(Cell* lexeme_cell, size_t full_text_length);
4040

be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/core/IKSegmenter.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,12 @@
1919

2020
namespace doris::segment_v2 {
2121

22-
constexpr size_t DEFAULT_MEMORY_POOL_SIZE = 512;
23-
2422
IKSegmenter::IKSegmenter(std::shared_ptr<Configuration> config)
25-
: pool_(DEFAULT_MEMORY_POOL_SIZE),
23+
: arena_(),
2624
config_(config),
27-
context_(std::make_unique<AnalyzeContext>(pool_, config_)),
25+
context_(std::make_unique<AnalyzeContext>(arena_, config_)),
2826
segmenters_(loadSegmenters()),
29-
arbitrator_(IKArbitrator(pool_)) {}
27+
arbitrator_(IKArbitrator(arena_)) {}
3028

3129
std::vector<std::unique_ptr<ISegmenter>> IKSegmenter::loadSegmenters() {
3230
std::vector<std::unique_ptr<ISegmenter>> segmenters;
@@ -62,6 +60,7 @@ bool IKSegmenter::next(Lexeme& lexeme) {
6260
arbitrator_.process(*context_, config_->isUseSmart());
6361
context_->outputToResult();
6462
context_->markBufferOffset();
63+
arena_.clear();
6564
}
6665
return true;
6766
}

be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/core/IKSegmenter.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "LetterSegmenter.h"
3030
#include "SurrogatePairSegmenter.h"
3131
#include "olap/rowset/segment_v2/inverted_index/analyzer/ik/cfg/Configuration.h"
32+
#include "vec/common/arena.h"
3233
namespace doris::segment_v2 {
3334

3435
class IKSegmenter {
@@ -40,7 +41,7 @@ class IKSegmenter {
4041

4142
private:
4243
std::vector<std::unique_ptr<ISegmenter>> loadSegmenters();
43-
IKMemoryPool<Cell> pool_;
44+
vectorized::Arena arena_;
4445
lucene::util::Reader* input_;
4546
std::shared_ptr<Configuration> config_;
4647
std::unique_ptr<AnalyzeContext> context_;

be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/core/LexemePath.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,11 @@
1919

2020
namespace doris::segment_v2 {
2121

22-
LexemePath::LexemePath(IKMemoryPool<Cell>& pool)
23-
: QuickSortSet(pool), path_begin_(-1), path_end_(-1), payload_length_(0) {}
22+
LexemePath::LexemePath(vectorized::Arena& arena)
23+
: QuickSortSet(arena), path_begin_(-1), path_end_(-1), payload_length_(0) {}
2424

25-
LexemePath::LexemePath(LexemePath& other, IKMemoryPool<Cell>& pool)
26-
: QuickSortSet(pool),
25+
LexemePath::LexemePath(LexemePath& other, vectorized::Arena& arena)
26+
: QuickSortSet(arena),
2727
path_begin_(other.path_begin_),
2828
path_end_(other.path_end_),
2929
payload_length_(other.payload_length_) {
@@ -34,8 +34,8 @@ LexemePath::LexemePath(LexemePath& other, IKMemoryPool<Cell>& pool)
3434
}
3535
}
3636

37-
LexemePath::LexemePath(LexemePath&& other, IKMemoryPool<Cell>& pool) noexcept
38-
: QuickSortSet(pool),
37+
LexemePath::LexemePath(LexemePath&& other, vectorized::Arena& arena) noexcept
38+
: QuickSortSet(arena),
3939
path_begin_(other.path_begin_),
4040
path_end_(other.path_end_),
4141
payload_length_(other.payload_length_) {

be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/core/LexemePath.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,9 @@ namespace doris::segment_v2 {
3131

3232
class LexemePath : public QuickSortSet {
3333
public:
34-
LexemePath(IKMemoryPool<Cell>& pool);
35-
LexemePath(LexemePath& other, IKMemoryPool<Cell>& pool);
36-
LexemePath(LexemePath&& other, IKMemoryPool<Cell>& pool) noexcept;
34+
LexemePath(vectorized::Arena& arena);
35+
LexemePath(LexemePath& other, vectorized::Arena& arena);
36+
LexemePath(LexemePath&& other, vectorized::Arena& arena) noexcept;
3737
bool addCrossLexeme(Lexeme& lexeme);
3838
bool addNotCrossLexeme(Lexeme& lexeme);
3939
std::optional<Lexeme> removeTail();

be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/core/QuickSortSet.cpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,13 @@ QuickSortSet::~QuickSortSet() {
2525

2626
void QuickSortSet::clear() {
2727
if (head_) {
28-
pool_.mergeFreeList(head_, tail_, cell_size_);
28+
Cell* current = head_;
29+
while (current) {
30+
Cell* next = current->next_;
31+
current->~Cell();
32+
current = next;
33+
}
34+
2935
head_ = nullptr;
3036
tail_ = nullptr;
3137
cell_size_ = 0;
@@ -129,14 +135,13 @@ size_t QuickSortSet::getPathEnd() const {
129135
}
130136

131137
Cell* QuickSortSet::allocateCell(Lexeme&& lexeme) {
132-
void* memory = pool_.allocate();
133-
return new (memory) Cell(std::move(lexeme));
138+
Cell* cell = arena_.alloc<Cell>();
139+
return new (cell) Cell(std::move(lexeme));
134140
}
135141

136142
void QuickSortSet::deallocateCell(Cell* cell) {
137143
if (cell) {
138144
cell->~Cell();
139-
pool_.deallocate(cell);
140145
}
141146
}
142147

be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/core/QuickSortSet.h

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,7 @@
2323
#include "CLucene/_ApiHeader.h"
2424
#include "CLucene/util/Misc.h"
2525
#include "Lexeme.h"
26-
#include "olap/rowset/segment_v2/inverted_index/analyzer/ik/util/IKMemoryPool.h"
27-
26+
#include "vec/common/arena.h"
2827
namespace doris::segment_v2 {
2928

3029
class Cell {
@@ -42,8 +41,6 @@ class Cell {
4241
Lexeme& getLexeme() { return lexeme_; }
4342

4443
private:
45-
// Do not change the position of the declarations of next_ and prev_, as this is related to the
46-
// mergeFreeList in IKMemoryPool.
4744
Cell* next_ = nullptr;
4845
Cell* prev_ = nullptr;
4946
Lexeme lexeme_;
@@ -61,9 +58,9 @@ class QuickSortSet {
6158
size_t cell_size_ = 0;
6259

6360
public:
64-
IKMemoryPool<Cell>& pool_;
61+
vectorized::Arena& arena_;
6562

66-
QuickSortSet(IKMemoryPool<Cell>& pool) : pool_(pool) {}
63+
QuickSortSet(vectorized::Arena& arena) : arena_(arena) {}
6764
virtual ~QuickSortSet();
6865

6966
QuickSortSet(const QuickSortSet&) = delete;

0 commit comments

Comments
 (0)