Skip to content

Commit b08aabd

Browse files
chore(range_tree): Simplify RangeTree code (#5418)
* chore(range_tree): Simplify range block keys by using single double value Signed-off-by: Stepan Bagritsevich <[email protected]> * chore(search_family): Simplify MergeAllResults method Signed-off-by: Stepan Bagritsevich <[email protected]> * chore(range_tree): Rename method in RangeFilterIterator Signed-off-by: Stepan Bagritsevich <[email protected]> --------- Signed-off-by: Stepan Bagritsevich <[email protected]>
1 parent 8a61acc commit b08aabd

File tree

2 files changed

+26
-44
lines changed

2 files changed

+26
-44
lines changed

src/core/search/range_tree.cc

Lines changed: 25 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ class RangeFilterIterator {
2727

2828
RangeFilterIterator(BaseIterator begin, BaseIterator end, double l, double r)
2929
: l_(l), r_(r), current_(begin), end_(end) {
30-
SkipUnvalidEntries(std::numeric_limits<DocId>::max());
30+
SkipInvalidEntries(std::numeric_limits<DocId>::max());
3131
}
3232

3333
value_type operator*() const {
@@ -37,7 +37,7 @@ class RangeFilterIterator {
3737
RangeFilterIterator& operator++() {
3838
const DocId last_id = (*current_).first;
3939
++current_;
40-
SkipUnvalidEntries(last_id);
40+
SkipInvalidEntries(last_id);
4141
return *this;
4242
}
4343

@@ -49,8 +49,12 @@ class RangeFilterIterator {
4949
return current_ != other.current_;
5050
}
5151

52+
bool HasReachedEnd() const {
53+
return current_ == end_;
54+
}
55+
5256
private:
53-
void SkipUnvalidEntries(DocId last_id) {
57+
void SkipInvalidEntries(DocId last_id) {
5458
// Faster than using std::find_if
5559
while (current_ != end_ && (!InRange(current_) || (*current_).first == last_id)) {
5660
++current_;
@@ -91,28 +95,24 @@ std::vector<DocId> MergeTwoBlocks(const RangeTree::RangeBlock& left_block,
9195
}
9296

9397
template <typename MapT> auto FindRangeBlockImpl(MapT& entries, double value) {
94-
using RangeNumber = double;
9598
DCHECK(!entries.empty());
9699

97-
auto it = entries.lower_bound({value, -std::numeric_limits<RangeNumber>::infinity()});
98-
if (it != entries.begin() && (it == entries.end() || it->first.first > value)) {
100+
auto it = entries.lower_bound(value);
101+
if (it != entries.begin() && (it == entries.end() || it->first > value)) {
99102
// TODO: remove this, we do log N here
100103
// we can use negative left bouding to find the block
101104
--it; // Move to the block that contains the value
102105
}
103106

104-
DCHECK(it != entries.end() &&
105-
(it->first.first <= value &&
106-
(value == std::numeric_limits<RangeNumber>::infinity() || value < it->first.second)));
107+
DCHECK(it != entries.end() && it->first <= value);
107108
return it;
108109
}
109110
} // namespace
110111

111112
RangeTree::RangeTree(PMR_NS::memory_resource* mr, size_t max_range_block_size)
112113
: max_range_block_size_(max_range_block_size), entries_(mr) {
113114
// TODO: at the beggining create more blocks
114-
entries_.insert({{-std::numeric_limits<RangeNumber>::infinity(),
115-
std::numeric_limits<RangeNumber>::infinity()},
115+
entries_.insert({{-std::numeric_limits<RangeNumber>::infinity()},
116116
RangeBlock{entries_.get_allocator().resource(), max_range_block_size_}});
117117
}
118118

@@ -123,9 +123,7 @@ void RangeTree::Add(DocId id, double value) {
123123
RangeBlock& block = it->second;
124124

125125
auto insert_result = block.Insert({id, value});
126-
LOG_IF(ERROR, !insert_result) << "RangeTree: Failed to insert id: " << id << ", value: " << value
127-
<< " into block with range [" << it->first.first << ", "
128-
<< it->first.second << ")";
126+
LOG_IF(ERROR, !insert_result) << "RangeTree: Failed to insert id: " << id << ", value: " << value;
129127

130128
if (block.Size() <= max_range_block_size_) {
131129
return;
@@ -141,9 +139,7 @@ void RangeTree::Remove(DocId id, double value) {
141139
RangeBlock& block = it->second;
142140

143141
auto remove_result = block.Remove({id, value});
144-
LOG_IF(ERROR, !remove_result) << "RangeTree: Failed to remove id: " << id << ", value: " << value
145-
<< " from block with range [" << it->first.first << ", "
146-
<< it->first.second << ")";
142+
LOG_IF(ERROR, !remove_result) << "RangeTree: Failed to remove id: " << id << ", value: " << value;
147143

148144
// TODO: maybe merging blocks if they are too small
149145
// The problem that for each mutable operation we do Remove and then Add,
@@ -210,10 +206,7 @@ TODO: we can optimize this case by splitting to three blocks:
210206
- empty right block with range [std::nextafter(m, +inf), r)
211207
*/
212208
void RangeTree::SplitBlock(Map::iterator it) {
213-
const RangeNumber l = it->first.first;
214-
const RangeNumber r = it->first.second;
215-
216-
DCHECK(l < r);
209+
const RangeNumber l = it->first;
217210

218211
auto split_result = Split(std::move(it->second));
219212

@@ -225,11 +218,11 @@ void RangeTree::SplitBlock(Map::iterator it) {
225218
if (l != m) {
226219
// If l == m, it means that all values in the block were equal to the median value
227220
// We can not insert an empty block with range [l, l) because it is not valid.
228-
entries_.emplace(std::piecewise_construct, std::forward_as_tuple(l, m),
221+
entries_.emplace(std::piecewise_construct, std::forward_as_tuple(l),
229222
std::forward_as_tuple(std::move(split_result.left)));
230223
}
231224

232-
entries_.emplace(std::piecewise_construct, std::forward_as_tuple(m, r),
225+
entries_.emplace(std::piecewise_construct, std::forward_as_tuple(m),
233226
std::forward_as_tuple(std::move(split_result.right)));
234227

235228
DCHECK(TreeIsInCorrectState());
@@ -242,20 +235,12 @@ void RangeTree::SplitBlock(Map::iterator it) {
242235
}
243236

244237
Key prev_range = entries_.begin()->first;
245-
if (prev_range.first >= prev_range.second) {
246-
return false; // Invalid range
247-
}
248-
249238
for (auto it = std::next(entries_.begin()); it != entries_.end(); ++it) {
250239
const Key& current_range = it->first;
251240

252-
if (current_range.first >= current_range.second) {
253-
return false; // Invalid range
254-
}
255-
256241
// Check that ranges are non-overlapping and sorted
257242
// Also there can not be gaps between ranges
258-
if (prev_range.second != current_range.first) {
243+
if (prev_range >= current_range) {
259244
return false;
260245
}
261246

@@ -286,15 +271,14 @@ std::vector<DocId> RangeResult::MergeAllResults() const {
286271

287272
// After the benchmarking, it is better to use inlined vector
288273
// than std::priority_queue
289-
absl::InlinedVector<std::pair<RangeFilterIterator, RangeFilterIterator>, 10> heap;
274+
absl::InlinedVector<RangeFilterIterator, 10> heap;
290275
heap.reserve(blocks_.size());
291276

292277
size_t doc_ids_count = 0;
293278
for (const auto* block : blocks_) {
294279
auto it = MakeBegin(*block, l_, r_);
295-
auto end_it = MakeEnd(*block, l_, r_);
296-
if (it != end_it) {
297-
heap.emplace_back(it, end_it);
280+
if (!it.HasReachedEnd()) {
281+
heap.emplace_back(it);
298282
doc_ids_count += block->Size();
299283
}
300284
}
@@ -304,24 +288,22 @@ std::vector<DocId> RangeResult::MergeAllResults() const {
304288

305289
size_t size = heap.size();
306290
while (size) {
307-
DCHECK(heap[0].first != heap[0].second);
291+
DCHECK(!heap[0].HasReachedEnd());
308292

309293
size_t min_doc_id_index = 0;
310294
for (size_t i = 1; i < size; ++i) {
311-
DCHECK(heap[i].first != heap[i].second);
295+
DCHECK(!heap[i].HasReachedEnd());
312296

313-
if (*heap[i].first < *heap[min_doc_id_index].first) {
297+
if (*heap[i] < *heap[min_doc_id_index]) {
314298
min_doc_id_index = i;
315299
}
316300
}
317301

318-
auto& it = heap[min_doc_id_index].first;
319-
auto& end_it = heap[min_doc_id_index].second;
320-
302+
auto& it = heap[min_doc_id_index];
321303
result.push_back(*it);
322304
++it;
323305

324-
if (it == end_it) {
306+
if (it.HasReachedEnd()) {
325307
// If we reached the end of the current block, remove it from the heap
326308
std::swap(heap[min_doc_id_index], heap[size - 1]);
327309
--size;

src/core/search/range_tree.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ class RangeTree {
3434
friend class RangeResult;
3535

3636
using RangeNumber = double;
37-
using Key = std::pair<RangeNumber, RangeNumber>;
37+
using Key = RangeNumber;
3838
using Entry = std::pair<DocId, double>;
3939
using RangeBlock = BlockList<SortedVector<Entry>>;
4040
using Map = absl::btree_map<Key, RangeBlock, std::less<Key>,

0 commit comments

Comments
 (0)