@@ -27,7 +27,7 @@ class RangeFilterIterator {
27
27
28
28
RangeFilterIterator (BaseIterator begin, BaseIterator end, double l, double r)
29
29
: l_(l), r_(r), current_(begin), end_(end) {
30
- SkipUnvalidEntries (std::numeric_limits<DocId>::max ());
30
+ SkipInvalidEntries (std::numeric_limits<DocId>::max ());
31
31
}
32
32
33
33
value_type operator *() const {
@@ -37,7 +37,7 @@ class RangeFilterIterator {
37
37
RangeFilterIterator& operator ++() {
38
38
const DocId last_id = (*current_).first ;
39
39
++current_;
40
- SkipUnvalidEntries (last_id);
40
+ SkipInvalidEntries (last_id);
41
41
return *this ;
42
42
}
43
43
@@ -49,8 +49,12 @@ class RangeFilterIterator {
49
49
return current_ != other.current_ ;
50
50
}
51
51
52
+ bool HasReachedEnd () const {
53
+ return current_ == end_;
54
+ }
55
+
52
56
private:
53
- void SkipUnvalidEntries (DocId last_id) {
57
+ void SkipInvalidEntries (DocId last_id) {
54
58
// Faster than using std::find_if
55
59
while (current_ != end_ && (!InRange (current_) || (*current_).first == last_id)) {
56
60
++current_;
@@ -91,28 +95,24 @@ std::vector<DocId> MergeTwoBlocks(const RangeTree::RangeBlock& left_block,
91
95
}
92
96
93
97
template <typename MapT> auto FindRangeBlockImpl (MapT& entries, double value) {
94
- using RangeNumber = double ;
95
98
DCHECK (!entries.empty ());
96
99
97
- auto it = entries.lower_bound ({ value, -std::numeric_limits<RangeNumber>:: infinity ()} );
98
- if (it != entries.begin () && (it == entries.end () || it->first . first > value)) {
100
+ auto it = entries.lower_bound (value);
101
+ if (it != entries.begin () && (it == entries.end () || it->first > value)) {
99
102
// TODO: remove this, we do log N here
100
103
// we can use negative left bounding to find the block
101
104
--it; // Move to the block that contains the value
102
105
}
103
106
104
- DCHECK (it != entries.end () &&
105
- (it->first .first <= value &&
106
- (value == std::numeric_limits<RangeNumber>::infinity () || value < it->first .second )));
107
+ DCHECK (it != entries.end () && it->first <= value);
107
108
return it;
108
109
}
109
110
} // namespace
110
111
111
112
RangeTree::RangeTree (PMR_NS::memory_resource* mr, size_t max_range_block_size)
112
113
: max_range_block_size_(max_range_block_size), entries_(mr) {
113
114
// TODO: at the beginning create more blocks
114
- entries_.insert ({{-std::numeric_limits<RangeNumber>::infinity (),
115
- std::numeric_limits<RangeNumber>::infinity ()},
115
+ entries_.insert ({{-std::numeric_limits<RangeNumber>::infinity ()},
116
116
RangeBlock{entries_.get_allocator ().resource (), max_range_block_size_}});
117
117
}
118
118
@@ -123,9 +123,7 @@ void RangeTree::Add(DocId id, double value) {
123
123
RangeBlock& block = it->second ;
124
124
125
125
auto insert_result = block.Insert ({id, value});
126
- LOG_IF (ERROR, !insert_result) << " RangeTree: Failed to insert id: " << id << " , value: " << value
127
- << " into block with range [" << it->first .first << " , "
128
- << it->first .second << " )" ;
126
+ LOG_IF (ERROR, !insert_result) << " RangeTree: Failed to insert id: " << id << " , value: " << value;
129
127
130
128
if (block.Size () <= max_range_block_size_) {
131
129
return ;
@@ -141,9 +139,7 @@ void RangeTree::Remove(DocId id, double value) {
141
139
RangeBlock& block = it->second ;
142
140
143
141
auto remove_result = block.Remove ({id, value});
144
- LOG_IF (ERROR, !remove_result) << " RangeTree: Failed to remove id: " << id << " , value: " << value
145
- << " from block with range [" << it->first .first << " , "
146
- << it->first .second << " )" ;
142
+ LOG_IF (ERROR, !remove_result) << " RangeTree: Failed to remove id: " << id << " , value: " << value;
147
143
148
144
// TODO: maybe merging blocks if they are too small
149
145
// The problem that for each mutable operation we do Remove and then Add,
@@ -210,10 +206,7 @@ TODO: we can optimize this case by splitting to three blocks:
210
206
- empty right block with range [std::nextafter(m, +inf), r)
211
207
*/
212
208
void RangeTree::SplitBlock (Map::iterator it) {
213
- const RangeNumber l = it->first .first ;
214
- const RangeNumber r = it->first .second ;
215
-
216
- DCHECK (l < r);
209
+ const RangeNumber l = it->first ;
217
210
218
211
auto split_result = Split (std::move (it->second ));
219
212
@@ -225,11 +218,11 @@ void RangeTree::SplitBlock(Map::iterator it) {
225
218
if (l != m) {
226
219
// If l == m, it means that all values in the block were equal to the median value
227
220
// We can not insert an empty block with range [l, l) because it is not valid.
228
- entries_.emplace (std::piecewise_construct, std::forward_as_tuple (l, m ),
221
+ entries_.emplace (std::piecewise_construct, std::forward_as_tuple (l),
229
222
std::forward_as_tuple (std::move (split_result.left )));
230
223
}
231
224
232
- entries_.emplace (std::piecewise_construct, std::forward_as_tuple (m, r ),
225
+ entries_.emplace (std::piecewise_construct, std::forward_as_tuple (m),
233
226
std::forward_as_tuple (std::move (split_result.right )));
234
227
235
228
DCHECK (TreeIsInCorrectState ());
@@ -242,20 +235,12 @@ void RangeTree::SplitBlock(Map::iterator it) {
242
235
}
243
236
244
237
Key prev_range = entries_.begin ()->first ;
245
- if (prev_range.first >= prev_range.second ) {
246
- return false ; // Invalid range
247
- }
248
-
249
238
for (auto it = std::next (entries_.begin ()); it != entries_.end (); ++it) {
250
239
const Key& current_range = it->first ;
251
240
252
- if (current_range.first >= current_range.second ) {
253
- return false ; // Invalid range
254
- }
255
-
256
241
// Check that ranges are non-overlapping and sorted
257
242
// Also there can not be gaps between ranges
258
- if (prev_range. second ! = current_range. first ) {
243
+ if (prev_range > = current_range) {
259
244
return false ;
260
245
}
261
246
@@ -286,15 +271,14 @@ std::vector<DocId> RangeResult::MergeAllResults() const {
286
271
287
272
// After the benchmarking, it is better to use inlined vector
288
273
// than std::priority_queue
289
- absl::InlinedVector<std::pair< RangeFilterIterator, RangeFilterIterator> , 10 > heap;
274
+ absl::InlinedVector<RangeFilterIterator, 10 > heap;
290
275
heap.reserve (blocks_.size ());
291
276
292
277
size_t doc_ids_count = 0 ;
293
278
for (const auto * block : blocks_) {
294
279
auto it = MakeBegin (*block, l_, r_);
295
- auto end_it = MakeEnd (*block, l_, r_);
296
- if (it != end_it) {
297
- heap.emplace_back (it, end_it);
280
+ if (!it.HasReachedEnd ()) {
281
+ heap.emplace_back (it);
298
282
doc_ids_count += block->Size ();
299
283
}
300
284
}
@@ -304,24 +288,22 @@ std::vector<DocId> RangeResult::MergeAllResults() const {
304
288
305
289
size_t size = heap.size ();
306
290
while (size) {
307
- DCHECK (heap[ 0 ]. first != heap[0 ].second );
291
+ DCHECK (! heap[0 ].HasReachedEnd () );
308
292
309
293
size_t min_doc_id_index = 0 ;
310
294
for (size_t i = 1 ; i < size; ++i) {
311
- DCHECK (heap[i]. first != heap[i].second );
295
+ DCHECK (! heap[i].HasReachedEnd () );
312
296
313
- if (*heap[i]. first < *heap[min_doc_id_index]. first ) {
297
+ if (*heap[i] < *heap[min_doc_id_index]) {
314
298
min_doc_id_index = i;
315
299
}
316
300
}
317
301
318
- auto & it = heap[min_doc_id_index].first ;
319
- auto & end_it = heap[min_doc_id_index].second ;
320
-
302
+ auto & it = heap[min_doc_id_index];
321
303
result.push_back (*it);
322
304
++it;
323
305
324
- if (it == end_it ) {
306
+ if (it. HasReachedEnd () ) {
325
307
// If we reached the end of the current block, remove it from the heap
326
308
std::swap (heap[min_doc_id_index], heap[size - 1 ]);
327
309
--size;
0 commit comments