Skip to content

Commit c31a4b9

Browse files
authored
fix(search): Result set cutoff (#5906)
1 parent 6899751 commit c31a4b9

File tree

4 files changed

+43
-20
lines changed

4 files changed

+43
-20
lines changed

src/core/search/index_result.h

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,8 @@ class IndexResult {
3939

4040
BorrowedView Borrowed() const;
4141

42-
// Move out of owned or copy borrowed
43-
DocVec Take();
42+
// Move out of owned or copy borrowed. Take up to `limit` entries and return original size.
43+
std::pair<DocVec, size_t /* full size */> Take(size_t limit = std::numeric_limits<size_t>::max());
4444

4545
private:
4646
bool IsOwned() const;
@@ -82,18 +82,36 @@ inline IndexResult::BorrowedView IndexResult::Borrowed() const {
8282
return std::visit(cb, value_);
8383
}
8484

85-
inline IndexResult::DocVec IndexResult::Take() {
85+
inline std::pair<IndexResult::DocVec, size_t> IndexResult::Take(size_t limit) {
8686
if (IsOwned()) {
87-
return std::move(std::get<DocVec>(value_));
87+
auto& vec = std::get<DocVec>(value_);
88+
size_t size = vec.size();
89+
return {std::move(vec), size};
8890
}
8991

90-
auto cb = [](auto* set) -> DocVec {
92+
// Numeric ranges need to be filtered and don't know their exact size ahead
93+
if (std::holds_alternative<RangeResult>(value_)) {
94+
auto cb = [limit](auto* range) -> std::pair<DocVec, size_t> {
95+
DocVec out;
96+
size_t total = 0;
97+
out.reserve(std::min(limit, range->size()));
98+
for (auto it = range->begin(); it != range->end(); ++it) {
99+
total++;
100+
if (out.size() < limit)
101+
out.push_back(*it);
102+
}
103+
return {std::move(out), total};
104+
};
105+
return std::visit(cb, Borrowed());
106+
}
107+
108+
// Generic borrowed results sets don't need to be filtered, so we can tell the result size ahead
109+
auto cb = [limit](auto* set) -> std::pair<DocVec, size_t> {
91110
DocVec out;
92-
out.reserve(set->size());
93-
for (auto it = set->begin(); it != set->end(); ++it) {
111+
out.reserve(std::min(limit, set->size()));
112+
for (auto it = set->begin(); it != set->end() && out.size() < limit; ++it)
94113
out.push_back(*it);
95-
}
96-
return out;
114+
return {std::move(out), set->size()};
97115
};
98116
return std::visit(cb, Borrowed());
99117
}

src/core/search/search.cc

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,7 @@ struct BasicSearch {
289289

290290
// negate -(*subquery*): explicitly compute result complement. Needs further optimizations
291291
IndexResult Search(const AstNegateNode& node, string_view active_field) {
292-
vector<DocId> matched = SearchGeneric(*node.node, active_field).Take();
292+
auto matched = SearchGeneric(*node.node, active_field).Take().first;
293293
vector<DocId> all = indices_->GetAllDocs();
294294

295295
// To negate a result, we have to find the complement of matched to all documents,
@@ -358,7 +358,7 @@ struct BasicSearch {
358358
knn_distances_ = vec_index->Knn(knn.vec.first.get(), knn.limit, knn.ef_runtime);
359359
else
360360
knn_distances_ =
361-
vec_index->Knn(knn.vec.first.get(), knn.limit, knn.ef_runtime, sub_results.Take());
361+
vec_index->Knn(knn.vec.first.get(), knn.limit, knn.ef_runtime, sub_results.Take().first);
362362
}
363363

364364
// [KNN limit @field vec]: Compute distance from `vec` to all vectors keep closest `limit`
@@ -410,7 +410,6 @@ struct BasicSearch {
410410

411411
// Top level results don't need to be sorted, because they will be scored, sorted by fields or
412412
// used by knn
413-
414413
DCHECK(top_level || holds_alternative<AstKnnNode>(node.Variant()) ||
415414
holds_alternative<AstGeoNode>(node.Variant()) ||
416415
visit([](auto* set) { return is_sorted(set->begin(), set->end()); }, result.Borrowed()));
@@ -421,16 +420,15 @@ struct BasicSearch {
421420
return result;
422421
}
423422

424-
SearchResult Search(const AstNode& query) {
423+
SearchResult Search(const AstNode& query, size_t cuttoff_limit) {
425424
IndexResult result = SearchGeneric(query, "", true);
426425

427426
// Extract profile if enabled
428427
optional<AlgorithmProfile> profile =
429428
profile_builder_ ? make_optional(profile_builder_->Take()) : nullopt;
430429

431-
auto out = result.Take();
432-
const size_t total = out.size();
433-
return SearchResult{total, std::move(out), std::move(knn_scores_), std::move(profile),
430+
auto [out, total_size] = result.Take(cuttoff_limit);
431+
return SearchResult{total_size, std::move(out), std::move(knn_scores_), std::move(profile),
434432
std::move(error_)};
435433
}
436434

@@ -659,11 +657,11 @@ bool SearchAlgorithm::Init(string_view query, const QueryParams* params,
659657
return true;
660658
}
661659

662-
SearchResult SearchAlgorithm::Search(const FieldIndices* index) const {
660+
SearchResult SearchAlgorithm::Search(const FieldIndices* index, size_t cuttoff_limit) const {
663661
auto bs = BasicSearch{index};
664662
if (profiling_enabled_)
665663
bs.EnableProfiling();
666-
return bs.Search(*query_);
664+
return bs.Search(*query_, cuttoff_limit);
667665
}
668666

669667
optional<KnnScoreSortOption> SearchAlgorithm::GetKnnScoreSortOption() const {

src/core/search/search.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,9 @@ class SearchAlgorithm {
197197
bool Init(std::string_view query, const QueryParams* params,
198198
const OptionalFilters* filters = nullptr);
199199

200-
SearchResult Search(const FieldIndices* index) const;
200+
// Search on given index with predefined limit for cutting off result ids
201+
SearchResult Search(const FieldIndices* index,
202+
size_t cuttoff_limit = std::numeric_limits<size_t>::max()) const;
201203

202204
// if enabled, return limit & alias for knn query
203205
std::optional<KnnScoreSortOption> GetKnnScoreSortOption() const;

src/server/search/doc_index.cc

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -422,7 +422,12 @@ vector<search::SortableValue> ShardDocIndex::KeepTopKSorted(vector<DocId>* ids,
422422
SearchResult ShardDocIndex::Search(const OpArgs& op_args, const SearchParams& params,
423423
search::SearchAlgorithm* search_algo) const {
424424
size_t limit = params.limit_offset + params.limit_total;
425-
auto result = search_algo->Search(&*indices_);
425+
426+
// If we don't sort the documents, we don't need to copy more ids than are requested
427+
bool can_cut = !params.sort_option && !search_algo->GetKnnScoreSortOption();
428+
size_t id_cutoff_limit = can_cut ? limit : numeric_limits<size_t>::max();
429+
430+
auto result = search_algo->Search(&*indices_, id_cutoff_limit);
426431
if (!result.error.empty())
427432
return {facade::ErrorReply(std::move(result.error))};
428433

0 commit comments

Comments
 (0)