Skip to content

Commit 5f65a65

Browse files
authored
chore(search): LIMIT=0 Fast path and remove LOAD parameter from FT.SEARCH (#5419)
* chore(search): Small DocIndex refactor and limit optimization --------- Signed-off-by: Vladislav Oleshko <[email protected]>
1 parent dbf1ebd commit 5f65a65

File tree

5 files changed

+83
-328
lines changed

5 files changed

+83
-328
lines changed

src/server/search/doc_index.cc

Lines changed: 55 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -320,9 +320,22 @@ bool ShardDocIndex::Matches(string_view key, unsigned obj_code) const {
320320
return base_->Matches(key, obj_code);
321321
}
322322

323-
std::vector<search::SortableValue> ShardDocIndex::KeepTopKSorted(
324-
std::vector<DocId>* ids, const SearchParams::SortOption& sort, size_t limit,
325-
const OpArgs& op_args) const {
323+
optional<ShardDocIndex::LoadedEntry> ShardDocIndex::LoadEntry(DocId id,
324+
const OpArgs& op_args) const {
325+
auto& db_slice = op_args.GetDbSlice();
326+
string_view key = key_index_.Get(id);
327+
auto it = db_slice.FindReadOnly(op_args.db_cntx, key, base_->GetObjCode());
328+
if (!it || !IsValid(*it))
329+
return std::nullopt;
330+
331+
return {{key, GetAccessor(op_args.db_cntx, (*it)->second)}};
332+
}
333+
334+
vector<search::SortableValue> ShardDocIndex::KeepTopKSorted(vector<DocId>* ids, size_t limit,
335+
const SearchParams::SortOption& sort,
336+
const OpArgs& op_args) const {
337+
DCHECK_GT(limit, 0u) << "Limit=0 still has O(ids->size()) complexity";
338+
326339
auto comp = [order = sort.order](const auto& lhs, const auto& rhs) {
327340
return order == SortOrder::ASC ? lhs < rhs : lhs > rhs;
328341
};
@@ -331,22 +344,20 @@ std::vector<search::SortableValue> ShardDocIndex::KeepTopKSorted(
331344
std::priority_queue<QPair, std::vector<QPair>, decltype(comp)> q(comp);
332345

333346
// Iterate over all documents, extract sortable field and update the queue
334-
auto& db_slice = op_args.GetDbSlice();
335347
for (DocId id : *ids) {
336-
auto it = db_slice.FindReadOnly(op_args.db_cntx, key_index_.Get(id), base_->GetObjCode());
337-
if (!it || !IsValid(*it))
348+
auto entry = LoadEntry(id, op_args);
349+
if (!entry)
338350
continue;
339351

340-
auto val = GetAccessor(op_args.db_cntx, (*it)->second)->Serialize(base_->schema, {sort.field});
341-
if (val.empty())
352+
auto result = entry->second->Serialize(base_->schema, {sort.field});
353+
if (result.empty())
342354
continue;
343-
auto& first_val = val.begin()->second;
344355

345356
// Check if the extracted value is better than the worst (q.top())
346-
if (q.size() < limit || comp(first_val, q.top().first)) {
357+
if (q.size() < limit || comp(result.begin()->second, q.top().first)) {
347358
if (q.size() >= limit)
348359
q.pop();
349-
q.emplace(std::move(first_val), id);
360+
q.emplace(std::move(result.begin()->second), id);
350361
}
351362
}
352363

@@ -368,10 +379,8 @@ SearchResult ShardDocIndex::Search(const OpArgs& op_args, const SearchParams& pa
368379
if (!result.error.empty())
369380
return {facade::ErrorReply(std::move(result.error))};
370381

371-
// TODO(vlad): LOAD does NOT exist as a FT.SEARCH option, logic is blurry
372-
SearchFieldsList fields_to_load = params.ShouldReturnAllFields()
373-
? params.load_fields.value_or(SearchFieldsList{})
374-
: params.return_fields.value_or(SearchFieldsList{});
382+
if (limit == 0)
383+
return {result.total, {}, std::move(result.profile)};
375384

376385
// Tune sort for KNN: Skip if it's on the knn field, otherwise extend the limit if needed
377386
bool skip_sort = false;
@@ -381,7 +390,10 @@ SearchResult ShardDocIndex::Search(const OpArgs& op_args, const SearchParams& pa
381390
limit = max(limit, ko->limit);
382391
}
383392

393+
auto return_fields = params.return_fields.value_or(SearchFieldsList{});
394+
384395
// Apply SORTBY
396+
// TODO(vlad): Write profiling up to here
385397
vector<search::SortableValue> sort_scores;
386398
if (params.sort_option && !skip_sort) {
387399
const auto& so = *params.sort_option;
@@ -390,8 +402,9 @@ SearchResult ShardDocIndex::Search(const OpArgs& op_args, const SearchParams& pa
390402
auto* idx = indices_->GetSortIndex(fident);
391403
sort_scores = idx->Sort(&result.ids, limit, so.order == SortOrder::DESC);
392404
} else {
393-
sort_scores = KeepTopKSorted(&result.ids, so, limit, op_args);
394-
fields_to_load.emplace_back(so.field);
405+
sort_scores = KeepTopKSorted(&result.ids, limit, so, op_args);
406+
if (params.ShouldReturnAllFields())
407+
return_fields.push_back(so.field);
395408
}
396409

397410
// If we sorted with knn_scores present, rearrange them
@@ -408,29 +421,35 @@ SearchResult ShardDocIndex::Search(const OpArgs& op_args, const SearchParams& pa
408421
// Serialize documents
409422
vector<SerializedSearchDoc> out;
410423
out.reserve(min(limit, result.ids.size()));
411-
auto& db_slice = op_args.GetDbSlice();
424+
412425
size_t expired_count = 0;
413426
for (size_t i = 0; i < result.ids.size(); i++) {
414-
DocId id = result.ids[i];
415-
auto key = key_index_.Get(id);
416-
auto it = db_slice.FindReadOnly(op_args.db_cntx, key, base_->GetObjCode());
427+
float knn_score = result.knn_scores.empty() ? 0 : result.knn_scores[i].second;
428+
auto sort_score = sort_scores.empty() ? std::monostate{} : std::move(sort_scores[i]);
417429

418-
if (!it || !IsValid(*it)) {
430+
// Don't load entry if we need only its key. Ignore expiration.
431+
if (params.IdsOnly()) {
432+
string_view key = key_index_.Get(result.ids[i]);
433+
out.push_back({string{key}, {}, knn_score, sort_score});
434+
continue;
435+
}
436+
437+
auto entry = LoadEntry(result.ids[i], op_args);
438+
if (!entry) {
419439
expired_count++;
420440
continue;
421441
}
422442

423-
// Load all required fields from document
424-
auto accessor = GetAccessor(op_args.db_cntx, (*it)->second);
425-
auto fields = params.ShouldReturnAllFields() ? accessor->SerializeDocument(base_->schema)
426-
: SearchDocData{};
427-
auto loaded = accessor->Serialize(base_->schema, fields_to_load);
428-
fields.insert(std::make_move_iterator(loaded.begin()), std::make_move_iterator(loaded.end()));
429-
430-
SerializedSearchDoc doc{string{key}, std::move(fields),
431-
result.knn_scores.empty() ? 0 : result.knn_scores[i].second,
432-
sort_scores.empty() ? std::monostate{} : std::move(sort_scores[i])};
433-
out.push_back(std::move(doc));
443+
auto& [key, accessor] = *entry;
444+
445+
// Load all specified fields from document
446+
SearchDocData fields{};
447+
if (params.ShouldReturnAllFields())
448+
fields = accessor->SerializeDocument(base_->schema);
449+
450+
auto more_fields = accessor->Serialize(base_->schema, return_fields);
451+
fields.insert(make_move_iterator(more_fields.begin()), make_move_iterator(more_fields.end()));
452+
out.push_back({string{key}, std::move(fields), knn_score, sort_score});
434453
}
435454

436455
return {result.total - expired_count, std::move(out), std::move(result.profile)};
@@ -439,7 +458,6 @@ SearchResult ShardDocIndex::Search(const OpArgs& op_args, const SearchParams& pa
439458
vector<SearchDocData> ShardDocIndex::SearchForAggregator(
440459
const OpArgs& op_args, const AggregateParams& params,
441460
search::SearchAlgorithm* search_algo) const {
442-
auto& db_slice = op_args.GetDbSlice();
443461
auto search_results = search_algo->Search(&*indices_);
444462

445463
if (!search_results.error.empty())
@@ -450,13 +468,10 @@ vector<SearchDocData> ShardDocIndex::SearchForAggregator(
450468

451469
vector<absl::flat_hash_map<string, search::SortableValue>> out;
452470
for (DocId doc : search_results.ids) {
453-
auto key = key_index_.Get(doc);
454-
auto it = db_slice.FindReadOnly(op_args.db_cntx, key, base_->GetObjCode());
455-
456-
if (!it || !IsValid(*it)) // Item must have expired
471+
auto entry = LoadEntry(doc, op_args);
472+
if (!entry)
457473
continue;
458-
459-
auto accessor = GetAccessor(op_args.db_cntx, (*it)->second);
474+
auto& [_, accessor] = *entry;
460475

461476
SearchDocData extracted_sort_indicies;
462477
extracted_sort_indicies.reserve(sort_indicies.size());

src/server/search/doc_index.h

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525

2626
namespace dfly {
2727

28+
struct BaseAccessor;
29+
2830
using SearchDocData = absl::flat_hash_map<std::string /*field*/, search::SortableValue /*value*/>;
2931
using Synonyms = search::Synonyms;
3032

@@ -166,7 +168,6 @@ struct SearchParams {
166168
Only one of load_fields and return_fields should be set.
167169
*/
168170
std::optional<SearchFieldsList> load_fields;
169-
bool no_content = false;
170171

171172
std::optional<SortOption> sort_option;
172173
search::QueryParams query_params;
@@ -176,7 +177,7 @@ struct SearchParams {
176177
}
177178

178179
bool IdsOnly() const {
179-
return no_content || (return_fields && return_fields->empty());
180+
return return_fields && return_fields->empty();
180181
}
181182

182183
bool ShouldReturnField(std::string_view alias) const;
@@ -276,10 +277,13 @@ class ShardDocIndex {
276277
// Clears internal data. Traverses all matching documents and assigns ids.
277278
void Rebuild(const OpArgs& op_args, PMR_NS::memory_resource* mr);
278279

280+
using LoadedEntry = std::pair<std::string_view, std::unique_ptr<BaseAccessor>>;
281+
std::optional<LoadedEntry> LoadEntry(search::DocId id, const OpArgs& op_args) const;
282+
279283
// Behaviour identical to SortIndex::Sort for non-sortable fields that need to be fetched first
280-
std::vector<search::SortableValue> KeepTopKSorted(std::vector<DocId>* ids,
284+
std::vector<search::SortableValue> KeepTopKSorted(std::vector<DocId>* ids, size_t limit,
281285
const SearchParams::SortOption& sort,
282-
size_t limit, const OpArgs& op_args) const;
286+
const OpArgs& op_args) const;
283287

284288
private:
285289
std::shared_ptr<const DocIndex> base_;

src/server/search/search_family.cc

Lines changed: 19 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -332,28 +332,25 @@ std::optional<std::string_view> ParseFieldWithAtSign(CmdArgParser* parser) {
332332
return field;
333333
}
334334

335-
void ParseLoadFields(CmdArgParser* parser, std::optional<SearchFieldsList>* load_fields) {
335+
SearchFieldsList ParseLoadOrReturnFields(CmdArgParser* parser, bool is_load) {
336336
// TODO: Change to num_strings. In Redis strings number is expected. For example: LOAD 3 $.a AS a
337+
SearchFieldsList fields;
337338
size_t num_fields = parser->Next<size_t>();
338-
if (!load_fields->has_value()) {
339-
load_fields->emplace();
340-
}
341339

342340
while (parser->HasNext() && num_fields--) {
343341
string_view str = parser->Next();
344342

345-
if (absl::StartsWith(str, "@"sv)) {
343+
if (is_load && absl::StartsWith(str, "@"sv)) {
346344
str.remove_prefix(1); // remove leading @
347345
}
348346

349347
StringOrView name = StringOrView::FromString(std::string{str});
350-
if (parser->Check("AS")) {
351-
load_fields->value().emplace_back(name, true,
352-
StringOrView::FromString(parser->Next<std::string>()));
353-
} else {
354-
load_fields->value().emplace_back(name, true);
355-
}
348+
if (parser->Check("AS"))
349+
fields.emplace_back(name, true, StringOrView::FromString(parser->Next<std::string>()));
350+
else
351+
fields.emplace_back(name, true);
356352
}
353+
return fields;
357354
}
358355

359356
search::QueryParams ParseQueryParams(CmdArgParser* parser) {
@@ -379,29 +376,15 @@ ParseResult<SearchParams> ParseSearchParams(CmdArgParser* parser) {
379376
return CreateSyntaxError("LOAD cannot be applied after RETURN"sv);
380377
}
381378

382-
ParseLoadFields(parser, &params.load_fields);
379+
params.load_fields = ParseLoadOrReturnFields(parser, true);
383380
} else if (parser->Check("RETURN")) {
384381
if (params.load_fields) {
385382
return CreateSyntaxError("RETURN cannot be applied after LOAD"sv);
386383
}
387-
388-
// RETURN {num} [{ident} AS {name}...]
389-
/* TODO: Change to num_strings. In Redis strings number is expected. For example: RETURN 3 $.a
390-
* AS a */
391-
size_t num_fields = parser->Next<size_t>();
392-
params.return_fields.emplace();
393-
while (parser->HasNext() && params.return_fields->size() < num_fields) {
394-
StringOrView name = StringOrView::FromString(parser->Next<std::string>());
395-
396-
if (parser->Check("AS")) {
397-
params.return_fields->emplace_back(std::move(name), true,
398-
StringOrView::FromString(parser->Next<std::string>()));
399-
} else {
400-
params.return_fields->emplace_back(std::move(name), true);
401-
}
402-
}
384+
if (!params.return_fields) // after NOCONTENT it's silently ignored
385+
params.return_fields = ParseLoadOrReturnFields(parser, false);
403386
} else if (parser->Check("NOCONTENT")) { // NOCONTENT
404-
params.no_content = true;
387+
params.return_fields.emplace();
405388
} else if (parser->Check("PARAMS")) { // [PARAMS num(ignored) name(ignored) knn_vector]
406389
params.query_params = ParseQueryParams(parser);
407390
} else if (parser->Check("SORTBY")) {
@@ -464,7 +447,12 @@ ParseResult<AggregateParams> ParseAggregatorParams(CmdArgParser* parser) {
464447
// Parse LOAD count field [field ...]
465448
// LOAD options are at the beginning of the query, so we need to parse them first
466449
while (parser->HasNext() && parser->Check("LOAD")) {
467-
ParseLoadFields(parser, &params.load_fields);
450+
auto fields = ParseLoadOrReturnFields(parser, true);
451+
if (!params.load_fields.has_value())
452+
params.load_fields = std::move(fields);
453+
else
454+
params.load_fields->insert(params.load_fields->end(), make_move_iterator(fields.begin()),
455+
make_move_iterator(fields.end()));
468456
}
469457

470458
while (parser->HasNext()) {
@@ -1235,7 +1223,7 @@ void SearchFamily::Register(CommandRegistry* registry) {
12351223

12361224
// Disable journaling, because no-key-transactional enables it by default
12371225
const uint32_t kReadOnlyMask =
1238-
CO::NO_KEY_TRANSACTIONAL | CO::NO_KEY_TX_SPAN_ALL | CO::NO_AUTOJOURNAL;
1226+
CO::NO_KEY_TRANSACTIONAL | CO::NO_KEY_TX_SPAN_ALL | CO::NO_AUTOJOURNAL | CO::IDEMPOTENT;
12391227

12401228
registry->StartFamily();
12411229
*registry << CI{"FT.CREATE", CO::WRITE | CO::GLOBAL_TRANS, -2, 0, 0, acl::FT_SEARCH}.HFUNC(

0 commit comments

Comments
 (0)