@@ -320,9 +320,22 @@ bool ShardDocIndex::Matches(string_view key, unsigned obj_code) const {
320
320
return base_->Matches (key, obj_code);
321
321
}
322
322
323
- std::vector<search::SortableValue> ShardDocIndex::KeepTopKSorted (
324
- std::vector<DocId>* ids, const SearchParams::SortOption& sort, size_t limit,
325
- const OpArgs& op_args) const {
323
// Resolves a document id to its key plus a field accessor for the stored value.
// Looks the key up via key_index_, then performs a read-only find in the db slice
// using the index's object code. Returns std::nullopt if the find yields nothing
// or the found entry fails IsValid().
// NOTE(review): callers visible in this diff count a nullopt as an expired document
// or simply skip it — confirm this is the only reason a lookup can fail.
+ optional<ShardDocIndex::LoadedEntry> ShardDocIndex::LoadEntry (DocId id,
324
+ const OpArgs& op_args) const {
325
+ auto & db_slice = op_args.GetDbSlice ();
326
+ string_view key = key_index_.Get (id);  // presumably a view into key_index_ storage — TODO confirm lifetime
327
+ auto it = db_slice.FindReadOnly (op_args.db_cntx , key, base_->GetObjCode ());
328
+ if (!it || !IsValid (*it))  // lookup failed, or entry rejected by IsValid
329
+ return std::nullopt ;
330
+
331
+ return {{key, GetAccessor (op_args.db_cntx , (*it)->second )}};  // LoadedEntry is built from (key, accessor); callers use it pair-like
332
+ }
333
+
334
+ vector<search::SortableValue> ShardDocIndex::KeepTopKSorted (vector<DocId>* ids, size_t limit,
335
+ const SearchParams::SortOption& sort,
336
+ const OpArgs& op_args) const {
337
+ DCHECK_GT (limit, 0u ) << " Limit=0 still has O(ids->size()) complexity" ;
338
+
326
339
auto comp = [order = sort.order ](const auto & lhs, const auto & rhs) {
327
340
return order == SortOrder::ASC ? lhs < rhs : lhs > rhs;
328
341
};
@@ -331,22 +344,20 @@ std::vector<search::SortableValue> ShardDocIndex::KeepTopKSorted(
331
344
std::priority_queue<QPair, std::vector<QPair>, decltype (comp)> q (comp);
332
345
333
346
// Iterate over all documents, extract sortable field and update the queue
334
- auto & db_slice = op_args.GetDbSlice ();
335
347
for (DocId id : *ids) {
336
- auto it = db_slice. FindReadOnly (op_args. db_cntx , key_index_. Get (id), base_-> GetObjCode () );
337
- if (!it || ! IsValid (*it) )
348
+ auto entry = LoadEntry (id, op_args );
349
+ if (!entry )
338
350
continue ;
339
351
340
- auto val = GetAccessor (op_args. db_cntx , (*it) ->second ) ->Serialize (base_->schema , {sort.field });
341
- if (val .empty ())
352
+ auto result = entry ->second ->Serialize (base_->schema , {sort.field });
353
+ if (result .empty ())
342
354
continue ;
343
- auto & first_val = val.begin ()->second ;
344
355
345
356
// Check if the extracted value is better than the worst (q.top())
346
- if (q.size () < limit || comp (first_val , q.top ().first )) {
357
+ if (q.size () < limit || comp (result. begin ()-> second , q.top ().first )) {
347
358
if (q.size () >= limit)
348
359
q.pop ();
349
- q.emplace (std::move (first_val ), id);
360
+ q.emplace (std::move (result. begin ()-> second ), id);
350
361
}
351
362
}
352
363
@@ -368,10 +379,8 @@ SearchResult ShardDocIndex::Search(const OpArgs& op_args, const SearchParams& pa
368
379
if (!result.error .empty ())
369
380
return {facade::ErrorReply (std::move (result.error ))};
370
381
371
- // TODO(vlad): LOAD does NOT exist as a FT.SEARCH option, logic is blurry
372
- SearchFieldsList fields_to_load = params.ShouldReturnAllFields ()
373
- ? params.load_fields .value_or (SearchFieldsList{})
374
- : params.return_fields .value_or (SearchFieldsList{});
382
+ if (limit == 0 )
383
+ return {result.total , {}, std::move (result.profile )};
375
384
376
385
// Tune sort for KNN: Skip if it's on the knn field, otherwise extend the limit if needed
377
386
bool skip_sort = false ;
@@ -381,7 +390,10 @@ SearchResult ShardDocIndex::Search(const OpArgs& op_args, const SearchParams& pa
381
390
limit = max (limit, ko->limit );
382
391
}
383
392
393
+ auto return_fields = params.return_fields .value_or (SearchFieldsList{});
394
+
384
395
// Apply SORTBY
396
+ // TODO(vlad): Write profiling up to here
385
397
vector<search::SortableValue> sort_scores;
386
398
if (params.sort_option && !skip_sort) {
387
399
const auto & so = *params.sort_option ;
@@ -390,8 +402,9 @@ SearchResult ShardDocIndex::Search(const OpArgs& op_args, const SearchParams& pa
390
402
auto * idx = indices_->GetSortIndex (fident);
391
403
sort_scores = idx->Sort (&result.ids , limit, so.order == SortOrder::DESC);
392
404
} else {
393
- sort_scores = KeepTopKSorted (&result.ids , so, limit, op_args);
394
- fields_to_load.emplace_back (so.field );
405
+ sort_scores = KeepTopKSorted (&result.ids , limit, so, op_args);
406
+ if (params.ShouldReturnAllFields ())
407
+ return_fields.push_back (so.field );
395
408
}
396
409
397
410
// If we sorted with knn_scores present, rearrange them
@@ -408,29 +421,35 @@ SearchResult ShardDocIndex::Search(const OpArgs& op_args, const SearchParams& pa
408
421
// Serialize documents
409
422
vector<SerializedSearchDoc> out;
410
423
out.reserve (min (limit, result.ids .size ()));
411
- auto & db_slice = op_args. GetDbSlice ();
424
+
412
425
size_t expired_count = 0 ;
413
426
for (size_t i = 0 ; i < result.ids .size (); i++) {
414
- DocId id = result.ids [i];
415
- auto key = key_index_.Get (id);
416
- auto it = db_slice.FindReadOnly (op_args.db_cntx , key, base_->GetObjCode ());
427
+ float knn_score = result.knn_scores .empty () ? 0 : result.knn_scores [i].second ;
428
+ auto sort_score = sort_scores.empty () ? std::monostate{} : std::move (sort_scores[i]);
417
429
418
- if (!it || !IsValid (*it)) {
430
+ // Don't load entry if we need only its key. Ignore expiration.
431
+ if (params.IdsOnly ()) {
432
+ string_view key = key_index_.Get (result.ids [i]);
433
+ out.push_back ({string{key}, {}, knn_score, sort_score});
434
+ continue ;
435
+ }
436
+
437
+ auto entry = LoadEntry (result.ids [i], op_args);
438
+ if (!entry) {
419
439
expired_count++;
420
440
continue ;
421
441
}
422
442
423
- // Load all required fields from document
424
- auto accessor = GetAccessor (op_args.db_cntx , (*it)->second );
425
- auto fields = params.ShouldReturnAllFields () ? accessor->SerializeDocument (base_->schema )
426
- : SearchDocData{};
427
- auto loaded = accessor->Serialize (base_->schema , fields_to_load);
428
- fields.insert (std::make_move_iterator (loaded.begin ()), std::make_move_iterator (loaded.end ()));
429
-
430
- SerializedSearchDoc doc{string{key}, std::move (fields),
431
- result.knn_scores .empty () ? 0 : result.knn_scores [i].second ,
432
- sort_scores.empty () ? std::monostate{} : std::move (sort_scores[i])};
433
- out.push_back (std::move (doc));
443
+ auto & [key, accessor] = *entry;
444
+
445
+ // Load all specified fields from document
446
+ SearchDocData fields{};
447
+ if (params.ShouldReturnAllFields ())
448
+ fields = accessor->SerializeDocument (base_->schema );
449
+
450
+ auto more_fields = accessor->Serialize (base_->schema , return_fields);
451
+ fields.insert (make_move_iterator (more_fields.begin ()), make_move_iterator (more_fields.end ()));
452
+ out.push_back ({string{key}, std::move (fields), knn_score, sort_score});
434
453
}
435
454
436
455
return {result.total - expired_count, std::move (out), std::move (result.profile )};
@@ -439,7 +458,6 @@ SearchResult ShardDocIndex::Search(const OpArgs& op_args, const SearchParams& pa
439
458
vector<SearchDocData> ShardDocIndex::SearchForAggregator (
440
459
const OpArgs& op_args, const AggregateParams& params,
441
460
search::SearchAlgorithm* search_algo) const {
442
- auto & db_slice = op_args.GetDbSlice ();
443
461
auto search_results = search_algo->Search (&*indices_);
444
462
445
463
if (!search_results.error .empty ())
@@ -450,13 +468,10 @@ vector<SearchDocData> ShardDocIndex::SearchForAggregator(
450
468
451
469
vector<absl::flat_hash_map<string, search::SortableValue>> out;
452
470
for (DocId doc : search_results.ids ) {
453
- auto key = key_index_.Get (doc);
454
- auto it = db_slice.FindReadOnly (op_args.db_cntx , key, base_->GetObjCode ());
455
-
456
- if (!it || !IsValid (*it)) // Item must have expired
471
+ auto entry = LoadEntry (doc, op_args);
472
+ if (!entry)
457
473
continue ;
458
-
459
- auto accessor = GetAccessor (op_args.db_cntx , (*it)->second );
474
+ auto & [_, accessor] = *entry;
460
475
461
476
SearchDocData extracted_sort_indicies;
462
477
extracted_sort_indicies.reserve (sort_indicies.size ());
0 commit comments