Skip to content

Commit db46a91

Browse files
DvirDukhanalonre24GuyAv46meiravgri
authored
version 0.5.0 (#260)
* 0.5.0 * Epsilon runtime parameter MOD-4165 (#257) * Extend query result API (#261) * add required api's for metirc iterator * Multi-Value - HNSW - enable range queries (#255) * implement and test results_container wrapper * added support for range query on HNSW multi-value * adjusted tests * added flow test * flow test improvement * review fixes * small improvements * uses smart pointers * add range support to multi-value flat index * fixed timeout return * fixed memory leak * rebase with unique_ptr and fix * review fixes Co-authored-by: meiravgri <meirav.grimberg@redis.com> Co-authored-by: alonre24 <alonreshef24@gmail.com> Co-authored-by: GuyAv46 <47632673+GuyAv46@users.noreply.github.com> Co-authored-by: meiravgri <meirav.grimberg@redis.com>
1 parent 554ffbc commit db46a91

24 files changed

+783
-359
lines changed

src/VecSim/algorithms/brute_force/brute_force.h

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include "VecSim/vec_sim_index.h"
55
#include "VecSim/spaces/spaces.h"
66
#include "VecSim/utils/vecsim_stl.h"
7+
#include "VecSim/utils/vecsim_results_container.h"
78
#include "VecSim/algorithms/brute_force/brute_force_factory.h"
89
#include "VecSim/spaces/spaces.h"
910
#include "VecSim/query_result_struct.h"
@@ -67,6 +68,10 @@ class BruteForceIndex : public VecSimIndexAbstract<DistType> {
6768
virtual inline vecsim_stl::abstract_priority_queue<DistType, labelType> *
6869
getNewMaxPriorityQueue() = 0;
6970

71+
// inline results container factory that needs to be implemented by derived class
72+
virtual inline std::unique_ptr<vecsim_stl::abstract_results_container>
73+
getNewResultsContainer(size_t cap) const = 0;
74+
7075
// inline label to id setters that need to be implemented by derived class
7176
virtual inline void replaceIdOfLabel(labelType label, idType new_id, idType old_id) = 0;
7277
virtual inline void setVectorId(labelType label, idType id) = 0;
@@ -291,26 +296,27 @@ BruteForceIndex<DataType, DistType>::rangeQuery(const void *queryBlob, double ra
291296
}
292297

293298
// Compute scores in every block and save results that are within the range.
294-
rl.results =
295-
array_new<VecSimQueryResult>(10); // Use 10 as the initial capacity for the dynamic array.
299+
auto res_container =
300+
getNewResultsContainer(10); // Use 10 as the initial capacity for the dynamic array.
296301

297302
DistType radius_ = DistType(radius);
298303
idType curr_id = 0;
304+
rl.code = VecSim_QueryResult_OK;
299305
for (auto vectorBlock : this->vectorBlocks) {
300306
auto scores = computeBlockScores(vectorBlock, queryBlob, timeoutCtx, &rl.code);
301307
if (VecSim_OK != rl.code) {
302-
return rl;
308+
break;
303309
}
304310
for (size_t i = 0; i < scores.size(); i++) {
305311
if (scores[i] <= radius_) {
306-
auto res = VecSimQueryResult{getVectorLabel(curr_id), scores[i]};
307-
rl.results = array_append(rl.results, res);
312+
res_container->emplace(getVectorLabel(curr_id), scores[i]);
308313
}
309314
++curr_id;
310315
}
311316
}
312-
assert(curr_id == this->count);
313-
rl.code = VecSim_QueryResult_OK;
317+
// assert only if the loop finished iterating all the ids (we didn't get rl.code != VecSim_OK).
318+
assert((rl.code != VecSim_OK || curr_id == this->count));
319+
rl.results = res_container->get_results();
314320
return rl;
315321
}
316322

src/VecSim/algorithms/brute_force/brute_force_multi.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,14 @@ class BruteForceIndex_Multi : public BruteForceIndex<DataType, DistType> {
1919
int addVector(const void *vector_data, labelType label) override;
2020
int deleteVector(labelType labelType) override;
2121
double getDistanceFrom(labelType label, const void *vector_data) const override;
22-
2322
inline size_t indexLabelCount() const override { return this->labelToIdsLookup.size(); }
2423

24+
inline std::unique_ptr<vecsim_stl::abstract_results_container>
25+
getNewResultsContainer(size_t cap) const override {
26+
return std::unique_ptr<vecsim_stl::abstract_results_container>(
27+
new (this->allocator) vecsim_stl::unique_results_container(cap, this->allocator));
28+
}
29+
2530
private:
2631
// inline definitions
2732

src/VecSim/algorithms/brute_force/brute_force_single.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,12 @@ class BruteForceIndex_Single : public BruteForceIndex<DataType, DistType> {
1818
int deleteVector(labelType label) override;
1919
double getDistanceFrom(labelType label, const void *vector_data) const override;
2020

21+
inline std::unique_ptr<vecsim_stl::abstract_results_container>
22+
getNewResultsContainer(size_t cap) const override {
23+
return std::unique_ptr<vecsim_stl::abstract_results_container>(
24+
new (this->allocator) vecsim_stl::default_results_container(cap, this->allocator));
25+
}
26+
2127
inline size_t indexLabelCount() const override { return this->count; }
2228

2329
protected:

src/VecSim/algorithms/hnsw/hnsw.h

Lines changed: 17 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include "VecSim/memory/vecsim_malloc.h"
77
#include "VecSim/utils/vecsim_stl.h"
88
#include "VecSim/utils/vec_utils.h"
9+
#include "VecSim/utils/vecsim_results_container.h"
910
#include "VecSim/query_result_struct.h"
1011
#include "VecSim/vec_sim_common.h"
1112
#include "VecSim/vec_sim_index.h"
@@ -119,11 +120,10 @@ class HNSWIndex : public VecSimIndexAbstract<DistType> {
119120
tag_t visited_tag,
120121
vecsim_stl::abstract_priority_queue<DistType, Identifier> &top_candidates,
121122
candidatesMaxHeap<DistType> &candidates_set, DistType lowerBound) const;
122-
inline void processCandidate_RangeSearch(idType curNodeId, const void *data_point, size_t layer,
123-
double epsilon, tag_t visited_tag,
124-
VecSimQueryResult **top_candidates,
125-
candidatesMaxHeap<DistType> &candidate_set,
126-
DistType lowerBound, double radius) const;
123+
inline void processCandidate_RangeSearch(
124+
idType curNodeId, const void *data_point, size_t layer, double epsilon, tag_t visited_tag,
125+
std::unique_ptr<vecsim_stl::abstract_results_container> &top_candidates,
126+
candidatesMaxHeap<DistType> &candidate_set, DistType lowerBound, double radius) const;
127127
candidatesMaxHeap<DistType> searchLayer(idType ep_id, const void *data_point, size_t layer,
128128
size_t ef) const;
129129
candidatesLabelsMaxHeap<DistType> *
@@ -192,6 +192,8 @@ class HNSWIndex : public VecSimIndexAbstract<DistType> {
192192

193193
protected:
194194
// inline label to id setters that need to be implemented by derived class
195+
virtual inline std::unique_ptr<vecsim_stl::abstract_results_container>
196+
getNewResultsContainer(size_t cap) const = 0;
195197
virtual inline void replaceIdOfLabel(labelType label, idType new_id, idType old_id) = 0;
196198
virtual inline void setVectorId(labelType label, idType id) = 0;
197199
virtual inline void resizeLabelLookup(size_t new_max_elements) = 0;
@@ -453,8 +455,8 @@ DistType HNSWIndex<DataType, DistType>::processCandidate(
453455
template <typename DataType, typename DistType>
454456
void HNSWIndex<DataType, DistType>::processCandidate_RangeSearch(
455457
idType curNodeId, const void *query_data, size_t layer, double epsilon, tag_t visited_tag,
456-
VecSimQueryResult **results, candidatesMaxHeap<DistType> &candidate_set, DistType dyn_range,
457-
double radius) const {
458+
std::unique_ptr<vecsim_stl::abstract_results_container> &results,
459+
candidatesMaxHeap<DistType> &candidate_set, DistType dyn_range, double radius) const {
458460

459461
#ifdef ENABLE_PARALLELIZATION
460462
std::unique_lock<std::mutex> lock(link_list_locks_[curNodeId]);
@@ -488,10 +490,7 @@ void HNSWIndex<DataType, DistType>::processCandidate_RangeSearch(
488490

489491
// If the new candidate is in the requested radius, add it to the results set.
490492
if (candidate_dist <= radius_) {
491-
auto new_result = VecSimQueryResult{};
492-
VecSimQueryResult_SetId(new_result, getExternalLabel(candidate_id));
493-
VecSimQueryResult_SetScore(new_result, candidate_dist);
494-
*results = array_append(*results, new_result);
493+
results->emplace(getExternalLabel(candidate_id), candidate_dist);
495494
}
496495
}
497496
}
@@ -1415,7 +1414,8 @@ VecSimQueryResult *HNSWIndex<DataType, DistType>::searchRangeBottomLayer_WithTim
14151414
idType ep_id, const void *data_point, double epsilon, double radius, void *timeoutCtx,
14161415
VecSimQueryResult_Code *rc) const {
14171416

1418-
auto *results = array_new<VecSimQueryResult>(10); // arbitrary initial cap.
1417+
*rc = VecSim_QueryResult_OK;
1418+
auto res_container = getNewResultsContainer(10); // arbitrary initial cap.
14191419

14201420
#ifdef ENABLE_PARALLELIZATION
14211421
this->visited_nodes_handler =
@@ -1431,10 +1431,7 @@ VecSimQueryResult *HNSWIndex<DataType, DistType>::searchRangeBottomLayer_WithTim
14311431

14321432
if (ep_dist <= radius) {
14331433
// Entry-point is within the radius - add it to the results.
1434-
auto new_result = VecSimQueryResult{};
1435-
VecSimQueryResult_SetId(new_result, getExternalLabel(ep_id));
1436-
VecSimQueryResult_SetScore(new_result, ep_dist);
1437-
results = array_append(results, new_result);
1434+
res_container->emplace(getExternalLabel(ep_id), ep_dist);
14381435
dynamic_range = radius; // to ensure that dyn_range >= radius.
14391436
}
14401437

@@ -1453,7 +1450,7 @@ VecSimQueryResult *HNSWIndex<DataType, DistType>::searchRangeBottomLayer_WithTim
14531450
}
14541451
if (__builtin_expect(VecSimIndexAbstract<DistType>::timeoutCallback(timeoutCtx), 0)) {
14551452
*rc = VecSim_QueryResult_TimedOut;
1456-
return results;
1453+
break;
14571454
}
14581455
candidate_set.pop();
14591456

@@ -1468,15 +1465,14 @@ VecSimQueryResult *HNSWIndex<DataType, DistType>::searchRangeBottomLayer_WithTim
14681465
// requested radius.
14691466
// Here we send the radius as double to match the function arguments type.
14701467
processCandidate_RangeSearch(curr_el_pair.second, data_point, 0, epsilon, visited_tag,
1471-
&results, candidate_set, dynamic_range_search_boundaries,
1468+
res_container, candidate_set, dynamic_range_search_boundaries,
14721469
radius);
14731470
}
1471+
14741472
#ifdef ENABLE_PARALLELIZATION
14751473
visited_nodes_handler_pool->returnVisitedNodesHandlerToPool(this->visited_nodes_handler);
14761474
#endif
1477-
1478-
*rc = VecSim_QueryResult_OK;
1479-
return results;
1475+
return res_container->get_results();
14801476
}
14811477

14821478
template <typename DataType, typename DistType>

src/VecSim/algorithms/hnsw/hnsw_multi.h

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,11 @@ class HNSWIndex_Multi : public HNSWIndex<DataType, DistType> {
3232
return new (this->allocator)
3333
vecsim_stl::updatable_max_heap<DistType, labelType>(this->allocator);
3434
}
35+
inline std::unique_ptr<vecsim_stl::abstract_results_container>
36+
getNewResultsContainer(size_t cap) const override {
37+
return std::unique_ptr<vecsim_stl::abstract_results_container>(
38+
new (this->allocator) vecsim_stl::unique_results_container(cap, this->allocator));
39+
}
3540

3641
inline size_t indexLabelCount() const override;
3742
VecSimBatchIterator *newBatchIterator(const void *queryBlob,
@@ -40,8 +45,6 @@ class HNSWIndex_Multi : public HNSWIndex<DataType, DistType> {
4045
int deleteVector(labelType label) override;
4146
int addVector(const void *vector_data, labelType label) override;
4247
double getDistanceFrom(labelType label, const void *vector_data) const override;
43-
VecSimQueryResult_List rangeQuery(const void *query_data, double radius,
44-
VecSimQueryParams *queryParams) override;
4548
};
4649

4750
/**
@@ -124,15 +127,6 @@ int HNSWIndex_Multi<DataType, DistType>::addVector(const void *vector_data, cons
124127
return this->appendVector(vector_data, label);
125128
}
126129

127-
// TODO: support range queries
128-
template <typename DataType, typename DistType>
129-
VecSimQueryResult_List
130-
HNSWIndex_Multi<DataType, DistType>::rangeQuery(const void *query_data, double radius,
131-
VecSimQueryParams *queryParams) {
132-
this->last_mode = RANGE_QUERY;
133-
return {array_new<VecSimQueryResult>(0), VecSim_QueryResult_OK};
134-
}
135-
136130
template <typename DataType, typename DistType>
137131
VecSimBatchIterator *
138132
HNSWIndex_Multi<DataType, DistType>::newBatchIterator(const void *queryBlob,

src/VecSim/algorithms/hnsw/hnsw_single.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,11 @@ class HNSWIndex_Single : public HNSWIndex<DataType, DistType> {
2929
return new (this->allocator)
3030
vecsim_stl::max_priority_queue<DistType, labelType>(this->allocator);
3131
}
32+
inline std::unique_ptr<vecsim_stl::abstract_results_container>
33+
getNewResultsContainer(size_t cap) const override {
34+
return std::unique_ptr<vecsim_stl::abstract_results_container>(
35+
new (this->allocator) vecsim_stl::default_results_container(cap, this->allocator));
36+
}
3237

3338
inline size_t indexLabelCount() const override;
3439
VecSimBatchIterator *newBatchIterator(const void *queryBlob,

src/VecSim/query_results.cpp

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,21 +4,33 @@
44
#include "VecSim/batch_iterator.h"
55

66
struct VecSimQueryResult_Iterator {
7-
VecSimQueryResult *curr_result;
7+
VecSimQueryResult *results_arr;
88
size_t index;
99
size_t results_len;
1010

1111
explicit VecSimQueryResult_Iterator(VecSimQueryResult_List results_array)
12-
: curr_result(results_array.results), index(0),
12+
: results_arr(results_array.results), index(0),
1313
results_len(array_len(results_array.results)) {}
1414
};
1515

1616
extern "C" size_t VecSimQueryResult_Len(VecSimQueryResult_List rl) { return array_len(rl.results); }
1717

18+
extern "C" VecSimQueryResult *VecSimQueryResult_GetArray(VecSimQueryResult_List rl) {
19+
return rl.results;
20+
}
21+
22+
extern "C" size_t VecSimQueryResult_ArrayLen(VecSimQueryResult *rl) { return array_len(rl); }
23+
1824
extern "C" void VecSimQueryResult_Free(VecSimQueryResult_List rl) {
1925
if (rl.results) {
2026
array_free(rl.results);
21-
rl.results = NULL;
27+
rl.results = nullptr;
28+
}
29+
}
30+
31+
extern "C" void VecSimQueryResult_FreeArray(VecSimQueryResult *rl) {
32+
if (rl) {
33+
array_free(rl);
2234
}
2335
}
2436

@@ -35,7 +47,7 @@ extern "C" VecSimQueryResult *VecSimQueryResult_IteratorNext(VecSimQueryResult_I
3547
if (iterator->index == iterator->results_len) {
3648
return nullptr;
3749
}
38-
VecSimQueryResult *item = iterator->curr_result++;
50+
VecSimQueryResult *item = iterator->results_arr + iterator->index;
3951
iterator->index++;
4052

4153
return item;
@@ -52,13 +64,17 @@ extern "C" double VecSimQueryResult_GetScore(const VecSimQueryResult *res) {
5264
if (res == nullptr) {
5365
return INVALID_SCORE; // "NaN"
5466
}
55-
return (double)res->score;
67+
return res->score;
5668
}
5769

5870
extern "C" void VecSimQueryResult_IteratorFree(VecSimQueryResult_Iterator *iterator) {
5971
delete iterator;
6072
}
6173

74+
extern "C" void VecSimQueryResult_IteratorReset(VecSimQueryResult_Iterator *iterator) {
75+
iterator->index = 0;
76+
}
77+
6278
/********************** batch iterator API ***************************/
6379
VecSimQueryResult_List VecSimBatchIterator_Next(VecSimBatchIterator *iterator, size_t n_results,
6480
VecSimQueryResult_Order order) {

src/VecSim/query_results.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,16 @@ typedef struct VecSimQueryResult_Iterator VecSimQueryResult_Iterator;
4848
*/
4949
size_t VecSimQueryResult_Len(VecSimQueryResult_List results);
5050

51+
/**
52+
* @brief Get the underlying array of results from the opaque list object.
53+
*/
54+
VecSimQueryResult *VecSimQueryResult_GetArray(VecSimQueryResult_List rl);
55+
56+
/**
57+
* @brief Get the length of the result list from the underlying array.
58+
*/
59+
size_t VecSimQueryResult_ArrayLen(VecSimQueryResult *rl);
60+
5161
/**
5262
* @brief Create an iterator for going over the list of results. The iterator needs to be freed
5363
* with VecSimQueryResult_IteratorFree.
@@ -65,6 +75,11 @@ VecSimQueryResult *VecSimQueryResult_IteratorNext(VecSimQueryResult_Iterator *it
6575
*/
6676
bool VecSimQueryResult_IteratorHasNext(VecSimQueryResult_Iterator *iterator);
6777

78+
/**
79+
* @brief Rewind the iterator to the beginning of the result list
80+
*/
81+
void VecSimQueryResult_IteratorReset(VecSimQueryResult_Iterator *iterator);
82+
6883
/**
6984
* @brief Release the iterator
7085
*/
@@ -75,6 +90,11 @@ void VecSimQueryResult_IteratorFree(VecSimQueryResult_Iterator *iterator);
7590
*/
7691
void VecSimQueryResult_Free(VecSimQueryResult_List results);
7792

93+
/**
94+
* @brief Release the query results array.
95+
*/
96+
void VecSimQueryResult_FreeArray(VecSimQueryResult *rl);
97+
7898
/**
7999
* @brief Iterator for running the same query over an index, getting in each iteration
80100
* the best results that haven't been returned in the previous iterations.

src/VecSim/utils/vec_utils.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include <cassert>
55
#include <cerrno>
66
#include <climits>
7+
#include <float.h>
78

89
#ifndef __COMPAR_FN_T
910
#define __COMPAR_FN_T
@@ -77,6 +78,19 @@ VecSimResolveCode validate_positive_integer_param(VecSimRawParam rawParam, long
7778
return VecSimParamResolver_OK;
7879
}
7980

81+
VecSimResolveCode validate_positive_double_param(VecSimRawParam rawParam, double *val) {
82+
char *ep; // For checking that strtod used all rawParam.valLen chars.
83+
errno = 0;
84+
*val = strtod(rawParam.value, &ep);
85+
// Here we verify that val is positive and strtod was successful.
86+
// The last test checks that the entire rawParam.value was used.
87+
// We catch here inputs like "-3.14", "123text" and so on.
88+
if (*val <= 0 || *val == DBL_MAX || errno != 0 || (rawParam.value + rawParam.valLen) != ep) {
89+
return VecSimParamResolverErr_BadValue;
90+
}
91+
return VecSimParamResolver_OK;
92+
}
93+
8094
const char *VecSimAlgo_ToString(VecSimAlgo vecsimAlgo) {
8195
switch (vecsimAlgo) {
8296
case VecSimAlgo_BF:

src/VecSim/utils/vec_utils.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ void sort_results_by_score(VecSimQueryResult_List results);
5656

5757
VecSimResolveCode validate_positive_integer_param(VecSimRawParam rawParam, long long *val);
5858

59+
VecSimResolveCode validate_positive_double_param(VecSimRawParam rawParam, double *val);
60+
5961
const char *VecSimAlgo_ToString(VecSimAlgo vecsimAlgo);
6062

6163
const char *VecSimType_ToString(VecSimType vecsimType);

0 commit comments

Comments
 (0)