Skip to content

Commit 488ab52

Browse files
authored
Add cpp examples (#435)
* Add cpp examples * Add multithreaded cpp examples
1 parent 68a3387 commit 488ab52

18 files changed

+743
-5
lines changed

.github/workflows/build.yml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ jobs:
2121
- name: Test
2222
timeout-minutes: 15
2323
run: |
24-
python -m unittest discover -v --start-directory examples --pattern "example*.py"
24+
python -m unittest discover -v --start-directory examples/python --pattern "example*.py"
2525
python -m unittest discover -v --start-directory tests/python --pattern "bindings_test*.py"
2626
2727
test_cpp:
@@ -61,6 +61,12 @@ jobs:
6161
if [ "$RUNNER_OS" == "Windows" ]; then
6262
cp ./Release/* ./
6363
fi
64+
./example_search
65+
./example_filter
66+
./example_replace_deleted
67+
./example_mt_search
68+
./example_mt_filter
69+
./example_mt_replace_deleted
6470
./searchKnnCloserFirst_test
6571
./searchKnnWithFilter_test
6672
./multiThreadLoad_test

ALGO_PARAMS.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,5 +27,5 @@ ef_construction leads to longer construction, but better index quality. At some
2727
not improve the quality of the index. One way to check if the selection of ef_construction was ok is to measure a recall
2828
for M nearest neighbor search when ```ef``` =```ef_construction```: if the recall is lower than 0.9, then there is room
2929
for improvement.
30-
* ```num_elements``` - defines the maximum number of elements in the index. The index can be extened by saving/loading(load_index
30+
* ```num_elements``` - defines the maximum number of elements in the index. The index can be extended by saving/loading (load_index
3131
function has a parameter which defines the new maximum number of elements).

CMakeLists.txt

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,26 @@ if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME)
1616
SET( CMAKE_CXX_FLAGS "-Ofast -lrt -DNDEBUG -std=c++11 -DHAVE_CXX0X -openmp -march=native -fpic -w -fopenmp -ftree-vectorize" )
1717
endif()
1818

19+
# examples
20+
add_executable(example_search examples/cpp/example_search.cpp)
21+
target_link_libraries(example_search hnswlib)
22+
23+
add_executable(example_filter examples/cpp/example_filter.cpp)
24+
target_link_libraries(example_filter hnswlib)
25+
26+
add_executable(example_replace_deleted examples/cpp/example_replace_deleted.cpp)
27+
target_link_libraries(example_replace_deleted hnswlib)
28+
29+
add_executable(example_mt_search examples/cpp/example_mt_search.cpp)
30+
target_link_libraries(example_mt_search hnswlib)
31+
32+
add_executable(example_mt_filter examples/cpp/example_mt_filter.cpp)
33+
target_link_libraries(example_mt_filter hnswlib)
34+
35+
add_executable(example_mt_replace_deleted examples/cpp/example_mt_replace_deleted.cpp)
36+
target_link_libraries(example_mt_replace_deleted hnswlib)
37+
38+
# tests
1939
add_executable(test_updates tests/cpp/updates_test.cpp)
2040
target_link_libraries(test_updates hnswlib)
2141

README.md

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,12 @@ Properties of `hnswlib.Index` that support reading and writing:
111111
112112

113113
#### Python bindings examples
114-
[See more examples here](examples/EXAMPLES.md)
114+
[See more examples here](examples/python/EXAMPLES.md):
115+
* Creating index, inserting elements, searching, serialization/deserialization
116+
* Filtering during the search with a boolean function
117+
* Deleting the elements and reusing the memory of the deleted elements for newly added elements
118+
119+
An example of creating index, inserting elements, searching and pickle serialization:
115120
```python
116121
import hnswlib
117122
import numpy as np
@@ -218,6 +223,14 @@ labels, distances = p.knn_query(data, k=1)
218223
print("Recall for two batches:", np.mean(labels.reshape(-1) == np.arange(len(data))), "\n")
219224
```
220225

226+
#### C++ examples
227+
[See examples here](examples/cpp/EXAMPLES.md):
228+
* creating index, inserting elements, searching, serialization/deserialization
229+
* filtering during the search with a boolean function
230+
* deleting the elements and reusing the memory of the deleted elements for newly added elements
231+
* multithreaded usage
232+
233+
221234
### Bindings installation
222235

223236
You can install from sources:

examples/cpp/EXAMPLES.md

Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
# C++ examples
2+
3+
Creating index, inserting elements, searching and serialization
4+
```cpp
5+
#include "../../hnswlib/hnswlib.h"
6+
7+
8+
int main() {
9+
int dim = 16; // Dimension of the elements
10+
int max_elements = 10000; // Maximum number of elements, should be known beforehand
11+
int M = 16; // Tightly connected with internal dimensionality of the data
12+
// strongly affects the memory consumption
13+
int ef_construction = 200; // Controls index search speed/build speed tradeoff
14+
15+
// Initing index
16+
hnswlib::L2Space space(dim);
17+
hnswlib::HierarchicalNSW<float>* alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, max_elements, M, ef_construction);
18+
19+
// Generate random data
20+
std::mt19937 rng;
21+
rng.seed(47);
22+
std::uniform_real_distribution<> distrib_real;
23+
float* data = new float[dim * max_elements];
24+
for (int i = 0; i < dim * max_elements; i++) {
25+
data[i] = distrib_real(rng);
26+
}
27+
28+
// Add data to index
29+
for (int i = 0; i < max_elements; i++) {
30+
alg_hnsw->addPoint(data + i * dim, i);
31+
}
32+
33+
// Query the elements for themselves and measure recall
34+
float correct = 0;
35+
for (int i = 0; i < max_elements; i++) {
36+
std::priority_queue<std::pair<float, hnswlib::labeltype>> result = alg_hnsw->searchKnn(data + i * dim, 1);
37+
hnswlib::labeltype label = result.top().second;
38+
if (label == i) correct++;
39+
}
40+
float recall = correct / max_elements;
41+
std::cout << "Recall: " << recall << "\n";
42+
43+
// Serialize index
44+
std::string hnsw_path = "hnsw.bin";
45+
alg_hnsw->saveIndex(hnsw_path);
46+
delete alg_hnsw;
47+
48+
// Deserialize index and check recall
49+
alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, hnsw_path);
50+
correct = 0;
51+
for (int i = 0; i < max_elements; i++) {
52+
std::priority_queue<std::pair<float, hnswlib::labeltype>> result = alg_hnsw->searchKnn(data + i * dim, 1);
53+
hnswlib::labeltype label = result.top().second;
54+
if (label == i) correct++;
55+
}
56+
recall = (float)correct / max_elements;
57+
std::cout << "Recall of deserialized index: " << recall << "\n";
58+
59+
delete[] data;
60+
delete alg_hnsw;
61+
return 0;
62+
}
63+
```
64+
65+
An example of filtering with a boolean function during the search:
66+
```cpp
67+
#include "../../hnswlib/hnswlib.h"
68+
69+
70+
// Filter that allows labels divisible by divisor
71+
class PickDivisibleIds: public hnswlib::BaseFilterFunctor {
72+
unsigned int divisor = 1;
73+
public:
74+
PickDivisibleIds(unsigned int divisor): divisor(divisor) {
75+
assert(divisor != 0);
76+
}
77+
bool operator()(hnswlib::labeltype label_id) {
78+
return label_id % divisor == 0;
79+
}
80+
};
81+
82+
83+
int main() {
84+
int dim = 16; // Dimension of the elements
85+
int max_elements = 10000; // Maximum number of elements, should be known beforehand
86+
int M = 16; // Tightly connected with internal dimensionality of the data
87+
// strongly affects the memory consumption
88+
int ef_construction = 200; // Controls index search speed/build speed tradeoff
89+
90+
// Initing index
91+
hnswlib::L2Space space(dim);
92+
hnswlib::HierarchicalNSW<float>* alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, max_elements, M, ef_construction);
93+
94+
// Generate random data
95+
std::mt19937 rng;
96+
rng.seed(47);
97+
std::uniform_real_distribution<> distrib_real;
98+
float* data = new float[dim * max_elements];
99+
for (int i = 0; i < dim * max_elements; i++) {
100+
data[i] = distrib_real(rng);
101+
}
102+
103+
// Add data to index
104+
for (int i = 0; i < max_elements; i++) {
105+
alg_hnsw->addPoint(data + i * dim, i);
106+
}
107+
108+
// Create filter that allows only even labels
109+
PickDivisibleIds pickIdsDivisibleByTwo(2);
110+
111+
// Query the elements for themselves with filter and check returned labels
112+
int k = 10;
113+
for (int i = 0; i < max_elements; i++) {
114+
std::vector<std::pair<float, hnswlib::labeltype>> result = alg_hnsw->searchKnnCloserFirst(data + i * dim, k, &pickIdsDivisibleByTwo);
115+
for (auto item: result) {
116+
if (item.second % 2 == 1) std::cout << "Error: found odd label\n";
117+
}
118+
}
119+
120+
delete[] data;
121+
delete alg_hnsw;
122+
return 0;
123+
}
124+
```
125+
126+
An example with reusing the memory of the deleted elements when new elements are being added (via `allow_replace_deleted` flag):
127+
```cpp
128+
#include "../../hnswlib/hnswlib.h"
129+
130+
131+
int main() {
    const int dim = 16;               // Dimension of the elements
    const int max_elements = 10000;   // Maximum number of elements, should be known beforehand
    const int M = 16;                 // Tightly connected with internal dimensionality of the data
                                      // strongly affects the memory consumption
    const int ef_construction = 200;  // Controls index search speed/build speed tradeoff

    // Create the index; the trailing `true` turns on replacement of deleted elements
    hnswlib::L2Space space(dim);
    hnswlib::HierarchicalNSW<float>* alg_hnsw =
        new hnswlib::HierarchicalNSW<float>(&space, max_elements, M, ef_construction, 100, true);

    // Random values for the initial max_elements vectors
    std::mt19937 rng(47);
    std::uniform_real_distribution<> distrib_real;
    const int initial_values = dim * max_elements;
    float* data = new float[initial_values];
    for (int pos = 0; pos < initial_values; ++pos) {
        data[pos] = distrib_real(rng);
    }

    // Fill the index to capacity, labelling each vector with its row number
    for (int row = 0; row < max_elements; ++row) {
        alg_hnsw->addPoint(data + row * dim, row);
    }

    // Mark labels 0 .. num_deleted-1 (the first half) as deleted
    const int num_deleted = max_elements / 2;
    for (int label = 0; label < num_deleted; ++label) {
        alg_hnsw->markDelete(label);
    }

    // Random values for the vectors that will take the freed slots
    const int replacement_values = dim * num_deleted;
    float* add_data = new float[replacement_values];
    for (int pos = 0; pos < replacement_values; ++pos) {
        add_data[pos] = distrib_real(rng);
    }

    // The index is already at its maximum size, so plain insertion is not
    // possible; passing replace_deleted=true lets each new element reuse the
    // slot of a deleted one. New labels continue after the original ones.
    for (int row = 0; row < num_deleted; ++row) {
        alg_hnsw->addPoint(add_data + row * dim, max_elements + row, true);
    }

    delete[] data;
    delete[] add_data;
    delete alg_hnsw;
    return 0;
}
180+
```
181+
182+
Multithreaded examples:
183+
* Creating index, inserting elements, searching [example_mt_search.cpp](example_mt_search.cpp)
184+
* Filtering during the search with a boolean function [example_mt_filter.cpp](example_mt_filter.cpp)
185+
* Reusing the memory of the deleted elements when new elements are being added [example_mt_replace_deleted.cpp](example_mt_replace_deleted.cpp)

examples/cpp/example_filter.cpp

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
#include "../../hnswlib/hnswlib.h"
2+
3+
4+
// Filter that allows labels divisible by divisor
5+
class PickDivisibleIds: public hnswlib::BaseFilterFunctor {
6+
unsigned int divisor = 1;
7+
public:
8+
PickDivisibleIds(unsigned int divisor): divisor(divisor) {
9+
assert(divisor != 0);
10+
}
11+
bool operator()(hnswlib::labeltype label_id) {
12+
return label_id % divisor == 0;
13+
}
14+
};
15+
16+
17+
int main() {
18+
int dim = 16; // Dimension of the elements
19+
int max_elements = 10000; // Maximum number of elements, should be known beforehand
20+
int M = 16; // Tightly connected with internal dimensionality of the data
21+
// strongly affects the memory consumption
22+
int ef_construction = 200; // Controls index search speed/build speed tradeoff
23+
24+
// Initing index
25+
hnswlib::L2Space space(dim);
26+
hnswlib::HierarchicalNSW<float>* alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, max_elements, M, ef_construction);
27+
28+
// Generate random data
29+
std::mt19937 rng;
30+
rng.seed(47);
31+
std::uniform_real_distribution<> distrib_real;
32+
float* data = new float[dim * max_elements];
33+
for (int i = 0; i < dim * max_elements; i++) {
34+
data[i] = distrib_real(rng);
35+
}
36+
37+
// Add data to index
38+
for (int i = 0; i < max_elements; i++) {
39+
alg_hnsw->addPoint(data + i * dim, i);
40+
}
41+
42+
// Create filter that allows only even labels
43+
PickDivisibleIds pickIdsDivisibleByTwo(2);
44+
45+
// Query the elements for themselves with filter and check returned labels
46+
int k = 10;
47+
for (int i = 0; i < max_elements; i++) {
48+
std::vector<std::pair<float, hnswlib::labeltype>> result = alg_hnsw->searchKnnCloserFirst(data + i * dim, k, &pickIdsDivisibleByTwo);
49+
for (auto item: result) {
50+
if (item.second % 2 == 1) std::cout << "Error: found odd label\n";
51+
}
52+
}
53+
54+
delete[] data;
55+
delete alg_hnsw;
56+
return 0;
57+
}

0 commit comments

Comments
 (0)