Skip to content

Commit a137fe8

Browse files
committed
unit testing and C++ cli validation [skip ci]
1 parent 98dc4f8 commit a137fe8

File tree

7 files changed

+386
-335
lines changed

7 files changed

+386
-335
lines changed

src/include/detail/flat/qv.h

Lines changed: 47 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -73,10 +73,13 @@ namespace detail::flat {
7373
*/
7474
template <class DB, class Q>
7575
[[deprecated]] auto qv_query_heap_0(
76-
DB& db, const Q& q, int k_nn, unsigned int nthreads) {
76+
const DB& db, const Q& q, int k_nn, unsigned int nthreads) {
7777
scoped_timer _{tdb_func__};
7878

79-
ColMajorMatrix<size_t> top_k(k_nn, size(q));
79+
using id_type = size_t;
80+
using score_type = float;
81+
82+
ColMajorMatrix<id_type> top_k(k_nn, size(q));
8083

8184
auto par = stdx::execution::indexed_parallel_policy{nthreads};
8285
stdx::range_for_each(
@@ -85,7 +88,7 @@ template <class DB, class Q>
8588
size_t size_db = size(db);
8689

8790
// @todo can we do this more efficiently?
88-
Vector<float> scores(size_db);
91+
Vector<score_type> scores(size_db);
8992

9093
for (size_t i = 0; i < size_db; ++i) {
9194
scores[i] = L2(q_vec, db[i]);
@@ -113,40 +116,44 @@ template <class DB, class Q>
113116
* @return A matrix of size k x #queries containing the top k results for each
114117
* query.
115118
*/
116-
template <class T, class DB, class Q, class Index>
119+
template <class T, class DB, class Q, class ID>
117120
auto qv_query_heap(
118121
T,
119-
DB& db,
120-
Q& q,
121-
const std::vector<Index>& ids,
122+
const DB& db,
123+
const Q& q,
124+
const ID& ids,
122125
int k_nn,
123126
unsigned nthreads);
124127

125128
template <class DB, class Q>
126-
auto qv_query_heap(DB& db, Q& q, int k_nn, unsigned nthreads) {
129+
auto qv_query_heap(const DB& db, const Q& q, int k_nn, unsigned nthreads) {
127130
return qv_query_heap(
128131
without_ids{}, db, q, std::vector<size_t>{}, k_nn, nthreads);
129132
}
130133

131-
template <class DB, class Q, class Index>
134+
template <class DB, class Q, class ID>
132135
auto qv_query_heap(
133-
DB& db, Q& q, const std::vector<Index>& ids, int k_nn, unsigned nthreads) {
136+
const DB& db, const Q& q, const ID& ids, int k_nn, unsigned nthreads) {
134137
return qv_query_heap(with_ids{}, db, q, ids, k_nn, nthreads);
135138
}
136139

137140
// @todo Add to out of core
138-
template <class T, class DB, class Q, class Index>
141+
template <class T, class DB, class Q, class ID>
139142
auto qv_query_heap(
140143
T,
141-
DB& db,
142-
Q& query,
143-
const std::vector<Index>& ids,
144+
const DB& db,
145+
const Q& query,
146+
const ID& ids,
144147
int k_nn,
145148
unsigned nthreads) {
146149
scoped_timer _{tdb_func__};
147150

148-
auto top_k = ColMajorMatrix<size_t>(k_nn, query.num_cols());
149-
auto top_k_scores = ColMajorMatrix<float>(k_nn, query.num_cols());
151+
// using feature_type = typename std::remove_reference_t<decltype(db)>::value_type;
152+
using id_type = typename std::remove_reference_t<decltype(ids)>::value_type;
153+
using score_type = float;
154+
155+
auto top_k = ColMajorMatrix<id_type>(k_nn, query.num_cols());
156+
auto top_k_scores = ColMajorMatrix<score_type>(k_nn, query.num_cols());
150157

151158
// Have to do explicit asynchronous threading here, as the current parallel
152159
// algorithms have iterator-based interfaces, and the `Matrix` class does not
@@ -159,7 +166,7 @@ auto qv_query_heap(
159166
std::move(par),
160167
query,
161168
[&, size_db](auto&& q_vec, auto&& n = 0, auto&& j = 0) {
162-
fixed_min_pair_heap<float, size_t> min_scores(k_nn);
169+
fixed_min_pair_heap<score_type, id_type> min_scores(k_nn);
163170

164171
for (size_t i = 0; i < size_db; ++i) {
165172
auto score = L2(q_vec, db[i]);
@@ -190,35 +197,40 @@ auto qv_query_heap(
190197
* @return A matrix of size k x #queries containing the top k results for each
191198
* query.
192199
*/
193-
template <class T, class DB, class Q, class Index>
200+
template <class T, class DB, class Q, class ID>
194201
auto qv_query_heap_tiled(
195202
T,
196203
DB& db,
197-
Q& q,
198-
const std::vector<Index>& ids,
204+
const Q& q,
205+
const ID& ids,
199206
int k_nn,
200207
unsigned nthreads);
201208

202209
template <class DB, class Q>
203-
auto qv_query_heap_tiled(DB& db, Q& q, int k_nn, unsigned nthreads) {
210+
auto qv_query_heap_tiled(DB& db, const Q& q, int k_nn, unsigned nthreads) {
204211
return qv_query_heap_tiled(
205212
without_ids{}, db, q, std::vector<size_t>{}, k_nn, nthreads);
206213
}
207214

208-
template <class DB, class Q, class Index>
215+
template <class DB, class Q, class ID>
209216
auto qv_query_heap_tiled(
210-
DB& db, Q& q, const std::vector<Index>& ids, int k_nn, unsigned nthreads) {
217+
DB& db, Q& q, const ID& ids, int k_nn, unsigned nthreads) {
211218
return qv_query_heap_tiled(with_ids{}, db, q, ids, k_nn, nthreads);
212219
}
213220

214-
template <class T, class DB, class Q, class Index>
221+
template <class T, class DB, class Q, class ID>
215222
auto qv_query_heap_tiled(
216223
T,
217224
DB& db,
218-
Q& query,
219-
[[maybe_unused]] const std::vector<Index>& ids,
225+
const Q& query,
226+
[[maybe_unused]] const ID& ids,
220227
int k_nn,
221228
unsigned nthreads) {
229+
230+
// using feature_type = typename std::remove_reference_t<decltype(db)>::value_type;
231+
using id_type = typename std::remove_reference_t<decltype(ids)>::value_type;
232+
using score_type = float;
233+
222234
if constexpr (is_loadable_v<decltype(db)>) {
223235
db.load();
224236
}
@@ -236,8 +248,8 @@ auto qv_query_heap_tiled(
236248
std::vector<std::future<void>> futs;
237249
futs.reserve(nthreads);
238250

239-
auto min_scores = std::vector<fixed_min_pair_heap<float, size_t>>(
240-
size(query), fixed_min_pair_heap<float, size_t>(k_nn));
251+
auto min_scores = std::vector<fixed_min_pair_heap<score_type, id_type>>(
252+
size(query), fixed_min_pair_heap<score_type, id_type>(k_nn));
241253

242254
// @todo: Use range::for_each
243255
for (size_t n = 0; n < nthreads; ++n) {
@@ -251,8 +263,8 @@ auto qv_query_heap_tiled(
251263
auto len = 2 * ((stop - start) / 2);
252264
auto end = start + len;
253265

254-
// auto min_scores0 = fixed_min_pair_heap<float, size_t> (k);
255-
// auto min_scores1 = fixed_min_pair_heap<float, size_t> (k);
266+
// auto min_scores0 = fixed_min_pair_heap<score_type, id_type> (k);
267+
// auto min_scores1 = fixed_min_pair_heap<score_type, id_type> (k);
256268

257269
for (auto j = start; j != end; j += 2) {
258270
auto j0 = j + 0;
@@ -365,15 +377,17 @@ template <class DB, class Q>
365377
auto qv_partition(const DB& db, const Q& q, unsigned nthreads) {
366378
scoped_timer _{tdb_func__};
367379

380+
// Just need a single vector -- creating an index, not ids, so hardcoded size_t is okay to use here
381+
using id_type = size_t;
382+
using score_type = float;
368383
auto size_db = size(db);
369384

370-
// Just need a single vector
371-
std::vector<size_t> top_k(q.num_cols());
385+
std::vector<id_type> top_k(q.num_cols());
372386

373387
auto par = stdx::execution::indexed_parallel_policy{(size_t)nthreads};
374388
stdx::range_for_each(
375389
std::move(par), q, [&, size_db](auto&& qvec, auto&& n = 0, auto&& j = 0) {
376-
float min_score = std::numeric_limits<float>::max();
390+
score_type min_score = std::numeric_limits<score_type>::max();
377391
size_t idx = 0;
378392

379393
for (size_t i = 0; i < size_db; ++i) {

src/include/detail/flat/vq.h

Lines changed: 54 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -51,42 +51,47 @@ namespace detail::flat {
5151
*
5252
* @todo Unify out of core and not out of core versions.
5353
*/
54-
template <class T, class DB, class Q, class Index>
54+
template <class T, class DB, class Q, class ID>
5555
auto vq_query_heap(
5656
T,
5757
DB& db,
58-
Q& q,
59-
const std::vector<Index>& ids,
58+
const Q& q,
59+
const ID& ids,
6060
int k_nn,
6161
unsigned nthreads);
6262

6363
template <class DB, class Q>
64-
auto vq_query_heap(DB& db, Q& q, int k_nn, unsigned nthreads) {
64+
auto vq_query_heap(DB& db, const Q& q, int k_nn, unsigned nthreads) {
6565
return vq_query_heap(
6666
without_ids{}, db, q, std::vector<size_t>{}, k_nn, nthreads);
6767
}
6868

69-
template <class DB, class Q, class Index>
69+
template <class DB, class Q, class ID>
7070
auto vq_query_heap(
71-
DB& db, Q& q, const std::vector<Index>& ids, int k_nn, unsigned nthreads) {
71+
DB& db, const Q& q, const ID& ids, int k_nn, unsigned nthreads) {
7272
return vq_query_heap(with_ids{}, db, q, ids, k_nn, nthreads);
7373
}
7474

7575
// @todo Support out of core
76-
template <class T, class DB, class Q, class Index>
76+
template <class T, class DB, class Q, class ID>
7777
auto vq_query_heap(
7878
T,
7979
DB& db,
80-
Q& q,
81-
const std::vector<Index>& ids,
80+
const Q& q,
81+
const ID& ids,
8282
int k_nn,
8383
unsigned nthreads) {
8484
// @todo Need to get the total number of queries, not just the first block
8585
// @todo Use Matrix here rather than vector of vectors
86-
std::vector<std::vector<fixed_min_pair_heap<float, Index>>> scores(
86+
87+
// using feature_type = typename std::remove_reference_t<decltype(db)>::value_type;
88+
using id_type = typename std::remove_reference_t<decltype(ids)>::value_type;
89+
using score_type = float;
90+
91+
std::vector<std::vector<fixed_min_pair_heap<score_type, id_type>>> scores(
8792
nthreads,
88-
std::vector<fixed_min_pair_heap<float, Index>>(
89-
size(q), fixed_min_pair_heap<float, Index>(k_nn)));
93+
std::vector<fixed_min_pair_heap<score_type, id_type>>(
94+
size(q), fixed_min_pair_heap<score_type, id_type>(k_nn)));
9095

9196
unsigned size_q = size(q);
9297
auto par = stdx::execution::indexed_parallel_policy{nthreads};
@@ -138,7 +143,7 @@ auto vq_query_heap(
138143
}
139144

140145
consolidate_scores(scores);
141-
auto top_k = get_top_k_with_scores<fixed_min_pair_heap<float, Index>, Index>(scores, k_nn);
146+
auto top_k = get_top_k_with_scores(scores, k_nn);
142147

143148
return top_k;
144149
}
@@ -153,41 +158,46 @@ auto vq_query_heap(
153158
* @param nthreads
154159
* @return
155160
*/
156-
template <class T, class DB, class Q, class Index>
161+
template <class T, class DB, class Q, class ID>
157162
auto vq_query_heap_tiled(
158163
T,
159164
DB& db,
160-
Q& q,
161-
const std::vector<Index>& ids,
165+
const Q& q,
166+
const ID& ids,
162167
int k_nn,
163168
unsigned nthreads);
164169

165170
template <class DB, class Q>
166-
auto vq_query_heap_tiled(DB& db, Q& q, int k_nn, unsigned nthreads) {
171+
auto vq_query_heap_tiled(DB& db, const Q& q, int k_nn, unsigned nthreads) {
167172
return vq_query_heap_tiled(
168173
without_ids{}, db, q, std::vector<size_t>{}, k_nn, nthreads);
169174
}
170175

171-
template <class DB, class Q, class Index>
176+
template <class DB, class Q, class ID>
172177
auto vq_query_heap_tiled(
173-
DB& db, Q& q, const std::vector<Index>& ids, int k_nn, unsigned nthreads) {
178+
DB& db, const Q& q, const ID& ids, int k_nn, unsigned nthreads) {
174179
return vq_query_heap_tiled(with_ids{}, db, q, ids, k_nn, nthreads);
175180
}
176181

177-
template <class T, class DB, class Q, class Index>
182+
template <class T, class DB, class Q, class ID>
178183
auto vq_query_heap_tiled(
179184
T,
180185
DB& db,
181-
Q& q,
182-
const std::vector<Index>& ids,
186+
const Q& q,
187+
const ID& ids,
183188
int k_nn,
184189
unsigned nthreads) {
185190
// @todo Need to get the total number of queries, not just the first block
186191
// @todo Use Matrix here rather than vector of vectors
187-
std::vector<std::vector<fixed_min_pair_heap<float, Index>>> scores(
192+
193+
// using feature_type = typename std::remove_reference_t<decltype(db)>::value_type;
194+
using id_type = typename std::remove_reference_t<decltype(ids)>::value_type;
195+
using score_type = float;
196+
197+
std::vector<std::vector<fixed_min_pair_heap<score_type, id_type>>> scores(
188198
nthreads,
189-
std::vector<fixed_min_pair_heap<float, Index>>(
190-
size(q), fixed_min_pair_heap<float, Index>(k_nn)));
199+
std::vector<fixed_min_pair_heap<score_type, id_type>>(
200+
size(q), fixed_min_pair_heap<score_type, id_type>(k_nn)));
191201

192202
unsigned size_q = size(q);
193203
auto par = stdx::execution::indexed_parallel_policy{nthreads};
@@ -223,48 +233,53 @@ auto vq_query_heap_tiled(
223233
} while (load(db));
224234

225235
consolidate_scores(scores);
226-
auto top_k = get_top_k_with_scores<fixed_min_pair_heap<float, Index>, Index>(scores, k_nn);
236+
auto top_k = get_top_k_with_scores(scores, k_nn);
227237

228238
return top_k;
229239
}
230240

231241
// ====================================================================================================
232242

233-
template <class T, class DB, class Q, class Index>
243+
template <class T, class DB, class Q, class ID>
234244
auto vq_query_heap_2(
235245
T,
236246
DB& db,
237-
Q& q,
238-
const std::vector<Index>& ids,
247+
const Q& q,
248+
const ID& ids,
239249
int k_nn,
240250
unsigned nthreads);
241251

242252
template <class DB, class Q>
243-
auto vq_query_heap_2(DB& db, Q& q, int k_nn, unsigned nthreads) {
253+
auto vq_query_heap_2(DB& db, const Q& q, int k_nn, unsigned nthreads) {
244254
return vq_query_heap_2(
245255
without_ids{}, db, q, std::vector<size_t>{}, k_nn, nthreads);
246256
}
247257

248-
template <class DB, class Q, class Index>
258+
template <class DB, class Q, class ID>
249259
auto vq_query_heap_2(
250-
DB& db, Q& q, const std::vector<Index>& ids, int k_nn, unsigned nthreads) {
260+
DB& db, const Q& q, const ID& ids, int k_nn, unsigned nthreads) {
251261
return vq_query_heap_2(with_ids{}, db, q, ids, k_nn, nthreads);
252262
}
253263

254-
template <class T, class DB, class Q, class Index>
264+
template <class T, class DB, class Q, class ID>
255265
auto vq_query_heap_2(
256266
T,
257267
DB& db,
258-
Q& q,
259-
const std::vector<Index>& ids,
268+
const Q& q,
269+
const ID& ids,
260270
int k_nn,
261271
unsigned nthreads) {
262272
// @todo Need to get the total number of queries, not just the first block
263273
// @todo Use Matrix here rather than vector of vectors
264-
std::vector<std::vector<fixed_min_pair_heap<float, Index>>> scores(
274+
275+
// using feature_type = typename std::remove_reference_t<decltype(db)>::value_type;
276+
using id_type = typename std::remove_reference_t<decltype(ids)>::value_type;
277+
using score_type = float;
278+
279+
std::vector<std::vector<fixed_min_pair_heap<score_type, id_type>>> scores(
265280
nthreads,
266-
std::vector<fixed_min_pair_heap<float, Index>>(
267-
size(q), fixed_min_pair_heap<float, Index>(k_nn)));
281+
std::vector<fixed_min_pair_heap<score_type, id_type>>(
282+
size(q), fixed_min_pair_heap<score_type, id_type>(k_nn)));
268283

269284
unsigned size_q = size(q);
270285
auto par = stdx::execution::indexed_parallel_policy{nthreads};
@@ -300,7 +315,7 @@ auto vq_query_heap_2(
300315
} while (load(db));
301316

302317
consolidate_scores(scores);
303-
auto top_k = get_top_k_with_scores<fixed_min_pair_heap<float, Index>, Index>(scores, k_nn);
318+
auto top_k = get_top_k_with_scores(scores, k_nn);
304319

305320
return top_k;
306321
}

0 commit comments

Comments
 (0)