Skip to content

Commit b7e5f93

Browse files
authored
feat(hnswlib): Use __builtin_prefetch (#6500)
Use __builtin_prefetch instead of _mm_prefetch so that ARM builds also benefit from memory prefetching. Signed-off-by: mkaruza <mario@dragonflydb.io>
1 parent ca6d97e commit b7e5f93

File tree

1 file changed

+33
-39
lines changed

1 file changed

+33
-39
lines changed

src/core/search/hnsw_alg.h

Lines changed: 33 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,7 @@ template <typename dist_t> class HierarchicalNSW : public hnswlib::AlgorithmInte
216216
return (data_level0_memory_ + internal_id * size_data_per_element_ + offsetData_);
217217
}
218218

219+
// Return pointer to data by internal id
219220
inline char* getDataByInternalId(tableint internal_id) const {
220221
if (copy_vector_) {
221222
return (data_vector_memory_ + internal_id * data_size_);
@@ -292,22 +293,20 @@ template <typename dist_t> class HierarchicalNSW : public hnswlib::AlgorithmInte
292293
}
293294
size_t size = getListCount((linklistsizeint*)data);
294295
tableint* datal = (tableint*)(data + 1);
295-
#ifdef USE_SSE
296-
_mm_prefetch((char*)(visited_array + *(data + 1)), _MM_HINT_T0);
297-
_mm_prefetch((char*)(visited_array + *(data + 1) + 64), _MM_HINT_T0);
298-
_mm_prefetch(getDataByInternalId(*datal), _MM_HINT_T0);
299-
_mm_prefetch(getDataByInternalId(*(datal + 1)), _MM_HINT_T0);
300-
#endif
296+
297+
__builtin_prefetch((char*)(visited_array + *(data + 1)), 0, 3);
298+
__builtin_prefetch((char*)(visited_array + *(data + 1) + 64), 0, 3);
299+
__builtin_prefetch(getDataByInternalId(*datal), 0, 3);
301300

302301
for (size_t j = 0; j < size; j++) {
303302
tableint candidate_id = *(datal + j);
304-
// if (candidate_id == 0) continue;
305-
#ifdef USE_SSE
303+
// if (candidate_id == 0) continue;
304+
305+
// Request prefetching next vector data memory
306306
if (j + 1 < size) {
307-
_mm_prefetch((char*)(visited_array + *(datal + j + 1)), _MM_HINT_T0);
308-
_mm_prefetch(getDataByInternalId(*(datal + j + 1)), _MM_HINT_T0);
307+
__builtin_prefetch(getDataByInternalId(*(datal + j + 1)), 0, 3);
309308
}
310-
#endif
309+
311310
if (visited_array[candidate_id] == visited_array_tag)
312311
continue;
313312
visited_array[candidate_id] = visited_array_tag;
@@ -316,9 +315,8 @@ template <typename dist_t> class HierarchicalNSW : public hnswlib::AlgorithmInte
316315
dist_t dist1 = fstdistfunc_(data_point, currObj1, dist_func_param_);
317316
if (top_candidates.size() < ef_construction_ || lowerBound > dist1) {
318317
candidateSet.emplace(-dist1, candidate_id);
319-
#ifdef USE_SSE
320-
_mm_prefetch(getDataByInternalId(candidateSet.top().second), _MM_HINT_T0);
321-
#endif
318+
319+
__builtin_prefetch(getDataByInternalId(candidateSet.top().second), 0, 3);
322320

323321
if (!isMarkedDeleted(candidate_id))
324322
top_candidates.emplace(dist1, candidate_id);
@@ -401,25 +399,20 @@ template <typename dist_t> class HierarchicalNSW : public hnswlib::AlgorithmInte
401399
metric_distance_computations += size;
402400
}
403401

404-
#ifdef USE_SSE
405-
_mm_prefetch((char*)(visited_array + *(data + 1)), _MM_HINT_T0);
406-
_mm_prefetch((char*)(visited_array + *(data + 1) + 64), _MM_HINT_T0);
407-
_mm_prefetch(data_level0_memory_ + (*(data + 1)) * size_data_per_element_ + offsetData_,
408-
_MM_HINT_T0);
409-
_mm_prefetch((char*)(data + 2), _MM_HINT_T0);
410-
#endif
402+
__builtin_prefetch((char*)(visited_array + *(data + 1)), 0, 3);
403+
__builtin_prefetch((char*)(visited_array + *(data + 1) + 64), 0, 3);
404+
__builtin_prefetch(getDataByInternalId(*(data + 1)), 0, 3);
405+
__builtin_prefetch((char*)(data + 2), 0, 3);
411406

412407
for (size_t j = 1; j <= size; j++) {
413408
int candidate_id = *(data + j);
414-
// if (candidate_id == 0) continue;
415-
#ifdef USE_SSE
409+
// if (candidate_id == 0) continue;
410+
411+
// Request prefetching next vector data memory
416412
if (j + 1 < size) {
417-
_mm_prefetch((char*)(visited_array + *(data + j + 1)), _MM_HINT_T0);
418-
_mm_prefetch(
419-
data_level0_memory_ + (*(data + j + 1)) * size_data_per_element_ + offsetData_,
420-
_MM_HINT_T0); ////////////
413+
__builtin_prefetch(getDataByInternalId(*(data + j + 1)), 0, 3);
421414
}
422-
#endif
415+
423416
if (!(visited_array[candidate_id] == visited_array_tag)) {
424417
visited_array[candidate_id] = visited_array_tag;
425418

@@ -435,11 +428,11 @@ template <typename dist_t> class HierarchicalNSW : public hnswlib::AlgorithmInte
435428

436429
if (flag_consider_candidate) {
437430
candidate_set.emplace(-dist, candidate_id);
438-
#ifdef USE_SSE
439-
_mm_prefetch(data_level0_memory_ + candidate_set.top().second * size_data_per_element_ +
440-
offsetLevel0_, ///////////
441-
_MM_HINT_T0); ////////////////////////
442-
#endif
431+
432+
__builtin_prefetch(data_level0_memory_ +
433+
candidate_set.top().second * size_data_per_element_ +
434+
offsetLevel0_, ///////////
435+
0, 3); ////////////////////////
443436

444437
if (bare_bone_search ||
445438
(!isMarkedDeleted(candidate_id) &&
@@ -1152,13 +1145,14 @@ template <typename dist_t> class HierarchicalNSW : public hnswlib::AlgorithmInte
11521145
data = get_linklist_at_level(currObj, level);
11531146
int size = getListCount(data);
11541147
tableint* datal = (tableint*)(data + 1);
1155-
#ifdef USE_SSE
1156-
_mm_prefetch(getDataByInternalId(*datal), _MM_HINT_T0);
1157-
#endif
1148+
1149+
__builtin_prefetch(getDataByInternalId(*datal), 0, 3);
1150+
11581151
for (int i = 0; i < size; i++) {
1159-
#ifdef USE_SSE
1160-
_mm_prefetch(getDataByInternalId(*(datal + i + 1)), _MM_HINT_T0);
1161-
#endif
1152+
if (i + 1 < size) {
1153+
__builtin_prefetch(getDataByInternalId(*(datal + i + 1)), 1, 3);
1154+
}
1155+
11621156
tableint cand = datal[i];
11631157
dist_t d = fstdistfunc_(dataPoint, getDataByInternalId(cand), dist_func_param_);
11641158
if (d < curdist) {

0 commit comments

Comments
 (0)