Skip to content

Commit 1116b7c

Browse files
authored
Merge pull request #647 from unum-cloud/main-dev
Fix Isolating Removed Entries
2 parents 12304ae + f79e703 commit 1116b7c

File tree

4 files changed

+66
-19
lines changed

4 files changed

+66
-19
lines changed

.vscode/settings.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,8 @@
135135
"xstring": "cpp",
136136
"xtr1common": "cpp",
137137
"xtree": "cpp",
138-
"xutility": "cpp"
138+
"xutility": "cpp",
139+
"cfenv": "cpp"
139140
},
140141
"cSpell.words": [
141142
"allclose",

cpp/test.cpp

Lines changed: 44 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -880,11 +880,11 @@ void test_exact_search(std::size_t dataset_count, std::size_t queries_count, std
880880
std::size_t dimensions = 32;
881881
metric_punned_t metric(dimensions, metric_kind_t::cos_k, scalar_kind<scalar_at>());
882882

883-
std::random_device rd;
884-
std::mt19937 gen(rd());
885-
std::uniform_real_distribution<> dis(0.0, 1.0);
883+
std::random_device seed_source;
884+
std::mt19937 generator(seed_source());
885+
std::uniform_real_distribution<> distribution(0.0, 1.0); // ! We can't pass `scalar_at` to the distribution
886886
std::vector<scalar_at> dataset(dataset_count * dimensions);
887-
std::generate(dataset.begin(), dataset.end(), [&] { return static_cast<scalar_at>(dis(gen)); });
887+
std::generate(dataset.begin(), dataset.end(), [&] { return static_cast<scalar_at>(distribution(generator)); });
888888

889889
exact_search_t search;
890890
auto results = search( //
@@ -1107,15 +1107,15 @@ void test_filtered_search() {
11071107
constexpr std::size_t dimensions = 32;
11081108
metric_punned_t metric(dimensions, metric_kind_t::cos_k);
11091109

1110-
std::random_device rd;
1111-
std::mt19937 gen(rd());
1112-
std::uniform_real_distribution<> dis(0.0, 1.0);
1110+
std::random_device seed_source;
1111+
std::mt19937 generator(seed_source());
1112+
std::uniform_real_distribution<float> distribution(0.0, 1.0);
11131113
using vector_of_vectors_t = std::vector<std::vector<float>>;
11141114

11151115
vector_of_vectors_t vector_of_vectors(dataset_count);
11161116
for (auto& vector : vector_of_vectors) {
11171117
vector.resize(dimensions);
1118-
std::generate(vector.begin(), vector.end(), [&] { return dis(gen); });
1118+
std::generate(vector.begin(), vector.end(), [&] { return distribution(generator); });
11191119
}
11201120

11211121
index_dense_t index = index_dense_t::make(metric);
@@ -1144,6 +1144,41 @@ void test_filtered_search() {
11441144
}
11451145
}
11461146

1147+
/**
 *  @brief  Checks that removed entries stop appearing in search results once the index is isolated.
 *
 *  Fills a dense index with `dataset_count` random vectors, removes every entry with an even key,
 *  calls `isolate()`, and then expects every search to return exactly the remaining half.
 */
void test_isolate() {
    constexpr std::size_t dataset_count = 16;
    constexpr std::size_t dimensions = 32;
    metric_punned_t metric(dimensions, metric_kind_t::cos_k);

    std::random_device seed_source;
    std::mt19937 generator(seed_source());
    std::uniform_real_distribution<float> distribution(0.0, 1.0);
    using vector_of_vectors_t = std::vector<std::vector<float>>;

    vector_of_vectors_t vector_of_vectors(dataset_count);
    for (auto& vector : vector_of_vectors) {
        vector.resize(dimensions);
        std::generate(vector.begin(), vector.end(), [&] { return distribution(generator); });
    }

    index_dense_t index = index_dense_t::make(metric);
    index.reserve(dataset_count);
    for (std::size_t idx = 0; idx < dataset_count; ++idx) {
        index.add(idx, vector_of_vectors[idx].data());
    }
    expect_eq(index.size(), dataset_count);

    // Drop every entry with an even key, leaving `dataset_count / 2` live entries.
    for (std::size_t idx = 0; idx < dataset_count; idx += 2) {
        index.remove(idx);
    }

    // Named distinctly from the per-query results below, so nothing is shadowed.
    auto isolation_result = index.isolate();
    (void)isolation_result; // Only the effect of isolation on subsequent searches is verified here.

    // Ask for as many matches as were ever inserted; only the surviving half may come back.
    for (std::size_t idx = 0; idx < dataset_count; ++idx) {
        auto matches = index.search(vector_of_vectors[idx].data(), dataset_count);
        expect_eq(matches.size(), dataset_count / 2);
    }
}
1181+
11471182
int main(int, char**) {
11481183
test_uint40();
11491184
test_cosine<float, std::int64_t, uint40_t>(10, 10);
@@ -1221,5 +1256,6 @@ int main(int, char**) {
12211256
test_strings<std::int64_t, slot32_t>();
12221257

12231258
test_filtered_search();
1259+
test_isolate();
12241260
return 0;
12251261
}

include/usearch/index.hpp

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2177,6 +2177,20 @@ class index_gt {
21772177
misaligned_store<compressed_slot_t>(tape_ + shift(n), slot);
21782178
misaligned_store<neighbors_count_t>(tape_, n + 1);
21792179
}
2180+
template <typename allow_slot_at> std::size_t erase_if(allow_slot_at&& allow_slot) noexcept {
2181+
std::size_t old_count = misaligned_load<neighbors_count_t>(tape_);
2182+
std::size_t removed_count = 0;
2183+
for (std::size_t i = 0; i < old_count; ++i) {
2184+
compressed_slot_t slot = misaligned_load<compressed_slot_t>(tape_ + shift(i));
2185+
if (allow_slot(slot)) {
2186+
removed_count++;
2187+
} else {
2188+
misaligned_store<compressed_slot_t>(tape_ + shift(i - removed_count), slot);
2189+
}
2190+
}
2191+
misaligned_store<neighbors_count_t>(tape_, old_count - removed_count);
2192+
return removed_count;
2193+
}
21802194
};
21812195

21822196
/**
@@ -3654,14 +3668,10 @@ class index_gt {
36543668
node_t node = node_at_(node_idx);
36553669
for (level_t level = 0; level <= node.level(); ++level) {
36563670
neighbors_ref_t neighbors = neighbors_(node, level);
3657-
std::size_t old_size = neighbors.size();
3658-
neighbors.clear();
3659-
for (std::size_t i = 0; i != old_size; ++i) {
3660-
compressed_slot_t neighbor_slot = neighbors[i];
3671+
neighbors.erase_if([&](compressed_slot_t neighbor_slot) {
36613672
node_t neighbor = node_at_(neighbor_slot);
3662-
if (allow_member(member_cref_t{neighbor.ckey(), neighbor_slot}))
3663-
neighbors.push_back(neighbor_slot);
3664-
}
3673+
return !allow_member(member_cref_t{neighbor.ckey(), neighbor_slot});
3674+
});
36653675
}
36663676
++processed;
36673677
if (thread_idx == 0)

include/usearch/index_dense.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1709,12 +1709,12 @@ class index_dense_gt {
17091709
compaction_result_t isolate(executor_at&& executor = executor_at{}, progress_at&& progress = progress_at{}) {
    // Delegates to `typed_->isolate` with a predicate that keeps only members whose key
    // differs from `free_key_`, and reports how many times a freed member was rejected.
    compaction_result_t result;

    // Must be explicitly zeroed: before C++20 a default-constructed `std::atomic` holds an
    // indeterminate value, which would make the reported `pruned_edges` garbage.
    // Atomic presumably because the executor may invoke the predicate from several threads.
    std::atomic<std::size_t> pruned_edges(0);

    // Returns `true` for members that should stay linked, `false` for freed ones.
    auto allow = [&](member_cref_t const& member) noexcept {
        bool freed = member.key == free_key_;
        pruned_edges += freed;
        return !freed;
    };
    typed_->isolate(allow, std::forward<executor_at>(executor), std::forward<progress_at>(progress));
    result.pruned_edges = pruned_edges;
    return result;
}

0 commit comments

Comments
 (0)