Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/tsan_build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ jobs:
echo race:libomp.so > omp.supp
echo race:libomp.so.5 >> omp.supp
echo race:fmt::v10::detail::format_decimal >> omp.supp
echo "race:libmkl_intel_thread.so.2" >> omp.supp
export TSAN_OPTIONS=suppressions=omp.supp
chmod +x ./build/tests/functests
./build/tests/functests "[concurrent]~[daily]"
56 changes: 47 additions & 9 deletions include/vsag/index.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,23 @@ using OffsetType = uint64_t;
using SizeType = uint64_t;
using WriteFuncType = std::function<void(OffsetType, SizeType, const void*)>;

/** Strategy selector accepted by Index::Add. */
enum class AddMode {
/** Try to reuse the memory of a deleted vector; no recovery check is performed. */
DEFAULT = 0,

/** Always allocate new memory for the vector, and additionally check whether the
 * add is a recovery of the same id. */
KEEP_TOMBSTONE = 1,
};

/** Strategy selector accepted by Index::Remove. */
enum class RemoveMode {
/** Mark the vector as deleted without removing it from the index; no shrink and
 * no repair are performed. This mode is fast. */
MARK_REMOVE = 0,

/** Remove the vector from the index and repair the index, but do not shrink the
 * index. This mode is heavy. */
REMOVE_AND_REPAIR = 1,
};
class Index {
public:
// [basic methods]
Expand Down Expand Up @@ -127,19 +144,40 @@ class Index {
* @return IDs that failed to insert into the index
*/
virtual tl::expected<std::vector<int64_t>, Error>
Add(const DatasetPtr& base) {
Add(const DatasetPtr& base, AddMode mode = AddMode::DEFAULT) {
throw std::runtime_error("Index not support adding vectors");
}

/**
* @brief Remove the vector corresponding to the given ID from the index
*
* @param id of the vector that need to be removed from the index
* @return result indicates whether the remove operation is successful.
*/
virtual tl::expected<bool, Error>
Remove(int64_t id) {
throw std::runtime_error("Index not support delete vector");
* @brief Remove the vectors corresponding to the given IDs from the index
*
* @param ids of the vectors that need to be removed from the index
* @return number of vectors that were successfully removed from the index
*/
virtual tl::expected<uint32_t, Error>
Remove(const std::vector<int64_t>& ids, RemoveMode mode = RemoveMode::MARK_REMOVE) {
// Base-class fallback: always throws std::runtime_error; both arguments are ignored here.
throw std::runtime_error("Index not support Remove");
}

/**
 * @brief Remove a single vector, identified by its ID, from the index
 *
 * Convenience overload: wraps the ID in a one-element vector and forwards to
 * the batch Remove overload with the same mode.
 *
 * @param id ID of the vector that needs to be removed from the index
 * @param mode removal strategy, passed through unchanged to the batch overload
 * @return number of vectors that were successfully removed from the index
 */
virtual tl::expected<uint32_t, Error>
Remove(int64_t id, RemoveMode mode = RemoveMode::MARK_REMOVE) {
    const std::vector<int64_t> single_id{id};
    return this->Remove(single_id, mode);
}

/**
 * @brief Shrink and repair the index
 *
 * 1. Shrink the index to release memory occupied by soft-deleted vectors.
 * 2. Repair the index that was corrupted by soft deletion.
 *
 * This base-class implementation always throws std::runtime_error.
 */
virtual void
ShrinkAndRepair() {
throw std::runtime_error("Index not support ShrinkAndRepair");
}

/**
Expand Down
2 changes: 1 addition & 1 deletion include/vsag/vsag_ext.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ class IndexHandler {
tl::expected<std::vector<int64_t>, Error>
Add(DatasetHandler* base);

tl::expected<bool, Error>
tl::expected<uint32_t, Error>
Remove(int64_t id);

tl::expected<DatasetHandler*, Error>
Expand Down
33 changes: 18 additions & 15 deletions mockimpl/vsag/simpleflat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ SimpleFlat::Build(const DatasetPtr& base) {
}

tl::expected<std::vector<int64_t>, Error>
SimpleFlat::Add(const DatasetPtr& base) {
SimpleFlat::Add(const DatasetPtr& base, AddMode mode) {
std::vector<int64_t> failed_ids;
if (not this->data_.empty()) {
if (this->dim_ != base->GetDim()) {
Expand Down Expand Up @@ -429,22 +429,25 @@ SimpleFlat::GetStats() const {
return j.dump();
}

tl::expected<bool, Error>
SimpleFlat::Remove(int64_t id) {
auto iter = std::find(ids_.begin(), ids_.end(), id);
if (iter != ids_.end()) {
int index = iter - ids_.begin();
num_elements_--;
ids_[index] = ids_[num_elements_];
std::memcpy(
data_.data() + index * dim_, data_.data() + num_elements_ * dim_, dim_ * sizeof(float));
ids_.resize(num_elements_);
data_.resize(num_elements_ * dim_);
} else {
return false;
/**
 * @brief Remove the vectors with the given IDs from the flat storage
 *
 * Each found ID is removed by overwriting its slot with the last stored
 * element (ID and vector data) and then shrinking both arrays by one.
 * IDs that are not present are skipped.
 *
 * @param ids IDs of the vectors to remove
 * @param mode not consulted by this implementation
 * @return number of IDs that were found and removed
 */
tl::expected<uint32_t, Error>
SimpleFlat::Remove(const std::vector<int64_t>& ids, RemoveMode mode) {
    uint32_t removed = 0;
    for (const auto id : ids) {
        const auto iter = std::find(ids_.begin(), ids_.end(), id);
        if (iter == ids_.end()) {
            continue;
        }

        const auto index = static_cast<uint64_t>(iter - ids_.begin());
        num_elements_--;
        const auto last = static_cast<uint64_t>(num_elements_);

        // Only move the tail element when the victim is not already the tail:
        // memcpy with identical (overlapping) source and destination is undefined.
        if (index != last) {
            ids_[index] = ids_[last];
            std::memcpy(
                data_.data() + index * dim_, data_.data() + last * dim_, dim_ * sizeof(float));
        }

        ids_.resize(num_elements_);
        data_.resize(num_elements_ * dim_);
        ++removed;
    }

    return removed;
}

} // namespace vsag
6 changes: 3 additions & 3 deletions mockimpl/vsag/simpleflat.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ class SimpleFlat : public Index {
Build(const DatasetPtr& base) override;

virtual tl::expected<std::vector<int64_t>, Error>
Add(const DatasetPtr& base) override;
Add(const DatasetPtr& base, AddMode mode) override;

tl::expected<bool, Error>
Remove(int64_t id) override;
tl::expected<uint32_t, Error>
Remove(const std::vector<int64_t>& ids, RemoveMode mode) override;

tl::expected<DatasetPtr, Error>
KnnSearch(const DatasetPtr& query,
Expand Down
62 changes: 40 additions & 22 deletions src/algorithm/brute_force.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ BruteForce::Train(const DatasetPtr& data) {
}

std::vector<int64_t>
BruteForce::Add(const DatasetPtr& data) {
BruteForce::Add(const DatasetPtr& data, AddMode mode) {
std::vector<int64_t> failed_ids;
auto base_dim = data->GetDim();
CHECK_ARGUMENT(base_dim == dim_,
Expand Down Expand Up @@ -146,34 +146,44 @@ BruteForce::Add(const DatasetPtr& data) {
return failed_ids;
}

bool
BruteForce::Remove(int64_t label) {
uint32_t
BruteForce::Remove(const std::vector<int64_t>& ids, RemoveMode mode) {
CHECK_ARGUMENT(not use_attribute_filter_,
"remove is not supported when use_attribute_filter is true");

uint32_t delete_count = 0;
if (mode == RemoveMode::MARK_REMOVE) {
std::scoped_lock label_lock(this->label_lookup_mutex_);
delete_count = this->label_table_->MarkRemove(ids);
delete_count_ += delete_count;
return delete_count;
}

std::scoped_lock lock(this->add_mutex_, this->label_lookup_mutex_);
const auto last_inner_id = static_cast<InnerIdType>(this->total_count_ - 1);
const auto inner_id = this->label_table_->GetIdByLabel(label);
for (auto label : ids) {
const auto last_inner_id = static_cast<InnerIdType>(this->total_count_ - 1);
const auto inner_id = this->label_table_->GetIdByLabel(label);

CHECK_ARGUMENT(inner_id <= last_inner_id, "the element to be remove is invalid");
CHECK_ARGUMENT(inner_id <= last_inner_id, "the element to be remove is invalid");

const auto last_label = this->label_table_->GetLabelById(last_inner_id);
this->label_table_->Remove(label);
--this->label_table_->total_count_;
const auto last_label = this->label_table_->GetLabelById(last_inner_id);
this->label_table_->MarkRemove(label);
--this->label_table_->total_count_;

if (inner_id < last_inner_id) {
Vector<float> data(dim_, allocator_);
GetVectorByInnerId(last_inner_id, data.data());
if (inner_id < last_inner_id) {
Vector<float> data(dim_, allocator_);
GetVectorByInnerId(last_inner_id, data.data());

this->label_table_->Remove(last_label);
--this->label_table_->total_count_;
this->label_table_->MarkRemove(last_label);
--this->label_table_->total_count_;

this->inner_codes_->InsertVector(data.data(), inner_id);
this->label_table_->Insert(inner_id, last_label);
}
this->inner_codes_->InsertVector(data.data(), inner_id);
this->label_table_->Insert(inner_id, last_label);
}

this->total_count_--;
return true;
this->total_count_--;
}
return 1;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove should return the number of successfully deleted vectors.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently only mark-remove is supported; repair-remove will be supported soon.

}

DatasetPtr
Expand All @@ -199,10 +209,18 @@ BruteForce::SearchWithRequest(const SearchRequest& request) const {
DistHeapPtr heap = nullptr;
ExecutorPtr executor = nullptr;
Filter* attr_filter = nullptr;
Filter* filter = nullptr;

auto combined_filter = std::make_shared<CombinedFilter>();
combined_filter->AppendFilter(this->label_table_->GetDeletedIdsFilter());
if (request.filter_ != nullptr) {
filter = request.filter_.get();
combined_filter->AppendFilter(
std::make_shared<InnerIdWrapperFilter>(request.filter_, *this->label_table_));
}
FilterPtr ft = nullptr;
if (not combined_filter->IsEmpty()) {
ft = combined_filter;
}

if (request.enable_attribute_filter_) {
auto& schema = this->attr_filter_index_->field_type_map_;
auto expr = AstParse(request.attribute_filter_str_, &schema);
Expand All @@ -228,7 +246,7 @@ BruteForce::SearchWithRequest(const SearchRequest& request) const {
if (attr_filter != nullptr and not attr_filter->CheckValid(i)) {
continue;
}
if (filter == nullptr or filter->CheckValid(this->label_table_->GetLabelById(i))) {
if (ft == nullptr or ft->CheckValid(i)) {
inner_codes_->Query(&dist, computer, &i, 1);
++dist_cmp_local;
cur_heap->Push(dist, i);
Expand Down
15 changes: 11 additions & 4 deletions src/algorithm/brute_force.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class BruteForce : public InnerIndexInterface {
~BruteForce() override = default;

std::vector<int64_t>
Add(const DatasetPtr& data) override;
Add(const DatasetPtr& data, AddMode mode = AddMode::DEFAULT) override;

std::vector<int64_t>
Build(const DatasetPtr& data) override;
Expand Down Expand Up @@ -80,7 +80,12 @@ class BruteForce : public InnerIndexInterface {

[[nodiscard]] int64_t
GetNumElements() const override {
return this->total_count_;
return this->total_count_ - this->delete_count_;
}

/** Report the current value of delete_count_ as a signed 64-bit count. */
[[nodiscard]] int64_t
GetNumberRemoved() const override {
    const auto removed = static_cast<int64_t>(this->delete_count_);
    return removed;
}

void
Expand All @@ -102,8 +107,8 @@ class BruteForce : public InnerIndexInterface {
const FilterPtr& filter,
int64_t limited_size = -1) const override;

bool
Remove(int64_t label) override;
uint32_t
Remove(const std::vector<int64_t>& ids, RemoveMode mode = RemoveMode::MARK_REMOVE) override;

[[nodiscard]] DatasetPtr
SearchWithRequest(const SearchRequest& request) const override;
Expand Down Expand Up @@ -140,6 +145,8 @@ class BruteForce : public InnerIndexInterface {

uint64_t total_count_{0};

uint64_t delete_count_{0};

uint64_t resize_increase_count_bit_{DEFAULT_RESIZE_BIT};

mutable std::shared_mutex global_mutex_;
Expand Down
Loading
Loading