Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions src/server/search/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,12 @@ if (NOT WITH_SEARCH)
return()
endif()

add_library(dfly_search_server aggregator.cc doc_accessors.cc doc_index.cc search_family.cc index_join.cc)
add_library(dfly_search_server aggregator.cc doc_accessors.cc doc_index.cc search_family.cc index_join.cc global_vector_index.cc global_vector_search.cc)
target_link_libraries(dfly_search_server dfly_transaction dragonfly_lib dfly_facade redis_lib jsonpath TRDP::jsoncons)


cxx_test(search_family_test dfly_test_lib LABELS DFLY)
cxx_test(aggregator_test dfly_test_lib LABELS DFLY)
cxx_test(index_join_test dfly_test_lib LABELS DFLY)

cxx_test(performance_test dfly_test_lib LABELS DFLY)

add_dependencies(check_dfly search_family_test aggregator_test index_join_test)
103 changes: 98 additions & 5 deletions src/server/search/doc_index.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "server/engine_shard_set.h"
#include "server/family_utils.h"
#include "server/search/doc_accessors.h"
#include "server/search/global_vector_index.h"
#include "server/server_state.h"

namespace dfly {
Expand Down Expand Up @@ -238,6 +239,11 @@ string_view ShardDocIndex::DocKeyIndex::Get(DocId id) const {
return keys_[id];
}

// Looks up the DocId currently associated with `key` in ids_.
// Returns std::nullopt when the key has no entry.
std::optional<ShardDocIndex::DocId> ShardDocIndex::DocKeyIndex::Find(string_view key) const {
  const auto pos = ids_.find(key);
  if (pos == ids_.end())
    return std::nullopt;
  return pos->second;
}

// Returns the number of key -> DocId entries currently held in ids_.
size_t ShardDocIndex::DocKeyIndex::Size() const {
return ids_.size();
}
Expand Down Expand Up @@ -670,8 +676,11 @@ void ShardDocIndices::DropIndexCache(const dfly::ShardDocIndex& shard_doc_index)
}

// Rebuilds every index registered in indices_: first the shard-local index via
// Rebuild(op_args, &local_mr_), then the global vector indices for the same index name.
// NOTE(review): this span comes from a rendered diff without +/- markers; the two
// consecutive `for` headers are presumably the removed line followed by its replacement.
void ShardDocIndices::RebuildAllIndices(const OpArgs& op_args) {
for (auto& [_, ptr] : indices_)
for (auto& [index_name, ptr] : indices_) {
ptr->Rebuild(op_args, &local_mr_);
// PoC: Also rebuild global vector indices
ptr->RebuildGlobalVectorIndices(index_name, op_args);
}
}

vector<string> ShardDocIndices::GetIndexNames() const {
Expand All @@ -684,17 +693,23 @@ vector<string> ShardDocIndices::GetIndexNames() const {

// Adds the document stored under `key` to every index in indices_ whose
// Matches(key, pv.ObjType()) check accepts it, then forwards the same document to that
// index's global vector index hook. Debug builds assert that `pv` is an indexed key type.
// NOTE(review): this span comes from a rendered diff without +/- markers; each pair of
// near-identical `for` / `if` headers is presumably the removed line followed by its
// replacement.
void ShardDocIndices::AddDoc(string_view key, const DbContext& db_cntx, const PrimeValue& pv) {
DCHECK(IsIndexedKeyType(pv));
for (auto& [_, index] : indices_) {
if (index->Matches(key, pv.ObjType()))
for (auto& [index_name, index] : indices_) {
if (index->Matches(key, pv.ObjType())) {
index->AddDoc(key, db_cntx, pv);
// PoC: Also add to global vector index if document has vector fields
index->AddDocToGlobalVectorIndex(index_name, key, db_cntx, pv);
}
}
}

// Removes the document stored under `key` from every index in indices_ whose
// Matches(key, pv.ObjType()) check accepts it. Debug builds assert that `pv` is an
// indexed key type.
// NOTE(review): this span comes from a rendered diff without +/- markers; each pair of
// near-identical `for` / `if` headers is presumably the removed line followed by its
// replacement.
void ShardDocIndices::RemoveDoc(string_view key, const DbContext& db_cntx, const PrimeValue& pv) {
DCHECK(IsIndexedKeyType(pv));
for (auto& [_, index] : indices_) {
if (index->Matches(key, pv.ObjType()))
for (auto& [index_name, index] : indices_) {
if (index->Matches(key, pv.ObjType())) {
// PoC: Remove from global vector index first (before local removal)
// The global removal resolves the document through key_index_.Find(key), so it has to
// run while the key is still known to the local index.
index->RemoveDocFromGlobalVectorIndex(index_name, key, db_cntx, pv);
index->RemoveDoc(key, db_cntx, pv);
}
}
}

Expand All @@ -710,4 +725,82 @@ SearchStats ShardDocIndices::GetStats() const {
return {GetUsedMemory(), indices_.size(), total_entries};
}

// PoC: Global vector index integration
// Mirrors a document into the global vector indices registered for `index_name`.
//
// For every schema field of type VECTOR that is not flagged NOINDEX, the field's vector is
// read from the document and passed to the index returned by
// GlobalVectorIndexRegistry::Instance().GetOrCreateVectorIndex(), identified by the pair
// (current shard id, local DocId).
//
//   index_name   - name of the search index the document belongs to.
//   key          - document key; it must already have a DocId in key_index_.
//   db_cntx, pv  - used to build the accessor that reads the document's fields.
//
// Does nothing when indices_ is unset or when `key` has no local DocId. Fields for which
// the accessor yields no vector data are skipped.
void ShardDocIndex::AddDocToGlobalVectorIndex(std::string_view index_name, std::string_view key,
                                              const DbContext& db_cntx, const PrimeValue& pv) {
  if (!indices_)
    return;

  // Resolve the local id before building the accessor: the lookup is cheap, and when the
  // key is unknown locally the accessor would be created only to be thrown away.
  const auto local_id = key_index_.Find(key);
  if (!local_id)
    return;

  auto accessor = GetAccessor(db_cntx, pv);
  GlobalDocId global_id{EngineShard::tlocal()->shard_id(), *local_id};

  for (const auto& [field_ident, field_info] : base_->schema.fields) {
    if (field_info.type != search::SchemaField::VECTOR)
      continue;
    if (field_info.flags & search::SchemaField::NOINDEX)
      continue;

    auto vector_info = accessor->GetVector(field_ident);
    if (!vector_info || !vector_info->first)
      continue;  // The document carries no usable vector for this field.

    const auto& vparams = std::get<search::SchemaField::VectorParams>(field_info.special_params);
    auto global_index = GlobalVectorIndexRegistry::Instance().GetOrCreateVectorIndex(
        index_name, field_info.short_name, vparams);
    global_index->AddVector(global_id, key, vector_info->first.get());
  }
}

// Removes the document stored under `key` from the global vector indices registered for
// `index_name`.
//
// The document is identified by (current shard id, local DocId), so `key` must still be
// present in key_index_ when this is called; otherwise the call is a no-op. It is also a
// no-op when indices_ is unset. Only VECTOR fields without the NOINDEX flag are visited,
// and fields for which GetVectorIndex() returns nothing are skipped. `db_cntx` and `pv`
// are not read here.
void ShardDocIndex::RemoveDocFromGlobalVectorIndex(std::string_view index_name,
                                                   std::string_view key, const DbContext& db_cntx,
                                                   const PrimeValue& pv) {
  if (!indices_)
    return;

  const auto doc_id = key_index_.Find(key);
  if (!doc_id)
    return;

  GlobalDocId global_id{EngineShard::tlocal()->shard_id(), *doc_id};

  for (const auto& [ident, info] : base_->schema.fields) {
    if (info.type != search::SchemaField::VECTOR)
      continue;
    if (info.flags & search::SchemaField::NOINDEX)
      continue;

    auto vector_index =
        GlobalVectorIndexRegistry::Instance().GetVectorIndex(index_name, info.short_name);
    if (vector_index)
      vector_index->RemoveVector(global_id, key);
  }
}

// Re-populates the global vector indices of `index_name` from this shard's documents.
//
// Walks every document reported by TraverseAllMatching(*base_, op_args, ...). For each
// document that has a local DocId in key_index_, every VECTOR field without the NOINDEX
// flag that yields vector data is added to the index returned by GetOrCreateVectorIndex(),
// identified by (current shard id, local DocId). Does nothing when indices_ is unset.
void ShardDocIndex::RebuildGlobalVectorIndices(std::string_view index_name, const OpArgs& op_args) {
  if (!indices_)
    return;

  auto add_doc_vectors = [this, index_name](string_view key, const BaseAccessor& doc) {
    const auto doc_id = key_index_.Find(key);
    if (!doc_id)
      return;  // Document is not registered in the local index.

    GlobalDocId global_id{EngineShard::tlocal()->shard_id(), *doc_id};

    for (const auto& [ident, info] : base_->schema.fields) {
      if (info.type != search::SchemaField::VECTOR)
        continue;
      if (info.flags & search::SchemaField::NOINDEX)
        continue;

      auto vec = doc.GetVector(ident);
      if (!vec || !vec->first)
        continue;

      const auto& vparams = std::get<search::SchemaField::VectorParams>(info.special_params);
      auto vector_index = GlobalVectorIndexRegistry::Instance().GetOrCreateVectorIndex(
          index_name, info.short_name, vparams);
      vector_index->AddVector(global_id, key, vec->first.get());
    }
  };

  TraverseAllMatching(*base_, op_args, add_doc_vectors);
}

} // namespace dfly
16 changes: 13 additions & 3 deletions src/server/search/doc_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
namespace dfly {

struct BaseAccessor;
class GlobalVectorIndex; // PoC: Forward declaration for global vector index

using SearchDocData = absl::flat_hash_map<std::string /*field*/, search::SortableValue /*value*/>;
using Synonyms = search::Synonyms;
Expand Down Expand Up @@ -222,6 +223,7 @@ class ShardDocIndex {
std::optional<DocId> Remove(std::string_view key);

std::string_view Get(DocId id) const;
std::optional<DocId> Find(std::string_view key) const; // PoC: Find DocId by key
size_t Size() const;

private:
Expand Down Expand Up @@ -277,13 +279,21 @@ class ShardDocIndex {
void RebuildForGroup(const OpArgs& op_args, const std::string_view& group_id,
const std::vector<std::string_view>& terms);

private:
// Clears internal data. Traverses all matching documents and assigns ids.
void Rebuild(const OpArgs& op_args, PMR_NS::memory_resource* mr);
// PoC: Global vector index support
// Adds the indexed VECTOR fields of the document under `key` to the global vector indices
// of `index_name`. The key must already have a local DocId; otherwise nothing happens.
void AddDocToGlobalVectorIndex(std::string_view index_name, std::string_view key,
const DbContext& db_cntx, const PrimeValue& pv);
// Removes the document under `key` from the global vector indices of `index_name`.
// The key must still have a local DocId when this is called; otherwise nothing happens.
void RemoveDocFromGlobalVectorIndex(std::string_view index_name, std::string_view key,
const DbContext& db_cntx, const PrimeValue& pv);
// Traverses all matching documents and adds their indexed VECTOR fields to the global
// vector indices of `index_name`.
void RebuildGlobalVectorIndices(std::string_view index_name, const OpArgs& op_args);

// PoC: Public access to LoadEntry for global search coordinator
using LoadedEntry = std::pair<std::string_view, std::unique_ptr<BaseAccessor>>;
std::optional<LoadedEntry> LoadEntry(search::DocId id, const OpArgs& op_args) const;

private:
// Clears internal data. Traverses all matching documents and assigns ids.
void Rebuild(const OpArgs& op_args, PMR_NS::memory_resource* mr);

// Behaviour identical to SortIndex::Sort for non-sortable fields that need to be fetched first
std::vector<search::SortableValue> KeepTopKSorted(std::vector<DocId>* ids, size_t limit,
const SearchParams::SortOption& sort,
Expand Down
Loading
Loading