55#include " server/rdb_load.h"
66
77#include " absl/strings/escaping.h"
8+ #include " server/search/global_hnsw_index.h"
89#include " server/tiered_storage.h"
910
1011extern " C" {
@@ -2213,8 +2214,8 @@ error_code RdbLoader::Load(io::Source* src) {
22132214 }
22142215
22152216 if (type == RDB_OPCODE_VECTOR_INDEX) {
2216- // Stub: read and ignore HNSW vector index data
2217- // Binary format: [index_name , elements_number,
2217+ // Read HNSW vector index graph data and restore directly
2218+ // Binary format: [index_key , elements_number,
22182219 // then for each node (little-endian):
22192220 // internal_id (4 bytes), global_id (8 bytes), level (4 bytes),
22202221 // for each level (0 to level): links_num (4 bytes) + links (4 bytes each)]
@@ -2224,26 +2225,93 @@ error_code RdbLoader::Load(io::Source* src) {
22242225 uint64_t elements_number;
22252226 SET_OR_RETURN (LoadLen (nullptr ), elements_number);
22262227
2228+ // Only restore if shard count matches (GlobalDocId encodes shard_id)
2229+ bool should_restore =
2230+ shard_count_ > 0 && shard_set != nullptr && shard_count_ == shard_set->size ();
2231+
2232+ // Extract index_name and field_name from index_key
2233+ size_t colon_pos = index_key.find (' :' );
2234+ string index_name = (colon_pos != string::npos) ? index_key.substr (0 , colon_pos) : index_key;
2235+ string field_name = (colon_pos != string::npos) ? index_key.substr (colon_pos + 1 ) : " " ;
2236+
2237+ // Check if we can get the HNSW index (it should exist from FT.CREATE in aux)
2238+ auto hnsw_index = should_restore
2239+ ? GlobalHnswIndexRegistry::Instance ().Get (index_name, field_name)
2240+ : nullptr ;
2241+ if (should_restore && !hnsw_index) {
2242+ LOG (WARNING) << " HNSW index not found for restoration: " << index_key;
2243+ should_restore = false ;
2244+ }
2245+
2246+ std::vector<search::HnswNodeData> nodes;
2247+ if (should_restore) {
2248+ nodes.reserve (elements_number);
2249+ }
2250+
22272251 for (uint64_t elem = 0 ; elem < elements_number; ++elem) {
2228- [[maybe_unused]] uint32_t internal_id;
2252+ uint32_t internal_id;
22292253 SET_OR_RETURN (FetchInt<uint32_t >(), internal_id);
2230- [[maybe_unused]] uint64_t global_id;
2254+ uint64_t global_id;
22312255 SET_OR_RETURN (FetchInt<uint64_t >(), global_id);
22322256 uint32_t level;
22332257 SET_OR_RETURN (FetchInt<uint32_t >(), level);
22342258
2259+ search::HnswNodeData node;
2260+ if (should_restore) {
2261+ node.internal_id = internal_id;
2262+ node.global_id = global_id;
2263+ node.level = level;
2264+ node.levels_links .resize (level + 1 );
2265+ }
2266+
22352267 for (uint32_t lvl = 0 ; lvl <= level; ++lvl) {
22362268 uint32_t links_num;
22372269 SET_OR_RETURN (FetchInt<uint32_t >(), links_num);
2270+
2271+ if (should_restore) {
2272+ node.levels_links [lvl].reserve (links_num);
2273+ }
2274+
22382275 for (uint32_t i = 0 ; i < links_num; ++i) {
2239- [[maybe_unused]] uint32_t link;
2276+ uint32_t link;
22402277 SET_OR_RETURN (FetchInt<uint32_t >(), link);
2278+ if (should_restore) {
2279+ node.levels_links [lvl].push_back (link);
2280+ }
22412281 }
22422282 }
2283+
2284+ if (should_restore) {
2285+ nodes.push_back (std::move (node));
2286+ }
22432287 }
22442288
2245- VLOG (2 ) << " Ignoring HNSW vector index: " << index_key
2246- << " elements_number=" << elements_number;
2289+ if (should_restore && !nodes.empty ()) {
2290+ // Get metadata - it was stored via SetMetadata() in LoadSearchIndexDefFromAux after FT.CREATE
2291+ search::HnswIndexMetadata metadata = hnsw_index->GetMetadata ();
2292+
2293+ if (metadata.cur_element_count == 0 ) {
2294+ // Create default metadata from graph data
2295+ metadata.cur_element_count = nodes.size ();
2296+ metadata.maxlevel = -1 ;
2297+ metadata.enterpoint_node = 0 ;
2298+ for (const auto & node : nodes) {
2299+ if (node.level > metadata.maxlevel ) {
2300+ metadata.maxlevel = node.level ;
2301+ metadata.enterpoint_node = node.internal_id ;
2302+ }
2303+ }
2304+ }
2305+
2306+ // Restore the HNSW graph directly and mark as restored
2307+ hnsw_index->RestoreFromNodes (nodes, metadata);
2308+
2309+ LOG (INFO) << " Restored HNSW index " << index_key << " with " << nodes.size () << " nodes" ;
2310+ } else if (elements_number > 0 ) {
2311+ VLOG (2 ) << " Skipping HNSW vector index restore: " << index_key
2312+ << " elements_number=" << elements_number << " shard_count_=" << shard_count_
2313+ << " current_shards=" << (shard_set ? shard_set->size () : 0 );
2314+ }
22472315 continue ;
22482316 }
22492317
@@ -2975,7 +3043,6 @@ std::vector<std::string> RdbLoader::pending_synonym_cmds_;
29753043// Static synchronization for thread-safe search index creation
29763044base::SpinLock RdbLoader::search_index_mu_;
29773045absl::flat_hash_set<std::string> RdbLoader::created_search_indices_;
2978-
29793046std::vector<std::string> RdbLoader::TakePendingSynonymCommands () {
29803047 std::vector<std::string> result;
29813048 result.swap (pending_synonym_cmds_);
@@ -2985,6 +3052,8 @@ std::vector<std::string> RdbLoader::TakePendingSynonymCommands() {
29853052void RdbLoader::LoadSearchIndexDefFromAux (string&& def) {
29863053 string index_name;
29873054 string full_cmd;
3055+ string hnsw_field_name;
3056+ std::optional<search::HnswIndexMetadata> hnsw_meta;
29883057
29893058 // Check if this is new JSON format (starts with '{') or old format ("index_name cmd")
29903059 if (!def.empty () && def[0 ] == ' {' ) {
@@ -2998,9 +3067,28 @@ void RdbLoader::LoadSearchIndexDefFromAux(string&& def) {
29983067 const auto & json = *json_opt;
29993068 index_name = json[" name" ].as <string>();
30003069 string cmd = json[" cmd" ].as <string>();
3001-
3002- // TODO: restore HNSW metadata from json["hnsw_metadata"] if present
3003- // Currently we just restore the index definition, HNSW graph will be rebuilt
3070+ hnsw_field_name = json[" field" ].as <string>();
3071+
3072+ // Parse HNSW metadata if present
3073+ if (json.contains (" hnsw_metadata" )) {
3074+ const auto & meta = json[" hnsw_metadata" ];
3075+ search::HnswIndexMetadata m;
3076+ m.max_elements = meta[" max_elements" ].as <size_t >();
3077+ m.cur_element_count = meta[" cur_element_count" ].as <size_t >();
3078+ m.maxlevel = meta[" maxlevel" ].as <int >();
3079+ m.enterpoint_node = meta[" enterpoint_node" ].as <size_t >();
3080+ m.M = meta[" M" ].as <size_t >();
3081+ m.maxM = meta[" maxM" ].as <size_t >();
3082+ m.maxM0 = meta[" maxM0" ].as <size_t >();
3083+ m.ef_construction = meta[" ef_construction" ].as <size_t >();
3084+ m.mult = meta[" mult" ].as <double >();
3085+ hnsw_meta = m;
3086+
3087+ VLOG (1 ) << " Parsed HNSW metadata for index " << index_name << " field " << hnsw_field_name
3088+ << " : max_elements=" << m.max_elements
3089+ << " cur_element_count=" << m.cur_element_count << " maxlevel=" << m.maxlevel
3090+ << " M=" << m.M ;
3091+ }
30043092
30053093 full_cmd = absl::StrCat (index_name, " " , cmd);
30063094 } catch (const std::exception& e) {
@@ -3032,6 +3120,13 @@ void RdbLoader::LoadSearchIndexDefFromAux(string&& def) {
30323120 }
30333121
30343122 LoadSearchCommandFromAux (service_, std::move (full_cmd), " FT.CREATE" , " index definition" );
3123+
3124+ // Store metadata on HNSW index after index creation (for later graph restoration)
3125+ if (hnsw_meta && !hnsw_field_name.empty ()) {
3126+ if (auto index = GlobalHnswIndexRegistry::Instance ().Get (index_name, hnsw_field_name); index) {
3127+ index->SetMetadata (*hnsw_meta);
3128+ }
3129+ }
30353130}
30363131
30373132void RdbLoader::LoadSearchSynonymsFromAux (string&& def) {
@@ -3055,7 +3150,7 @@ void RdbLoader::PerformPostLoad(Service* service, bool is_error) {
30553150 if (is_error)
30563151 return ;
30573152
3058- // Rebuild all search indices as only their definitions are extracted from the snapshot
3153+ // Rebuild all search indices - for restored HNSW indices, this will populate vectors
30593154 shard_set->AwaitRunningOnShardQueue ([](EngineShard* es) {
30603155 OpArgs op_args{es, nullptr ,
30613156 DbContext{&namespaces->GetDefaultNamespace (), 0 , GetCurrentTimeMs ()}};
0 commit comments