@@ -18,7 +18,6 @@ namespace hnswlib {
1818 public:
// Number of mutexes that link_list_update_locks_ is constructed with (both in the
// max_elements constructor's initializer list and when loadIndex rebuilds the vector).
1919 static const tableint max_update_element_locks = 65536 ;
2020 HierarchicalNSW (SpaceInterface<dist_t > *s) {
21-
2221 }
2322
2423 HierarchicalNSW (SpaceInterface<dist_t > *s, const std::string &location, bool nmslib = false , size_t max_elements=0 ) {
@@ -29,7 +28,7 @@ namespace hnswlib {
2928 link_list_locks_ (max_elements), link_list_update_locks_(max_update_element_locks), element_levels_(max_elements) {
3029 max_elements_ = max_elements;
3130
32- has_deletions_= false ;
31+ num_deleted_ = 0 ;
3332 data_size_ = s->get_data_size ();
3433 fstdistfunc_ = s->get_dist_func ();
3534 dist_func_param_ = s->get_dist_func_param ();
@@ -56,8 +55,6 @@ namespace hnswlib {
5655
5756 visited_list_pool_ = new VisitedListPool (1 , max_elements);
5857
59-
60-
6158 // initializations for special treatment of the first node
6259 enterpoint_node_ = -1 ;
6360 maxlevel_ = -1 ;
@@ -92,6 +89,7 @@ namespace hnswlib {
9289 size_t cur_element_count;
9390 size_t size_data_per_element_;
9491 size_t size_links_per_element_;
// Number of elements that currently carry the delete mark.
// Zero-initialized at the declaration: loadIndex() only ever increments this counter
// while scanning the loaded elements, so a constructor that does not assign it
// explicitly would otherwise leave the count indeterminate.
size_t num_deleted_{0};
9593
9694 size_t M_;
9795 size_t maxM_;
@@ -112,20 +110,15 @@ namespace hnswlib {
// Vector of mutexes constructed with max_update_element_locks entries.
112110 std::vector<std::mutex> link_list_update_locks_;
// Set to -1 by the constructor as part of the special treatment of the first node.
113111 tableint enterpoint_node_;
114112
115-
// Assigned in loadIndex as maxM0_ * sizeof(tableint) + sizeof(linklistsizeint).
116113 size_t size_links_level0_;
117114 size_t offsetData_, offsetLevel0_;
118115
119-
// In loadIndex: malloc'ed buffer of max_elements * size_data_per_element_ bytes,
// filled from the index file for the first cur_element_count elements.
120116 char *data_level0_memory_;
// In loadIndex: malloc'ed array of sizeof(void *) * max_elements bytes.
121117 char **linkLists_;
// Constructed with max_elements entries; presumably the level of each element -- TODO confirm.
122118 std::vector<int > element_levels_;
123119
// Value returned by the space's get_data_size().
124120 size_t data_size_;
125121
126- bool has_deletions_;
127-
128-
129122 size_t label_offset_;
// Distance function returned by the space's get_dist_func().
130123 DISTFUNC<dist_t > fstdistfunc_;
// Parameter pointer returned by the space's get_dist_func_param().
131124 void *dist_func_param_;
@@ -547,7 +540,7 @@ namespace hnswlib {
547540 }
548541 }
549542
550- if (has_deletions_ ) {
543+ if (num_deleted_ ) {
551544 std::priority_queue<std::pair<dist_t , tableint >> top_candidates1=searchBaseLayerST<true >(currObj, query_data,
552545 ef_);
553546 top_candidates.swap (top_candidates1);
@@ -623,8 +616,6 @@ namespace hnswlib {
623616 }
624617
625618 void loadIndex (const std::string &location, SpaceInterface<dist_t > *s, size_t max_elements_i=0 ) {
626-
627-
628619 std::ifstream input (location, std::ios::binary);
629620
630621 if (!input.is_open ())
@@ -639,7 +630,7 @@ namespace hnswlib {
639630 readBinaryPOD (input, max_elements_);
640631 readBinaryPOD (input, cur_element_count);
641632
642- size_t max_elements= max_elements_i;
633+ size_t max_elements = max_elements_i;
643634 if (max_elements < cur_element_count)
644635 max_elements = max_elements_;
645636 max_elements_ = max_elements;
@@ -688,26 +679,19 @@ namespace hnswlib {
688679
689680 input.seekg (pos,input.beg );
690681
691-
692682 data_level0_memory_ = (char *) malloc (max_elements * size_data_per_element_);
693683 if (data_level0_memory_ == nullptr )
694684 throw std::runtime_error (" Not enough memory: loadIndex failed to allocate level0" );
695685 input.read (data_level0_memory_, cur_element_count * size_data_per_element_);
696686
697-
698-
699-
700687 size_links_per_element_ = maxM_ * sizeof (tableint) + sizeof (linklistsizeint);
701688
702-
703689 size_links_level0_ = maxM0_ * sizeof (tableint) + sizeof (linklistsizeint);
704690 std::vector<std::mutex>(max_elements).swap (link_list_locks_);
705691 std::vector<std::mutex>(max_update_element_locks).swap (link_list_update_locks_);
706692
707-
708693 visited_list_pool_ = new VisitedListPool (1 , max_elements);
709694
710-
711695 linkLists_ = (char **) malloc (sizeof (void *) * max_elements);
712696 if (linkLists_ == nullptr )
713697 throw std::runtime_error (" Not enough memory: loadIndex failed to allocate linklists" );
@@ -731,11 +715,9 @@ namespace hnswlib {
731715 }
732716 }
733717
734- has_deletions_=false ;
735-
736718 for (size_t i = 0 ; i < cur_element_count; i++) {
737719 if (isMarkedDeleted (i))
738- has_deletions_= true ;
720+ num_deleted_ += 1 ;
739721 }
740722
741723 input.close ();
@@ -765,19 +747,19 @@ namespace hnswlib {
765747 }
766748
// Bit that markDeletedInternal sets, and unmarkDeletedInternal clears, in the byte
// located 2 bytes past the pointer returned by get_linklist0(internalId).
767749 static const unsigned char DELETE_MARK = 0x01 ;
768- // static const unsigned char REUSE_MARK = 0x10;
750+ // static const unsigned char REUSE_MARK = 0x10;
769751 /* *
770752 * Marks an element with the given label deleted, does NOT really change the current graph.
771753 * @param label
772754 */
773755 void markDelete (labeltype label)
774756 {
775- has_deletions_=true ;
776757 auto search = label_lookup_.find (label);
777758 if (search == label_lookup_.end ()) {
778759 throw std::runtime_error (" Label not found" );
779760 }
780- markDeletedInternal (search->second );
761+ tableint internalId = search->second ;
762+ markDeletedInternal (internalId);
781763 }
782764
783765 /* *
@@ -786,17 +768,49 @@ namespace hnswlib {
786768 * @param internalId
787769 */
788770 void markDeletedInternal (tableint internalId) {
789- unsigned char *ll_cur = ((unsigned char *)get_linklist0 (internalId))+2 ;
790- *ll_cur |= DELETE_MARK;
771+ assert (internalId < cur_element_count);
772+ if (!isMarkedDeleted (internalId))
773+ {
774+ unsigned char *ll_cur = ((unsigned char *)get_linklist0 (internalId))+2 ;
775+ *ll_cur |= DELETE_MARK;
776+ num_deleted_ += 1 ;
777+ }
778+ else
779+ {
780+ throw std::runtime_error (" The requested to delete element is already deleted" );
781+ }
782+ }
783+
784+ /* *
785+ * Remove the deleted mark of the node, does NOT really change the current graph.
786+ * @param label
787+ */
788+ void unmarkDelete (labeltype label)
789+ {
790+ auto search = label_lookup_.find (label);
791+ if (search == label_lookup_.end ()) {
792+ throw std::runtime_error (" Label not found" );
793+ }
794+ tableint internalId = search->second ;
795+ unmarkDeletedInternal (internalId);
791796 }
792797
793798 /* *
794799 * Remove the deleted mark of the node.
795800 * @param internalId
796801 */
797802 void unmarkDeletedInternal (tableint internalId) {
798- unsigned char *ll_cur = ((unsigned char *)get_linklist0 (internalId))+2 ;
799- *ll_cur &= ~DELETE_MARK;
803+ assert (internalId < cur_element_count);
804+ if (isMarkedDeleted (internalId))
805+ {
806+ unsigned char *ll_cur = ((unsigned char *)get_linklist0 (internalId))+2 ;
807+ *ll_cur &= ~DELETE_MARK;
808+ num_deleted_ -= 1 ;
809+ }
810+ else
811+ {
812+ throw std::runtime_error (" The requested to undelete element is not deleted" );
813+ }
800814 }
801815
802816 /* *
@@ -857,8 +871,8 @@ namespace hnswlib {
857871 }
858872
859873 for (auto && neigh : sNeigh ) {
860- // if (neigh == internalId)
861- // continue;
874+ // if (neigh == internalId)
875+ // continue;
862876
863877 std::priority_queue<std::pair<dist_t , tableint>, std::vector<std::pair<dist_t , tableint>>, CompareByFirst> candidates;
864878 size_t size = sCand .find (neigh) == sCand .end () ? sCand .size () : sCand .size () - 1 ; // sCand guaranteed to have size >= 1
@@ -1133,7 +1147,7 @@ namespace hnswlib {
11331147 }
11341148
11351149 std::priority_queue<std::pair<dist_t , tableint>, std::vector<std::pair<dist_t , tableint>>, CompareByFirst> top_candidates;
1136- if (has_deletions_ ) {
1150+ if (num_deleted_ ) {
11371151 top_candidates=searchBaseLayerST<true ,true >(
11381152 currObj, query_data, std::max (ef_, k));
11391153 }
0 commit comments