@@ -618,19 +618,24 @@ class IndexTable : public IndexTableBase, public Btree< K, V > {
618618 K last_child_last_key;
619619 K last_child_neighbor_key;
620620 BtreeNodePtr cur_child = child_node;
621+ auto sibling_first_child = true_sibling_first_child (parent_node);
622+ LOGTRACEMOD (wbcache, " Sibling first child id is {}" , sibling_first_child);
621623
622624 // We find the last child node by starting from the leftmost child and traversing through the
623625 // next_bnode links until we reach the end or find a sibling first child.
624626 bool found_child = false ;
625- auto sibling_first_child = true_sibling_first_child (parent_node);
626- LOGTRACEMOD (wbcache, " Sibling first child id is {}" , sibling_first_child);
627627 while (cur_child != nullptr ) {
628628 LOGTRACEMOD (wbcache, " Processing child node [{}]" , cur_child->to_string ());
629629 if (!cur_child->is_node_deleted () && cur_child->total_entries () > 0 ) {
630630 last_child_last_key = cur_child->get_last_key < K >();
631631 found_child = true ;
632632 }
633633
634+ if (child_node->total_entries () == 0 && orig_child_infos.contains (child_node->node_id ())) {
635+ last_child_last_key = orig_child_infos[child_node->node_id ()];
636+ found_child = true ;
637+ }
638+
634639 next_cur_child = nullptr ;
635640 if (cur_child->next_bnode () == empty_bnodeid ||
636641 read_node_impl (cur_child->next_bnode (), next_cur_child) != btree_status_t ::success) {
@@ -685,7 +690,6 @@ class IndexTable : public IndexTableBase, public Btree< K, V > {
685690 LOGTRACEMOD (wbcache,
686691 " No undeleted child found for parent_node [{}], keep normal repair (regular recovery)" ,
687692 parent_node->to_string ());
688- next_cur_child = nullptr ;
689693 }
690694 }
691695 }
@@ -700,6 +704,7 @@ class IndexTable : public IndexTableBase, public Btree< K, V > {
700704 // Walk across all child nodes until it gets the last_parent_key and keep fixing them.
701705 auto cur_parent = parent_node;
702706 BtreeNodeList new_parent_nodes;
707+ BtreeNodeList child_nodes_to_free;
703708 do {
704709 if (child_node->has_valid_edge () || (child_node->is_leaf () && child_node->next_bnode () == empty_bnodeid)) {
705710 LOGTRACEMOD (wbcache, " Child node [{}] is an edge node or a leaf with no next" , child_node->to_string ());
@@ -760,61 +765,6 @@ class IndexTable : public IndexTableBase, public Btree< K, V > {
760765 LOGTRACEMOD (wbcache, " Repairing node={}, child_node=[{}] child_last_key={}" , cur_parent->node_id (),
761766 child_node->to_string (), child_last_key.to_string ());
762767
763- // Check if we are beyond the last child node.
764- //
765- // There can be cases where the child level merge is successfully persisted but the parent level is
766- // not. In this case, you may have your rightmost child node with last key greater than the
767- // last_parent_key. That's why here we have to check if the child node is one of the original child
768- // nodes first.
769- if (!is_parent_edge_node && !orig_child_infos.contains (child_node->node_id ())) {
770- LOGTRACEMOD (
771- wbcache,
772- " Child node [{}] is not one of the original child nodes, so we need to check if it is beyond the "
773- " last parent key {}" ,
774- child_node->to_string (), last_parent_key.to_string ());
775- if (child_last_key.compare (last_parent_key) > 0 ) {
776- // We have reached a child beyond this parent, we can stop now
777- // TODO this case if child last key is less than last parent key to update the parent node.
778- // this case can potentially break the btree for put and remove op.
779- break ;
780- }
781- if (child_node->total_entries () == 0 ) {
782- // this child has no entries, but maybe in the middle of the parent node, we need to update the key
783- // of parent as previous one and go on
784- LOGTRACEMOD (wbcache,
785- " Reach to an empty child node {}, and this child doesn't belong to this parent; Hence "
786- " loop ends" ,
787- child_node->to_string ());
788- // now update the next of parent node by skipping all deleted siblings of this parent node
789- auto valid_sibling = cur_parent->next_bnode ();
790- while (valid_sibling != empty_bnodeid) {
791- BtreeNodePtr sibling;
792- if (read_node_impl (valid_sibling, sibling) == btree_status_t ::success) {
793- if (sibling->is_node_deleted ()) {
794- valid_sibling = sibling->next_bnode ();
795- continue ;
796- }
797- // cur_parent->set_next_bnode(sibling->node_id());
798- break ;
799- }
800- LOGTRACEMOD (wbcache, " Failed to read child node {} for parent node [{}] reason {}" ,
801- valid_sibling, cur_parent->to_string (), ret);
802- }
803- if (valid_sibling != empty_bnodeid) {
804- cur_parent->set_next_bnode (valid_sibling);
805- LOGTRACEMOD (wbcache, " Repairing node=[{}], child_node=[{}] is an edge node, end loop" ,
806- cur_parent->to_string (), child_node->to_string ());
807-
808- } else {
809- cur_parent->set_next_bnode (empty_bnodeid);
810- LOGTRACEMOD (wbcache, " Repairing node=[{}], child_node=[{}] is an edge node, end loop" ,
811- cur_parent->to_string (), child_node->to_string ());
812- }
813-
814- break ;
815- }
816- }
817-
818768 if (!cur_parent->has_room_for_put (btree_put_type::INSERT, K::get_max_size (),
819769 BtreeLinkInfo::get_fixed_size ())) {
820770 // No room in the parent_node, let us split the parent_node and continue
@@ -836,22 +786,25 @@ class IndexTable : public IndexTableBase, public Btree< K, V > {
836786 cur_parent = std::move (new_parent);
837787 }
838788
789+ // If the child node is empty, mark it as deleted and queue it to be freed after the repair loop
790+ if (child_node->total_entries () == 0 ) {
791+ LOGTRACEMOD (wbcache, " Found an empty child node {}, marking it deleted" ,
792+ child_node->to_string ());
793+ child_node->set_node_deleted ();
794+ child_nodes_to_free.push_back (child_node);
795+ // the links to the previous child node will be fixed in the next code block
796+ }
797+
798+
839799 // Insert the last key of the child node into parent node
840800 if (!child_node->is_node_deleted ()) {
841801 if (child_node->total_entries () == 0 ) {
842- if (orig_child_infos.contains (child_node->node_id ())) {
843- child_last_key = orig_child_infos[child_node->node_id ()];
844- LOGTRACEMOD (wbcache,
845- " Reach to an empty child node [{}], but not the end of the parent node, so we need "
846- " to update the key of parent node as original one {}" ,
847- child_node->to_string (), child_last_key.to_string ());
848- } else {
849- LOGTRACEMOD (wbcache,
850- " Reach to an empty child node [{}] but not belonging to this parent (probably next "
851- " parent sibling); Hence end loop" ,
852- child_node->to_string ());
853- break ;
854- }
802+ BT_LOG_ASSERT (false ,
803+ " Child node [{}] is empty but not deleted and not an edge, while it doesn't "
804+ " belong to this parent node {}" ,
805+ child_node->to_string (), parent_node->to_string ());
806+ ret = btree_status_t ::not_found;
807+ break ;
855808 }
856809 cur_parent->insert (cur_parent->total_entries (), child_last_key,
857810 BtreeLinkInfo{child_node->node_id (), child_node->link_version ()});
@@ -873,13 +826,6 @@ class IndexTable : public IndexTableBase, public Btree< K, V > {
873826 IndexBtreeNode* idx_node = static_cast < IndexBtreeNode* >(pre_child_node.get ());
874827 idx_node->m_idx_buf ->set_state (index_buf_state_t ::CLEAN);
875828 write_node_impl (pre_child_node, cp_ctx);
876- // update the key of last entry of the parent with the last key of deleted child
877- child_last_key = orig_child_infos[child_node->node_id ()];
878- LOGTRACEMOD (wbcache, " updating parent [{}] current last key with {}" , cur_parent->to_string (),
879- child_last_key.to_string ());
880- // update it here to go to the next child node and unlock this node
881- LOGTRACEMOD (wbcache, " update the child node next to the next of previous child node" );
882- child_node->set_next_bnode (child_node->next_bnode ());
883829 }
884830 }
885831
@@ -914,6 +860,7 @@ class IndexTable : public IndexTableBase, public Btree< K, V > {
914860 child_node = nullptr ;
915861 break ;
916862 }
863+
917864 ret = this ->read_and_lock_node (next_node_id, child_node, locktype_t ::READ, locktype_t ::READ, cp_ctx);
918865 if (ret != btree_status_t ::success) {
919866 BT_LOG_ASSERT (false , " Parent node={} repair is partial, because child_node get has failed with ret={}" ,
@@ -925,6 +872,10 @@ class IndexTable : public IndexTableBase, public Btree< K, V > {
925872 } while (true );
926873
927874 if (child_node) { this ->unlock_node (child_node, locktype_t ::READ); }
875+
876+ // free the deleted child nodes
877+ for (const auto & cnode : child_nodes_to_free) { free_node_impl (cnode, cp_ctx); }
878+
928879 // If the last parent's key is less than the last child key, then we need to update the parent node with
929880 // the last child key, unless it has an edge.
930881 auto last_parent = parent_node;
@@ -996,7 +947,7 @@ class IndexTable : public IndexTableBase, public Btree< K, V > {
996947 }
997948
998949 if (sibling_node->is_node_deleted ()) {
999- LOGTRACEMOD (wbcache, " Sibling node [{}] is not the sibling for parent_node {}" ,
950+ LOGTRACEMOD (wbcache, " Sibling node [{}] is not the true sibling for parent_node {}" ,
1000951 sibling_node->to_string (), node->to_string ());
1001952 return find_true_sibling (sibling_node);
1002953 } else {
0 commit comments