@@ -618,12 +618,12 @@ class IndexTable : public IndexTableBase, public Btree< K, V > {
618618 K last_child_last_key;
619619 K last_child_neighbor_key;
620620 BtreeNodePtr cur_child = child_node;
621+ auto sibling_first_child = true_sibling_first_child (parent_node);
622+ LOGTRACEMOD (wbcache, " Sibling first child id is {}" , sibling_first_child);
621623
622624 // We find the last child node by starting from the leftmost child and traversing through the
623625 // next_bnode links until we reach the end or find a sibling first child.
624626 bool found_child = false ;
625- auto sibling_first_child = true_sibling_first_child (parent_node);
626- LOGTRACEMOD (wbcache, " Sibling first child id is {}" , sibling_first_child);
627627 while (cur_child != nullptr ) {
628628 LOGTRACEMOD (wbcache, " Processing child node [{}]" , cur_child->to_string ());
629629 if (!cur_child->is_node_deleted () && cur_child->total_entries () > 0 ) {
@@ -685,7 +685,6 @@ class IndexTable : public IndexTableBase, public Btree< K, V > {
685685 LOGTRACEMOD (wbcache,
686686 " No undeleted child found for parent_node [{}], keep normal repair (regular recovery)" ,
687687 parent_node->to_string ());
688- next_cur_child = nullptr ;
689688 }
690689 }
691690 }
@@ -700,6 +699,7 @@ class IndexTable : public IndexTableBase, public Btree< K, V > {
700699 // Walk across all child nodes until it gets the last_parent_key and keep fixing them.
701700 auto cur_parent = parent_node;
702701 BtreeNodeList new_parent_nodes;
702+ BtreeNodeList child_nodes_to_free;
703703 do {
704704 if (child_node->has_valid_edge () || (child_node->is_leaf () && child_node->next_bnode () == empty_bnodeid)) {
705705 LOGTRACEMOD (wbcache, " Child node [{}] is an edge node or a leaf with no next" , child_node->to_string ());
@@ -760,61 +760,6 @@ class IndexTable : public IndexTableBase, public Btree< K, V > {
760760 LOGTRACEMOD (wbcache, " Repairing node={}, child_node=[{}] child_last_key={}" , cur_parent->node_id (),
761761 child_node->to_string (), child_last_key.to_string ());
762762
763- // Check if we are beyond the last child node.
764- //
765- // There can be cases where the child level merge is successfully persisted but the parent level is
766- // not. In this case, you may have your rightmost child node with last key greater than the
767- // last_parent_key. That's why here we have to check if the child node is one of the original child
768- // nodes first.
769- if (!is_parent_edge_node && !orig_child_infos.contains (child_node->node_id ())) {
770- LOGTRACEMOD (
771- wbcache,
772- " Child node [{}] is not one of the original child nodes, so we need to check if it is beyond the "
773- " last parent key {}" ,
774- child_node->to_string (), last_parent_key.to_string ());
775- if (child_last_key.compare (last_parent_key) > 0 ) {
776- // We have reached a child beyond this parent, we can stop now
777- // TODO this case if child last key is less than last parent key to update the parent node.
778- // this case can potentially break the btree for put and remove op.
779- break ;
780- }
781- if (child_node->total_entries () == 0 ) {
782- // this child has no entries, but maybe in the middle of the parent node, we need to update the key
783- // of parent as previous one and go on
784- LOGTRACEMOD (wbcache,
785- " Reach to an empty child node {}, and this child doesn't belong to this parent; Hence "
786- " loop ends" ,
787- child_node->to_string ());
788- // now update the next of parent node by skipping all deleted siblings of this parent node
789- auto valid_sibling = cur_parent->next_bnode ();
790- while (valid_sibling != empty_bnodeid) {
791- BtreeNodePtr sibling;
792- if (read_node_impl (valid_sibling, sibling) == btree_status_t ::success) {
793- if (sibling->is_node_deleted ()) {
794- valid_sibling = sibling->next_bnode ();
795- continue ;
796- }
797- // cur_parent->set_next_bnode(sibling->node_id());
798- break ;
799- }
800- LOGTRACEMOD (wbcache, " Failed to read child node {} for parent node [{}] reason {}" ,
801- valid_sibling, cur_parent->to_string (), ret);
802- }
803- if (valid_sibling != empty_bnodeid) {
804- cur_parent->set_next_bnode (valid_sibling);
805- LOGTRACEMOD (wbcache, " Repairing node=[{}], child_node=[{}] is an edge node, end loop" ,
806- cur_parent->to_string (), child_node->to_string ());
807-
808- } else {
809- cur_parent->set_next_bnode (empty_bnodeid);
810- LOGTRACEMOD (wbcache, " Repairing node=[{}], child_node=[{}] is an edge node, end loop" ,
811- cur_parent->to_string (), child_node->to_string ());
812- }
813-
814- break ;
815- }
816- }
817-
818763 if (!cur_parent->has_room_for_put (btree_put_type::INSERT, K::get_max_size (),
819764 BtreeLinkInfo::get_fixed_size ())) {
820765 // No room in the parent_node, let us split the parent_node and continue
@@ -836,22 +781,25 @@ class IndexTable : public IndexTableBase, public Btree< K, V > {
836781 cur_parent = std::move (new_parent);
837782 }
838783
784+ // If the child node is empty, we mark it as deleted and free it after the repair loop
785+ if (child_node->total_entries () == 0 ) {
786+ LOGTRACEMOD (wbcache, " Found an empty child node {}, marking it deleted" ,
787+ child_node->to_string ());
788+ child_node->set_node_deleted ();
789+ child_nodes_to_free.push_back (child_node);
790+ // the links to the previous child node will be fixed in the next code block
791+ }
792+
793+
839794 // Insert the last key of the child node into parent node
840795 if (!child_node->is_node_deleted ()) {
841796 if (child_node->total_entries () == 0 ) {
842- if (orig_child_infos.contains (child_node->node_id ())) {
843- child_last_key = orig_child_infos[child_node->node_id ()];
844- LOGTRACEMOD (wbcache,
845- " Reach to an empty child node [{}], but not the end of the parent node, so we need "
846- " to update the key of parent node as original one {}" ,
847- child_node->to_string (), child_last_key.to_string ());
848- } else {
849- LOGTRACEMOD (wbcache,
850- " Reach to an empty child node [{}] but not belonging to this parent (probably next "
851- " parent sibling); Hence end loop" ,
852- child_node->to_string ());
853- break ;
854- }
797+ BT_LOG_ASSERT (child_node->total_entries () > 0 ,
798+ " Child node={} has 0 entries but is not marked deleted, parent_node={} repair is "
799+ " partial" ,
800+ child_node->node_id (), parent_node->node_id ());
801+ ret = btree_status_t ::not_found;
802+ break ;
855803 }
856804 cur_parent->insert (cur_parent->total_entries (), child_last_key,
857805 BtreeLinkInfo{child_node->node_id (), child_node->link_version ()});
@@ -873,13 +821,6 @@ class IndexTable : public IndexTableBase, public Btree< K, V > {
873821 IndexBtreeNode* idx_node = static_cast < IndexBtreeNode* >(pre_child_node.get ());
874822 idx_node->m_idx_buf ->set_state (index_buf_state_t ::CLEAN);
875823 write_node_impl (pre_child_node, cp_ctx);
876- // update the key of last entry of the parent with the last key of deleted child
877- child_last_key = orig_child_infos[child_node->node_id ()];
878- LOGTRACEMOD (wbcache, " updating parent [{}] current last key with {}" , cur_parent->to_string (),
879- child_last_key.to_string ());
880- // update it here to go to the next child node and unlock this node
881- LOGTRACEMOD (wbcache, " update the child node next to the next of previous child node" );
882- child_node->set_next_bnode (child_node->next_bnode ());
883824 }
884825 }
885826
@@ -914,6 +855,7 @@ class IndexTable : public IndexTableBase, public Btree< K, V > {
914855 child_node = nullptr ;
915856 break ;
916857 }
858+
917859 ret = this ->read_and_lock_node (next_node_id, child_node, locktype_t ::READ, locktype_t ::READ, cp_ctx);
918860 if (ret != btree_status_t ::success) {
919861 BT_LOG_ASSERT (false , " Parent node={} repair is partial, because child_node get has failed with ret={}" ,
@@ -925,6 +867,10 @@ class IndexTable : public IndexTableBase, public Btree< K, V > {
925867 } while (true );
926868
927869 if (child_node) { this ->unlock_node (child_node, locktype_t ::READ); }
870+
871+ // free the deleted child nodes
872+ for (const auto & cnode : child_nodes_to_free) { free_node_impl (cnode, cp_ctx); }
873+
928874 // if last parent has the key less than the last child key, then we need to update the parent node with
929875 // the last child key if it doesn't have edge.
930876 auto last_parent = parent_node;
0 commit comments