diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index f530c41b8730..9c0d0ce61194 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -1508,6 +1508,16 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep /// It's a very rare case, and it's okay if some queries throw TIMEOUT_EXCEEDED when waiting for all replicas if (first_entry_to_mark_finished) { + /// Skip non-existing entries that were removed a long time ago (if the replica was offline for a long time) + Strings all_nodes = current_zookeeper->getChildren(fs::path(zookeeper_path) / "log"); + std::erase_if(all_nodes, [] (const String & s) { return !startsWith(s, "query-"); }); + auto oldest_node = std::min_element(all_nodes.begin(), all_nodes.end()); + if (oldest_node != all_nodes.end()) + { + UInt32 oldest_entry = DDLTaskBase::getLogEntryNumber(*oldest_node); + first_entry_to_mark_finished = std::max(oldest_entry, first_entry_to_mark_finished); + } + /// If the replica is new and some of the queries applied during recovery /// where issued after the replica was created, then other nodes might be /// waiting for this node to notify them that the query was applied.