Skip to content

Commit 4caa406

Browse files
committed
Fix close time bug for slow nodes as genesis
1 parent db5c22d commit 4caa406

File tree

2 files changed

+82
-0
lines changed

2 files changed

+82
-0
lines changed

src/herder/test/HerderTests.cpp

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6975,3 +6975,84 @@ TEST_CASE("nomination timeouts with partial upgrade arming",
69756975
// Verify the upgrade did not go through
69766976
REQUIRE(nodes[0]->getLedgerManager().getLastTxFee() == currentFee);
69776977
}
6978+
6979+
// Scenario: two of three validators (quorum threshold 3) are started alone and
// sit at the genesis ledger for longer than CONSENSUS_STUCK_TIMEOUT_SECONDS,
// until both report HERDER_SYNCING_STATE. A third validator is then added and
// connected, and all three are expected to close at least
// GENESIS_LEDGER_SEQ + 3.
TEST_CASE("late joining node reaches consensus", "[herder]")
6980+
{
6981+
auto mode = Simulation::OVER_LOOPBACK;
6982+
auto networkID = sha256(getTestConfig().NETWORK_PASSPHRASE);
6983+
6984+
auto simulation = std::make_shared<Simulation>(mode, networkID, [](int i) {
6985+
auto cfg = getTestConfig(i);
6986+
// Set a very short drift time so close times become old quickly
6987+
cfg.MAXIMUM_LEDGER_CLOSETIME_DRIFT = 1;
6988+
return cfg;
6989+
});
6990+
6991+
// Use specific keys designed to make A's value win in combineCandidates.
6992+
// The selection is based on hash comparison when txSets are equal.
6993+
auto validatorAKey = SecretKey::fromSeed(sha256("AAA-first-validator"));
6994+
auto validatorBKey = SecretKey::fromSeed(sha256("b-second-validator"));
6995+
auto validatorCKey = SecretKey::fromSeed(sha256("z-late-validator"));
6996+
6997+
// Threshold 3 means all 3 nodes are needed for consensus
6998+
SCPQuorumSet qset;
6999+
qset.threshold = 3;
7000+
qset.validators.push_back(validatorAKey.getPublicKey());
7001+
qset.validators.push_back(validatorBKey.getPublicKey());
7002+
qset.validators.push_back(validatorCKey.getPublicKey());
7003+
7004+
// Start only nodes A and B initially
7005+
auto A = simulation->addNode(validatorAKey, qset);
7006+
auto B = simulation->addNode(validatorBKey, qset);
7007+
7008+
simulation->addPendingConnection(validatorAKey.getPublicKey(),
7009+
validatorBKey.getPublicKey());
7010+
simulation->startAllNodes();
7011+
7012+
// A and B are at genesis ledger, unable to reach consensus (need 3 nodes)
7013+
REQUIRE(A->getLedgerManager().getLastClosedLedgerNum() ==
7014+
LedgerManager::GENESIS_LEDGER_SEQ);
7015+
REQUIRE(B->getLedgerManager().getLastClosedLedgerNum() ==
7016+
LedgerManager::GENESIS_LEDGER_SEQ);
7017+
7018+
// NOTE(review): no transaction is actually submitted in this test.
7019+
// Wait longer than CONSENSUS_STUCK_TIMEOUT_SECONDS (35s) so that nodes
7020+
// A and B go out of sync (HERDER_SYNCING_STATE). At that point, the
7021+
// enforceRecent check becomes active and close times that are older than
7022+
// MAXIMUM_LEDGER_CLOSETIME_DRIFT will be rejected.
7023+
// With MAXIMUM_LEDGER_CLOSETIME_DRIFT = 1, close times become old very
7024+
// quickly
7025+
auto waitTime = std::chrono::seconds(
7026+
Herder::CONSENSUS_STUCK_TIMEOUT_SECONDS.count() + 5);
7027+
simulation->crankForAtLeast(waitTime, false);
7028+
7029+
// A should still be at genesis (can't reach consensus with only 2 of 3).
// NOTE(review): only A is re-checked here; B's ledger number is not.
7030+
REQUIRE(A->getLedgerManager().getLastClosedLedgerNum() ==
7031+
LedgerManager::GENESIS_LEDGER_SEQ);
7032+
7033+
// After CONSENSUS_STUCK_TIMEOUT_SECONDS, nodes should go out of sync
7034+
REQUIRE(A->getHerder().getState() == Herder::HERDER_SYNCING_STATE);
7035+
REQUIRE(B->getHerder().getState() == Herder::HERDER_SYNCING_STATE);
7036+
7037+
// Now add node C (the late joiner)
7038+
auto C = simulation->addNode(validatorCKey, qset);
7039+
C->start();
7040+
7041+
// Add connections to C (use addConnection since nodes are already started)
7042+
simulation->addConnection(validatorAKey.getPublicKey(),
7043+
validatorCKey.getPublicKey());
7044+
simulation->addConnection(validatorBKey.getPublicKey(),
7045+
validatorCKey.getPublicKey());
7046+
7047+
// Now all 3 nodes should be able to reach consensus
7048+
// Allow up to 10 expected ledger close times to reach genesis + 3
7049+
auto targetLedger = LedgerManager::GENESIS_LEDGER_SEQ + 3;
7050+
simulation->crankUntil(
7051+
[&]() { return simulation->haveAllExternalized(targetLedger, 3); },
7052+
10 * simulation->getExpectedLedgerCloseTime(), false);
7053+
7054+
// Verify all nodes reached consensus
7055+
REQUIRE(A->getLedgerManager().getLastClosedLedgerNum() >= targetLedger);
7056+
REQUIRE(B->getLedgerManager().getLastClosedLedgerNum() >= targetLedger);
7057+
REQUIRE(C->getLedgerManager().getLastClosedLedgerNum() >= targetLedger);
7058+
}

src/overlay/test/OverlayTests.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2401,6 +2401,7 @@ TEST_CASE("disconnected topology recovery", "[overlay][simulation]")
24012401
cfg.KNOWN_PEERS = peers;
24022402
}
24032403
cfg.RUN_STANDALONE = false;
2404+
cfg.MAX_SLOTS_TO_REMEMBER = 2;
24042405
return cfg;
24052406
});
24062407
auto nodeIDs = simulation->getNodeIDs();

0 commit comments

Comments
 (0)