Skip to content

Commit 3204346

Browse files
committed
Fix: Don't flag consensus as stalled prematurely (#5658)
Fix stalled consensus detection to prevent false positives in situations where there are no disputed transactions. Stalled consensus detection was added to 2.5.0 in response to a network consensus halt that caused a round to run for over an hour. However, it has a flaw that makes it very easy to have false positives. Those false positives are usually mitigated by other checks that prevent them from having an effect, but there have been several instances of validators "running ahead" because there are circumstances where the other checks are "successful", allowing the stall state to be checked.
1 parent 1e01cd3 commit 3204346

File tree

5 files changed

+192
-54
lines changed

5 files changed

+192
-54
lines changed

src/test/consensus/Consensus_test.cpp

Lines changed: 143 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1136,6 +1136,10 @@ class Consensus_test : public beast::unit_test::suite
11361136
ConsensusParms p;
11371137
std::size_t peersUnchanged = 0;
11381138

1139+
auto logs = std::make_unique<Logs>(beast::severities::kError);
1140+
auto j = logs->journal("Test");
1141+
auto clog = std::make_unique<std::stringstream>();
1142+
11391143
// Three cases:
11401144
// 1 proposing, initial vote yes
11411145
// 2 proposing, initial vote no
@@ -1172,10 +1176,15 @@ class Consensus_test : public beast::unit_test::suite
11721176
BEAST_EXPECT(proposingFalse.getOurVote() == false);
11731177
BEAST_EXPECT(followingTrue.getOurVote() == true);
11741178
BEAST_EXPECT(followingFalse.getOurVote() == false);
1175-
BEAST_EXPECT(!proposingTrue.stalled(p, true, peersUnchanged));
1176-
BEAST_EXPECT(!proposingFalse.stalled(p, true, peersUnchanged));
1177-
BEAST_EXPECT(!followingTrue.stalled(p, false, peersUnchanged));
1178-
BEAST_EXPECT(!followingFalse.stalled(p, false, peersUnchanged));
1179+
BEAST_EXPECT(
1180+
!proposingTrue.stalled(p, true, peersUnchanged, j, clog));
1181+
BEAST_EXPECT(
1182+
!proposingFalse.stalled(p, true, peersUnchanged, j, clog));
1183+
BEAST_EXPECT(
1184+
!followingTrue.stalled(p, false, peersUnchanged, j, clog));
1185+
BEAST_EXPECT(
1186+
!followingFalse.stalled(p, false, peersUnchanged, j, clog));
1187+
BEAST_EXPECT(clog->str() == "");
11791188

11801189
// I'm in the majority, my vote should not change
11811190
BEAST_EXPECT(!proposingTrue.updateVote(5, true, p));
@@ -1189,10 +1198,15 @@ class Consensus_test : public beast::unit_test::suite
11891198
BEAST_EXPECT(!followingFalse.updateVote(10, false, p));
11901199

11911200
peersUnchanged = 2;
1192-
BEAST_EXPECT(!proposingTrue.stalled(p, true, peersUnchanged));
1193-
BEAST_EXPECT(!proposingFalse.stalled(p, true, peersUnchanged));
1194-
BEAST_EXPECT(!followingTrue.stalled(p, false, peersUnchanged));
1195-
BEAST_EXPECT(!followingFalse.stalled(p, false, peersUnchanged));
1201+
BEAST_EXPECT(
1202+
!proposingTrue.stalled(p, true, peersUnchanged, j, clog));
1203+
BEAST_EXPECT(
1204+
!proposingFalse.stalled(p, true, peersUnchanged, j, clog));
1205+
BEAST_EXPECT(
1206+
!followingTrue.stalled(p, false, peersUnchanged, j, clog));
1207+
BEAST_EXPECT(
1208+
!followingFalse.stalled(p, false, peersUnchanged, j, clog));
1209+
BEAST_EXPECT(clog->str() == "");
11961210

11971211
// Right now, the vote is 51%. The requirement is about to jump to
11981212
// 65%
@@ -1282,10 +1296,15 @@ class Consensus_test : public beast::unit_test::suite
12821296
BEAST_EXPECT(followingFalse.getOurVote() == false);
12831297

12841298
peersUnchanged = 3;
1285-
BEAST_EXPECT(!proposingTrue.stalled(p, true, peersUnchanged));
1286-
BEAST_EXPECT(!proposingFalse.stalled(p, true, peersUnchanged));
1287-
BEAST_EXPECT(!followingTrue.stalled(p, false, peersUnchanged));
1288-
BEAST_EXPECT(!followingFalse.stalled(p, false, peersUnchanged));
1299+
BEAST_EXPECT(
1300+
!proposingTrue.stalled(p, true, peersUnchanged, j, clog));
1301+
BEAST_EXPECT(
1302+
!proposingFalse.stalled(p, true, peersUnchanged, j, clog));
1303+
BEAST_EXPECT(
1304+
!followingTrue.stalled(p, false, peersUnchanged, j, clog));
1305+
BEAST_EXPECT(
1306+
!followingFalse.stalled(p, false, peersUnchanged, j, clog));
1307+
BEAST_EXPECT(clog->str() == "");
12891308

12901309
// Threshold jumps to 95%
12911310
BEAST_EXPECT(proposingTrue.updateVote(220, true, p));
@@ -1322,12 +1341,60 @@ class Consensus_test : public beast::unit_test::suite
13221341

13231342
for (peersUnchanged = 0; peersUnchanged < 6; ++peersUnchanged)
13241343
{
1325-
BEAST_EXPECT(!proposingTrue.stalled(p, true, peersUnchanged));
1326-
BEAST_EXPECT(!proposingFalse.stalled(p, true, peersUnchanged));
1327-
BEAST_EXPECT(!followingTrue.stalled(p, false, peersUnchanged));
1328-
BEAST_EXPECT(!followingFalse.stalled(p, false, peersUnchanged));
1344+
BEAST_EXPECT(
1345+
!proposingTrue.stalled(p, true, peersUnchanged, j, clog));
1346+
BEAST_EXPECT(
1347+
!proposingFalse.stalled(p, true, peersUnchanged, j, clog));
1348+
BEAST_EXPECT(
1349+
!followingTrue.stalled(p, false, peersUnchanged, j, clog));
1350+
BEAST_EXPECT(
1351+
!followingFalse.stalled(p, false, peersUnchanged, j, clog));
1352+
BEAST_EXPECT(clog->str() == "");
13291353
}
13301354

1355+
auto expectStalled = [this, &clog](
1356+
int txid,
1357+
bool ourVote,
1358+
int ourTime,
1359+
int peerTime,
1360+
int support,
1361+
std::uint32_t line) {
1362+
using namespace std::string_literals;
1363+
1364+
auto const s = clog->str();
1365+
expect(s.find("stalled"), s, __FILE__, line);
1366+
expect(
1367+
s.starts_with("Transaction "s + std::to_string(txid)),
1368+
s,
1369+
__FILE__,
1370+
line);
1371+
expect(
1372+
s.find("voting "s + (ourVote ? "YES" : "NO")) != s.npos,
1373+
s,
1374+
__FILE__,
1375+
line);
1376+
expect(
1377+
s.find("for "s + std::to_string(ourTime) + " rounds."s) !=
1378+
s.npos,
1379+
s,
1380+
__FILE__,
1381+
line);
1382+
expect(
1383+
s.find(
1384+
"votes in "s + std::to_string(peerTime) + " rounds.") !=
1385+
s.npos,
1386+
s,
1387+
__FILE__,
1388+
line);
1389+
expect(
1390+
s.ends_with(
1391+
"has "s + std::to_string(support) + "% support. "s),
1392+
s,
1393+
__FILE__,
1394+
line);
1395+
clog = std::make_unique<std::stringstream>();
1396+
};
1397+
13311398
for (int i = 0; i < 1; ++i)
13321399
{
13331400
BEAST_EXPECT(!proposingTrue.updateVote(250 + 10 * i, true, p));
@@ -1342,22 +1409,34 @@ class Consensus_test : public beast::unit_test::suite
13421409
BEAST_EXPECT(followingFalse.getOurVote() == false);
13431410

13441411
// true vote has changed recently, so not stalled
1345-
BEAST_EXPECT(!proposingTrue.stalled(p, true, 0));
1412+
BEAST_EXPECT(!proposingTrue.stalled(p, true, 0, j, clog));
1413+
BEAST_EXPECT(clog->str() == "");
13461414
// remaining votes have been unchanged in so long that we only
13471415
// need to hit the second round at 95% to be stalled, regardless
13481416
// of peers
1349-
BEAST_EXPECT(proposingFalse.stalled(p, true, 0));
1350-
BEAST_EXPECT(followingTrue.stalled(p, false, 0));
1351-
BEAST_EXPECT(followingFalse.stalled(p, false, 0));
1417+
BEAST_EXPECT(proposingFalse.stalled(p, true, 0, j, clog));
1418+
expectStalled(98, false, 11, 0, 2, __LINE__);
1419+
BEAST_EXPECT(followingTrue.stalled(p, false, 0, j, clog));
1420+
expectStalled(97, true, 11, 0, 97, __LINE__);
1421+
BEAST_EXPECT(followingFalse.stalled(p, false, 0, j, clog));
1422+
expectStalled(96, false, 11, 0, 3, __LINE__);
13521423

13531424
// true vote has changed recently, so not stalled
1354-
BEAST_EXPECT(!proposingTrue.stalled(p, true, peersUnchanged));
1425+
BEAST_EXPECT(
1426+
!proposingTrue.stalled(p, true, peersUnchanged, j, clog));
1427+
BEAST_EXPECTS(clog->str() == "", clog->str());
13551428
// remaining votes have been unchanged in so long that we only
13561429
// need to hit the second round at 95% to be stalled, regardless
13571430
// of peers
1358-
BEAST_EXPECT(proposingFalse.stalled(p, true, peersUnchanged));
1359-
BEAST_EXPECT(followingTrue.stalled(p, false, peersUnchanged));
1360-
BEAST_EXPECT(followingFalse.stalled(p, false, peersUnchanged));
1431+
BEAST_EXPECT(
1432+
proposingFalse.stalled(p, true, peersUnchanged, j, clog));
1433+
expectStalled(98, false, 11, 6, 2, __LINE__);
1434+
BEAST_EXPECT(
1435+
followingTrue.stalled(p, false, peersUnchanged, j, clog));
1436+
expectStalled(97, true, 11, 6, 97, __LINE__);
1437+
BEAST_EXPECT(
1438+
followingFalse.stalled(p, false, peersUnchanged, j, clog));
1439+
expectStalled(96, false, 11, 6, 3, __LINE__);
13611440
}
13621441
for (int i = 1; i < 3; ++i)
13631442
{
@@ -1374,19 +1453,31 @@ class Consensus_test : public beast::unit_test::suite
13741453

13751454
// true vote changed 2 rounds ago, and peers are changing, so
13761455
// not stalled
1377-
BEAST_EXPECT(!proposingTrue.stalled(p, true, 0));
1456+
BEAST_EXPECT(!proposingTrue.stalled(p, true, 0, j, clog));
1457+
BEAST_EXPECTS(clog->str() == "", clog->str());
13781458
// still stalled
1379-
BEAST_EXPECT(proposingFalse.stalled(p, true, 0));
1380-
BEAST_EXPECT(followingTrue.stalled(p, false, 0));
1381-
BEAST_EXPECT(followingFalse.stalled(p, false, 0));
1459+
BEAST_EXPECT(proposingFalse.stalled(p, true, 0, j, clog));
1460+
expectStalled(98, false, 11 + i, 0, 2, __LINE__);
1461+
BEAST_EXPECT(followingTrue.stalled(p, false, 0, j, clog));
1462+
expectStalled(97, true, 11 + i, 0, 97, __LINE__);
1463+
BEAST_EXPECT(followingFalse.stalled(p, false, 0, j, clog));
1464+
expectStalled(96, false, 11 + i, 0, 3, __LINE__);
13821465

13831466
// true vote changed 2 rounds ago, and peers are NOT changing,
13841467
// so stalled
1385-
BEAST_EXPECT(proposingTrue.stalled(p, true, peersUnchanged));
1468+
BEAST_EXPECT(
1469+
proposingTrue.stalled(p, true, peersUnchanged, j, clog));
1470+
expectStalled(99, true, 1 + i, 6, 97, __LINE__);
13861471
// still stalled
1387-
BEAST_EXPECT(proposingFalse.stalled(p, true, peersUnchanged));
1388-
BEAST_EXPECT(followingTrue.stalled(p, false, peersUnchanged));
1389-
BEAST_EXPECT(followingFalse.stalled(p, false, peersUnchanged));
1472+
BEAST_EXPECT(
1473+
proposingFalse.stalled(p, true, peersUnchanged, j, clog));
1474+
expectStalled(98, false, 11 + i, 6, 2, __LINE__);
1475+
BEAST_EXPECT(
1476+
followingTrue.stalled(p, false, peersUnchanged, j, clog));
1477+
expectStalled(97, true, 11 + i, 6, 97, __LINE__);
1478+
BEAST_EXPECT(
1479+
followingFalse.stalled(p, false, peersUnchanged, j, clog));
1480+
expectStalled(96, false, 11 + i, 6, 3, __LINE__);
13901481
}
13911482
for (int i = 3; i < 5; ++i)
13921483
{
@@ -1401,15 +1492,27 @@ class Consensus_test : public beast::unit_test::suite
14011492
BEAST_EXPECT(followingTrue.getOurVote() == true);
14021493
BEAST_EXPECT(followingFalse.getOurVote() == false);
14031494

1404-
BEAST_EXPECT(proposingTrue.stalled(p, true, 0));
1405-
BEAST_EXPECT(proposingFalse.stalled(p, true, 0));
1406-
BEAST_EXPECT(followingTrue.stalled(p, false, 0));
1407-
BEAST_EXPECT(followingFalse.stalled(p, false, 0));
1495+
BEAST_EXPECT(proposingTrue.stalled(p, true, 0, j, clog));
1496+
expectStalled(99, true, 1 + i, 0, 97, __LINE__);
1497+
BEAST_EXPECT(proposingFalse.stalled(p, true, 0, j, clog));
1498+
expectStalled(98, false, 11 + i, 0, 2, __LINE__);
1499+
BEAST_EXPECT(followingTrue.stalled(p, false, 0, j, clog));
1500+
expectStalled(97, true, 11 + i, 0, 97, __LINE__);
1501+
BEAST_EXPECT(followingFalse.stalled(p, false, 0, j, clog));
1502+
expectStalled(96, false, 11 + i, 0, 3, __LINE__);
14081503

1409-
BEAST_EXPECT(proposingTrue.stalled(p, true, peersUnchanged));
1410-
BEAST_EXPECT(proposingFalse.stalled(p, true, peersUnchanged));
1411-
BEAST_EXPECT(followingTrue.stalled(p, false, peersUnchanged));
1412-
BEAST_EXPECT(followingFalse.stalled(p, false, peersUnchanged));
1504+
BEAST_EXPECT(
1505+
proposingTrue.stalled(p, true, peersUnchanged, j, clog));
1506+
expectStalled(99, true, 1 + i, 6, 97, __LINE__);
1507+
BEAST_EXPECT(
1508+
proposingFalse.stalled(p, true, peersUnchanged, j, clog));
1509+
expectStalled(98, false, 11 + i, 6, 2, __LINE__);
1510+
BEAST_EXPECT(
1511+
followingTrue.stalled(p, false, peersUnchanged, j, clog));
1512+
expectStalled(97, true, 11 + i, 6, 97, __LINE__);
1513+
BEAST_EXPECT(
1514+
followingFalse.stalled(p, false, peersUnchanged, j, clog));
1515+
expectStalled(96, false, 11 + i, 6, 3, __LINE__);
14131516
}
14141517
}
14151518
}

src/xrpld/app/consensus/RCLValidations.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ RCLValidationsAdaptor::acquire(LedgerHash const& hash)
136136

137137
if (!ledger)
138138
{
139-
JLOG(j_.debug())
139+
JLOG(j_.warn())
140140
<< "Need validated ledger for preferred ledger analysis " << hash;
141141

142142
Application* pApp = &app_;

src/xrpld/consensus/Consensus.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -139,11 +139,11 @@ checkConsensusReached(
139139
return false;
140140
}
141141

142-
// We only get stalled when every disputed transaction unequivocally has 80%
143-
// (minConsensusPct) agreement, either for or against. That is: either under
144-
// 20% or over 80% consensus (repectively "nay" or "yay"). This prevents
145-
// manipulation by a minority of byzantine peers of which transactions make
146-
// the cut to get into the ledger.
142+
// We only get stalled when there are disputed transactions and all of them
143+
// unequivocally have 80% (minConsensusPct) agreement, either for or
144+
// against. That is: either under 20% or over 80% consensus (repectively
145+
// "nay" or "yay"). This prevents manipulation by a minority of byzantine
146+
// peers of which transactions make the cut to get into the ledger.
147147
if (stalled)
148148
{
149149
CLOG(clog) << "consensus stalled. ";

src/xrpld/consensus/Consensus.h

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,8 @@ shouldCloseLedger(
8484
agree
8585
@param stalled the network appears to be stalled, where
8686
neither we nor our peers have changed their vote on any disputes in a
87-
while. This is undesirable, and will cause us to end consensus
88-
without 80% agreement.
87+
while. This is undesirable, and should be rare, and will cause us to
88+
end consensus without 80% agreement.
8989
@param parms Consensus constant parameters
9090
@param proposing whether we should count ourselves
9191
@param j journal for logging
@@ -1712,15 +1712,29 @@ Consensus<Adaptor>::haveConsensus(
17121712
<< ", disagree=" << disagree;
17131713

17141714
ConsensusParms const& parms = adaptor_.parms();
1715-
// Stalling is BAD
1715+
// Stalling is BAD. It means that we have a consensus on the close time, so
1716+
// peers are talking, but we have disputed transactions that peers are
1717+
// unable or unwilling to come to agreement on one way or the other.
17161718
bool const stalled = haveCloseTimeConsensus_ &&
1719+
!result_->disputes.empty() &&
17171720
std::ranges::all_of(result_->disputes,
1718-
[this, &parms](auto const& dispute) {
1721+
[this, &parms, &clog](auto const& dispute) {
17191722
return dispute.second.stalled(
17201723
parms,
17211724
mode_.get() == ConsensusMode::proposing,
1722-
peerUnchangedCounter_);
1725+
peerUnchangedCounter_,
1726+
j_,
1727+
clog);
17231728
});
1729+
if (stalled)
1730+
{
1731+
std::stringstream ss;
1732+
ss << "Consensus detects as stalled with " << (agree + disagree) << "/"
1733+
<< prevProposers_ << " proposers, and " << result_->disputes.size()
1734+
<< " stalled disputed transactions.";
1735+
JLOG(j_.error()) << ss.str();
1736+
CLOG(clog) << ss.str();
1737+
}
17241738

17251739
// Determine if we actually have consensus or not
17261740
result_->state = checkConsensus(

src/xrpld/consensus/DisputedTx.h

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,12 @@ class DisputedTx
8585
//! Are we and our peers "stalled" where we probably won't change
8686
//! our vote?
8787
bool
88-
stalled(ConsensusParms const& p, bool proposing, int peersUnchanged) const
88+
stalled(
89+
ConsensusParms const& p,
90+
bool proposing,
91+
int peersUnchanged,
92+
beast::Journal j,
93+
std::unique_ptr<std::stringstream> const& clog) const
8994
{
9095
// at() can throw, but the map is built by hand to ensure all valid
9196
// values are available.
@@ -123,8 +128,24 @@ class DisputedTx
123128
int const weight = support / total;
124129
// Returns true if the tx has more than minCONSENSUS_PCT (80) percent
125130
// agreement. Either voting for _or_ voting against the tx.
126-
return weight > p.minCONSENSUS_PCT ||
127-
weight < (100 - p.minCONSENSUS_PCT);
131+
bool const stalled =
132+
weight > p.minCONSENSUS_PCT || weight < (100 - p.minCONSENSUS_PCT);
133+
134+
if (stalled)
135+
{
136+
// stalling is an error condition for even a single
137+
// transaction.
138+
std::stringstream s;
139+
s << "Transaction " << ID() << " is stalled. We have been voting "
140+
<< (getOurVote() ? "YES" : "NO") << " for " << currentVoteCounter_
141+
<< " rounds. Peers have not changed their votes in "
142+
<< peersUnchanged << " rounds. The transaction has " << weight
143+
<< "% support. ";
144+
JLOG(j_.error()) << s.str();
145+
CLOG(clog) << s.str();
146+
}
147+
148+
return stalled;
128149
}
129150

130151
//! The disputed transaction.

0 commit comments

Comments
 (0)