Skip to content

Commit 3b59b5b

Browse files
authored
[mfp] limit accept votes by GC (#24183)
## Description Add a fix and a test for counting votes with GC. ## Test plan CI https://github.com/MystenLabs/sui/actions/runs/19240002505
1 parent 3674f77 commit 3b59b5b

File tree

7 files changed

+208
-34
lines changed

7 files changed

+208
-34
lines changed

consensus/core/src/commit_finalizer.rs

Lines changed: 183 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -345,23 +345,26 @@ impl CommitFinalizer {
345345
}
346346
}
347347

348-
/// To save bandwidth, blocks do not include explicit accept votes on transactions.
349-
/// Reject votes are included only the first time the block containing the voted-on
350-
/// transaction is linked in a block. Other first time linked transactions, when
351-
/// not rejected, are assumed to be accepted. This vote compression rule must also be
352-
/// applied during vote aggregation.
348+
/// Updates the set of origin descendants, by appending blocks from the last commit to
349+
/// origin descendants of previous linked blocks from the same origin.
353350
///
354-
/// Transactions in a block can only be voted on by its immediate descendants.
355-
/// A block is an **immediate descendant** if it can only link directly to the voted-on
356-
/// block, without any intermediate blocks from its own authority. Votes from
357-
/// non-immediate descendants are ignored.
351+
/// The purpose of maintaining the origin descendants per block is to save bandwidth by avoiding to explicitly
352+
/// list all accept votes on transactions in blocks.
353+
/// Instead when an ancestor block Ba is first included by a proposed block Bp, reject votes for transactions in Ba
354+
/// are explicitly listed (if they exist). The rest of non-rejected transactions in Ba are assumed to be accepted by Bp.
355+
/// This vote compression rule must be applied during vote aggregation as well.
358356
///
359-
/// This rule implies the following optimization is possible: after collecting votes from a block,
360-
/// we can skip collecting votes from its **origin descendants** (descendant blocks from the
361-
/// same authority), because their votes would be ignored anyway.
357+
/// The above rule is equivalent to saying that transactions in a block can only be voted on by its immediate descendants.
358+
/// A block Bp is an **immediate descendant** of Ba, if any directed path from Bp to Ba does not contain a block from Bp's own authority.
362359
///
363-
/// This function updates the set of origin descendants for all pending blocks using blocks
364-
/// from the last commit.
360+
/// This rule implies the following optimization is possible: after collecting votes for Ba from block Bp,
361+
/// we can skip collecting votes from Bp's **origin descendants** (descendant blocks from the
362+
/// same authority), because they cannot vote on Ba anyway.
363+
///
364+
/// This vote compression rule is easy to implement when proposing blocks. Reject votes can be gathered against
365+
/// all the newly included ancestors of the proposed block. But vote decompression is trickier to get right.
366+
/// One edge case is when a block may not be an immediate descendant, because of GC. In this case votes from the
367+
/// block should not be counted.
365368
fn append_origin_descendants_from_last_commit(&mut self) {
366369
let commit_state = self
367370
.pending_commits
@@ -468,7 +471,7 @@ impl CommitFinalizer {
468471
"CommitFinalizer::try_indirect_finalize_pending_transactions_in_first_commit",
469472
);
470473

471-
let pending_blocks: Vec<(BlockRef, BTreeSet<TransactionIndex>)> = self.pending_commits[0]
474+
let pending_blocks: Vec<_> = self.pending_commits[0]
472475
.pending_transactions
473476
.iter()
474477
.map(|(k, v)| (*k, v.clone()))
@@ -605,15 +608,41 @@ impl CommitFinalizer {
605608
continue;
606609
}
607610
let curr_block_state = blocks_map.get(&curr_block_ref).unwrap_or_else(|| panic!("Block {curr_block_ref} is either incorrectly gc'ed or failed to be recovered after crash.")).read();
608-
// Ignore info from the block if its direct ancestor has been processed.
611+
// The first ancestor of current block should have the same origin / author as the current block.
612+
// If it is not found in the blocks map but have round higher than the pending block, it might have
613+
// voted on the pending block but have been GC'ed.
614+
// Because the GC'ed block might have voted on the pending block and rejected some of the pending transactions,
615+
// we cannot assume current block is voting to accept transactions from the pending block.
616+
let curr_origin_ancestor_ref = curr_block_state.block.ancestors().first().unwrap();
617+
let skip_votes = curr_block_ref.author == curr_origin_ancestor_ref.author
618+
&& pending_block_ref.round < curr_origin_ancestor_ref.round
619+
&& !blocks_map.contains_key(curr_origin_ancestor_ref);
620+
// Skip counting votes from the block if it has been marked to be ignored.
609621
if ignored.insert(curr_block_ref) {
610622
// Skip collecting votes from origin descendants of current block.
611-
// Votes from origin descendants of current block do not count for this transactions.
623+
// Votes from origin descendants of current block do not count for these transactions.
612624
// Consider this case: block B is an origin descendant of block A (from the same authority),
613625
// and both blocks A and B link to another block C.
614626
// Only B's implicit and explicit transaction votes on C are considered.
615627
// None of A's implicit or explicit transaction votes on C should be considered.
628+
//
629+
// See append_origin_descendants_from_last_commit() for more details.
616630
ignored.extend(curr_block_state.origin_descendants.iter());
631+
// Skip counting votes from current block if the votes on pending block could have been
632+
// casted by an earlier block from the same origin.
633+
// Note: if the current block casts reject votes on transactions in the pending block,
634+
// it can be assumed that accept votes are also casted to other transactions in the pending block.
635+
// But we choose to skip counting the accept votes in this edge case for simplicity.
636+
if context.protocol_config.consensus_skip_gced_accept_votes() && skip_votes {
637+
let hostname = &context.committee.authority(curr_block_ref.author).hostname;
638+
context
639+
.metrics
640+
.node_metrics
641+
.finalizer_skipped_voting_blocks
642+
.with_label_values(&[hostname])
643+
.inc();
644+
continue;
645+
}
617646
// Get reject votes from current block to the pending block.
618647
let curr_block_reject_votes = curr_block_state
619648
.reject_votes
@@ -638,7 +667,7 @@ impl CommitFinalizer {
638667
for index in newly_finalized {
639668
accept_votes.remove(&index);
640669
}
641-
// End traversing if all blocks and requested transactions have reached quorum.
670+
// End traversal if all blocks and requested transactions have reached quorum.
642671
if accept_votes.is_empty() {
643672
break;
644673
}
@@ -769,6 +798,7 @@ struct BlockState {
769798
// Reject votes casted by this block, and by linked ancestors from the same authority.
770799
reject_votes: BTreeMap<BlockRef, BTreeSet<TransactionIndex>>,
771800
// Other committed blocks that are origin descendants of this block.
801+
// See the comment above append_origin_descendants_from_last_commit() for more details.
772802
origin_descendants: Vec<BlockRef>,
773803
}
774804

@@ -821,7 +851,13 @@ mod tests {
821851
}
822852

823853
fn create_commit_finalizer_fixture() -> Fixture {
824-
let (context, _keys) = Context::new_for_test(4);
854+
let (mut context, _keys) = Context::new_for_test(4);
855+
context
856+
.protocol_config
857+
.set_consensus_gc_depth_for_testing(5);
858+
context
859+
.protocol_config
860+
.set_consensus_skip_gced_accept_votes_for_testing(true);
825861
let context = Arc::new(context);
826862
let dag_state = Arc::new(RwLock::new(DagState::new(
827863
context.clone(),
@@ -1186,6 +1222,134 @@ mod tests {
11861222
assert!(fixture.commit_finalizer.is_empty());
11871223
}
11881224

1225+
// Test indirect finalization when transaction is rejected due to GC.
1226+
#[tokio::test]
1227+
async fn test_indirect_reject_with_gc() {
1228+
let mut fixture = create_commit_finalizer_fixture();
1229+
assert_eq!(fixture.context.protocol_config.consensus_gc_depth(), 5);
1230+
1231+
// Create round 1 blocks with 10 transactions each.
1232+
let mut dag_builder = DagBuilder::new(fixture.context.clone());
1233+
dag_builder
1234+
.layer(1)
1235+
.num_transactions(10)
1236+
.build()
1237+
.persist_layers(fixture.dag_state.clone());
1238+
let round_1_blocks = dag_builder.all_blocks();
1239+
fixture
1240+
.transaction_certifier
1241+
.add_voted_blocks(round_1_blocks.iter().map(|b| (b.clone(), vec![])).collect());
1242+
1243+
// Select B1(3) to have a rejected transaction.
1244+
let block_with_rejected_txn = round_1_blocks[3].clone();
1245+
// How transactions in this block will be voted:
1246+
// Txn 1 (GC reject): 1 reject vote at round 2. But the txn will get rejected because there are only
1247+
// 2 accept votes.
1248+
1249+
// Create round 2 blocks, with B2(1) rejecting transaction 1 from B1(3).
1250+
// Note that 3 blocks link to B1(3) without rejecting transaction 1.
1251+
let ancestors: Vec<BlockRef> = round_1_blocks.iter().map(|b| b.reference()).collect();
1252+
let round_2_blocks = vec![
1253+
create_block(2, 0, ancestors.clone(), 0, vec![]),
1254+
create_block(
1255+
2,
1256+
1,
1257+
ancestors.clone(),
1258+
0,
1259+
vec![BlockTransactionVotes {
1260+
block_ref: block_with_rejected_txn.reference(),
1261+
rejects: vec![1],
1262+
}],
1263+
),
1264+
create_block(2, 2, ancestors.clone(), 0, vec![]),
1265+
create_block(2, 3, ancestors.clone(), 0, vec![]),
1266+
];
1267+
fixture.add_blocks(round_2_blocks.clone());
1268+
1269+
// Create round 3-6 blocks without creating or linking to an authority 2 block.
1270+
// The goal is to GC B2(2).
1271+
let mut last_round_blocks: Vec<VerifiedBlock> = round_2_blocks
1272+
.iter()
1273+
.enumerate()
1274+
.filter_map(|(i, b)| if i != 2 { Some(b.clone()) } else { None })
1275+
.collect();
1276+
for r in 3..=6 {
1277+
let ancestors: Vec<BlockRef> =
1278+
last_round_blocks.iter().map(|b| b.reference()).collect();
1279+
last_round_blocks = [0, 1, 3]
1280+
.map(|i| create_block(r, i, ancestors.clone(), 0, vec![]))
1281+
.to_vec();
1282+
fixture.add_blocks(last_round_blocks.clone());
1283+
}
1284+
1285+
// Create round 7-10 blocks and add a leader from authority 0 of each round.
1286+
let mut leaders = vec![];
1287+
for r in 7..=10 {
1288+
let mut ancestors: Vec<BlockRef> =
1289+
last_round_blocks.iter().map(|b| b.reference()).collect();
1290+
last_round_blocks = (0..4)
1291+
.map(|i| {
1292+
if r == 7 && i == 2 {
1293+
// Link to the GC'ed block B2(2).
1294+
ancestors.push(round_2_blocks[2].reference());
1295+
}
1296+
create_block(r, i, ancestors.clone(), 0, vec![])
1297+
})
1298+
.collect();
1299+
leaders.push(last_round_blocks[0].clone());
1300+
fixture.add_blocks(last_round_blocks.clone());
1301+
}
1302+
1303+
// Create CommittedSubDag from leaders.
1304+
assert_eq!(leaders.len(), 4);
1305+
let committed_sub_dags = fixture.linearizer.handle_commit(leaders);
1306+
assert_eq!(committed_sub_dags.len(), 4);
1307+
1308+
// Ensure 1 reject vote is contained in B2(1) in commit 0.
1309+
assert!(committed_sub_dags[0].blocks.contains(&round_2_blocks[1]));
1310+
// Ensure B2(2) is GC'ed.
1311+
for commit in committed_sub_dags.iter() {
1312+
assert!(!commit.blocks.contains(&round_2_blocks[2]));
1313+
}
1314+
1315+
// Buffering the initial 3 commits should not finalize.
1316+
for commit in committed_sub_dags.iter().take(3) {
1317+
assert!(commit.decided_with_local_blocks);
1318+
let finalized_commits = fixture
1319+
.commit_finalizer
1320+
.process_commit(commit.clone())
1321+
.await;
1322+
assert_eq!(finalized_commits.len(), 0);
1323+
}
1324+
1325+
// Buffering the 4th commit should finalize all commits.
1326+
let finalized_commits = fixture
1327+
.commit_finalizer
1328+
.process_commit(committed_sub_dags[3].clone())
1329+
.await;
1330+
assert_eq!(finalized_commits.len(), 4);
1331+
1332+
// Check rejected transactions.
1333+
// B1(3) txn 1 gets rejected, even though there are has 3 blocks links to B1(3) without rejecting txn 1.
1334+
// This is because there are only 2 accept votes for this transaction, which is less than the quorum threshold.
1335+
let rejected_transactions = finalized_commits[0].rejected_transactions_by_block.clone();
1336+
assert_eq!(rejected_transactions.len(), 1);
1337+
assert_eq!(
1338+
rejected_transactions
1339+
.get(&block_with_rejected_txn.reference())
1340+
.unwrap(),
1341+
&vec![1]
1342+
);
1343+
1344+
// Other commits should have no rejected transactions.
1345+
for commit in finalized_commits.iter().skip(1) {
1346+
assert!(commit.rejected_transactions_by_block.is_empty());
1347+
}
1348+
1349+
// CommitFinalizer should be empty.
1350+
assert!(fixture.commit_finalizer.is_empty());
1351+
}
1352+
11891353
#[tokio::test]
11901354
async fn test_finalize_remote_commits_with_reject_votes() {
11911355
let mut fixture: Fixture = create_commit_finalizer_fixture();

consensus/core/src/metrics.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,7 @@ pub(crate) struct NodeMetrics {
219219
pub(crate) finalizer_transaction_status: IntCounterVec,
220220
pub(crate) finalizer_reject_votes: IntCounterVec,
221221
pub(crate) finalizer_output_commits: IntCounterVec,
222+
pub(crate) finalizer_skipped_voting_blocks: IntCounterVec,
222223
pub(crate) uptime: Histogram,
223224
}
224225

@@ -910,6 +911,12 @@ impl NodeMetrics {
910911
&["type"],
911912
registry
912913
).unwrap(),
914+
finalizer_skipped_voting_blocks: register_int_counter_vec_with_registry!(
915+
"finalizer_skipped_voting_blocks",
916+
"Number of blocks skipped from voting due to potentially not being an immediate descendant.",
917+
&["authority"],
918+
registry
919+
).unwrap(),
913920
uptime: register_histogram_with_registry!(
914921
"uptime",
915922
"Total node uptime",

crates/sui-open-rpc/spec/openrpc.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1325,6 +1325,7 @@
13251325
"consensus_order_end_of_epoch_last": true,
13261326
"consensus_round_prober": false,
13271327
"consensus_round_prober_probe_accepted_rounds": false,
1328+
"consensus_skip_gced_accept_votes": false,
13281329
"consensus_smart_ancestor_selection": false,
13291330
"consensus_zstd_compression": false,
13301331
"convert_type_argument_error": false,

crates/sui-protocol-config/src/lib.rs

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -885,6 +885,10 @@ struct FeatureFlags {
885885
// If true, deprecate global storage ops everywhere.
886886
#[serde(skip_serializing_if = "is_false")]
887887
deprecate_global_storage_ops: bool,
888+
889+
// If true, skip GC'ed accept votes in CommitFinalizer.
890+
#[serde(skip_serializing_if = "is_false")]
891+
consensus_skip_gced_accept_votes: bool,
888892
}
889893

890894
fn is_false(b: &bool) -> bool {
@@ -2157,9 +2161,6 @@ impl ProtocolConfig {
21572161
}
21582162

21592163
pub fn mysticeti_fastpath(&self) -> bool {
2160-
if let Some(enabled) = is_mysticeti_fpc_enabled_in_env() {
2161-
return enabled;
2162-
}
21632164
self.feature_flags.mysticeti_fastpath
21642165
}
21652166

@@ -2393,6 +2394,10 @@ impl ProtocolConfig {
23932394
pub fn deprecate_global_storage_ops(&self) -> bool {
23942395
self.feature_flags.deprecate_global_storage_ops
23952396
}
2397+
2398+
pub fn consensus_skip_gced_accept_votes(&self) -> bool {
2399+
self.feature_flags.consensus_skip_gced_accept_votes
2400+
}
23962401
}
23972402

23982403
#[cfg(not(msim))]
@@ -4265,6 +4270,8 @@ impl ProtocolConfig {
42654270
cfg.feature_flags.enable_ptb_execution_v2 = true;
42664271

42674272
cfg.poseidon_bn254_cost_base = Some(260);
4273+
4274+
cfg.feature_flags.consensus_skip_gced_accept_votes = true;
42684275
}
42694276
// Use this template when making changes:
42704277
//
@@ -4615,6 +4622,10 @@ impl ProtocolConfig {
46154622
pub fn allow_references_in_ptbs_for_testing(&mut self) {
46164623
self.feature_flags.allow_references_in_ptbs = true;
46174624
}
4625+
4626+
pub fn set_consensus_skip_gced_accept_votes_for_testing(&mut self, val: bool) {
4627+
self.feature_flags.consensus_skip_gced_accept_votes = val;
4628+
}
46184629
}
46194630

46204631
type OverrideFn = dyn Fn(ProtocolVersion, ProtocolConfig) -> ProtocolConfig + Send;
@@ -4704,18 +4715,6 @@ macro_rules! check_limit_by_meter {
47044715
result
47054716
}};
47064717
}
4707-
4708-
pub fn is_mysticeti_fpc_enabled_in_env() -> Option<bool> {
4709-
if let Ok(v) = std::env::var("CONSENSUS") {
4710-
if v == "mysticeti_fpc" {
4711-
return Some(true);
4712-
} else if v == "mysticeti" {
4713-
return Some(false);
4714-
}
4715-
}
4716-
None
4717-
}
4718-
47194718
#[cfg(all(test, not(msim)))]
47204719
mod test {
47214720
use insta::assert_yaml_snapshot;

crates/sui-protocol-config/src/snapshots/sui_protocol_config__test__Mainnet_version_104.snap

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ feature_flags:
126126
generate_df_type_layouts: true
127127
private_generics_verifier_v2: true
128128
deprecate_global_storage_ops: true
129+
consensus_skip_gced_accept_votes: true
129130
max_tx_size_bytes: 131072
130131
max_input_objects: 2048
131132
max_size_written_objects: 5000000

crates/sui-protocol-config/src/snapshots/sui_protocol_config__test__Testnet_version_104.snap

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ feature_flags:
127127
generate_df_type_layouts: true
128128
private_generics_verifier_v2: true
129129
deprecate_global_storage_ops: true
130+
consensus_skip_gced_accept_votes: true
130131
max_tx_size_bytes: 131072
131132
max_input_objects: 2048
132133
max_size_written_objects: 5000000

crates/sui-protocol-config/src/snapshots/sui_protocol_config__test__version_104.snap

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ feature_flags:
130130
generate_df_type_layouts: true
131131
private_generics_verifier_v2: true
132132
deprecate_global_storage_ops: true
133+
consensus_skip_gced_accept_votes: true
133134
max_tx_size_bytes: 131072
134135
max_input_objects: 2048
135136
max_size_written_objects: 5000000

0 commit comments

Comments
 (0)