Commit 777d786

Overload Raptor Chunk Header Epoch
A full node may join new RaptorCast groups from the same validator while it is syncing or stuck. Because its local round does not advance in that state, it may keep rebroadcasting to an expired group, and expired groups are not garbage-collected. As a result, many full nodes end up receiving unsolicited Raptor chunks from groups they no longer belong to. The root cause is that the node selects the rebroadcast group based on its local round. This PR resolves the issue by overloading the epoch field in the Raptor chunk header: for secondary RaptorCast it carries the round number instead. Syncing or stuck full nodes can then use the round number from the header to identify and service the correct group.
1 parent 33d321a commit 777d786
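
The overloading is easiest to read as a tagged group id. The diffs below only show the GroupId::Primary(Epoch(..)) constructor; the Secondary variant carrying a Round, and the Epoch/Round newtype definitions, are assumptions in the sketch below, inferred from the commit description and the new comment in monad-executor-glue rather than taken from the crate.

// Hedged sketch, not the actual monad-raptorcast definitions.
// Epoch and Round stand in for the monad_types newtypes.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Epoch(pub u64);
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Round(pub u64);

/// How the single group-id field in the Raptor chunk header is interpreted.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum GroupId {
    /// Primary RaptorCast (validator to validator): the field carries the epoch, as before.
    Primary(Epoch),
    /// Secondary RaptorCast (validator to full node): the same field is overloaded to
    /// carry the round, because rebroadcast groups are defined over round ranges.
    Secondary(Round),
}

impl GroupId {
    /// Raw value written into the (former) epoch field of the chunk header.
    pub fn header_value(self) -> u64 {
        match self {
            GroupId::Primary(Epoch(e)) => e,
            GroupId::Secondary(Round(r)) => r,
        }
    }
}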

17 files changed, +237 -231 lines changed


monad-consensus-state/src/command.rs

Lines changed: 2 additions & 0 deletions
@@ -61,6 +61,7 @@ where
     },
     PublishToFullNodes {
         epoch: Epoch,
+        round: Round,
         message: Verified<ST, Validated<ConsensusMessage<ST, SCT, EPT>>>,
     },
     /// Schedule a timeout event for `round` to be emitted in `duration`
@@ -137,6 +138,7 @@ where
         cmds.push(ConsensusCommand::EnterRound(epoch, round));
         cmds.push(ConsensusCommand::PublishToFullNodes {
             epoch,
+            round: high_certificate.round(),
             message: ConsensusMessage {
                 version,
                 message: ProtocolMessage::AdvanceRound(AdvanceRoundMessage {
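
On the sending side, the consensus layer now supplies both the epoch and the round, and the router decides which one becomes the header's group id. The helper below is hypothetical glue reusing the GroupId sketch above, not code from this commit.

// Hypothetical glue, not code from this commit: the round added to
// PublishToFullNodes becomes the header value only on the secondary path.
fn group_id_for_publish(epoch: Epoch, round: Round, secondary_raptorcast: bool) -> GroupId {
    if secondary_raptorcast {
        // Secondary RaptorCast: rebroadcast groups are keyed by round.
        GroupId::Secondary(round)
    } else {
        // Primary RaptorCast keeps the original semantics: the epoch.
        GroupId::Primary(epoch)
    }
}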

monad-executor-glue/src/lib.rs

Lines changed: 11 additions & 2 deletions
@@ -66,8 +66,12 @@ pub enum RouterCommand<ST: CertificateSignatureRecoverable, OM> {
         message: OM,
         priority: UdpPriority,
     },
+    // Primary publishing embeds epoch as group_id in chunk header. Secondary
+    // publishing embeds round as group_id in chunk header, as rebroadcasting
+    // periods are defined in rounds
     PublishToFullNodes {
-        epoch: Epoch, // Epoch gets embedded into the raptorcast message
+        epoch: Epoch,
+        round: Round,
         message: OM,
     },
     AddEpochValidatorSet {
@@ -103,9 +107,14 @@ impl<ST: CertificateSignatureRecoverable, OM> Debug for RouterCommand<ST, OM> {
                 .field("target", target)
                 .field("priority", priority)
                 .finish(),
-            Self::PublishToFullNodes { epoch, message: _ } => f
+            Self::PublishToFullNodes {
+                epoch,
+                round,
+                message: _,
+            } => f
                 .debug_struct("PublishToFullNodes")
                 .field("epoch", epoch)
+                .field("round", round)
                 .finish(),
             Self::AddEpochValidatorSet {
                 epoch,
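
On the receiving side, the point of the change is that a syncing or stuck full node no longer needs its local round to pick a rebroadcast group. The lookup below is a hypothetical illustration, reusing the Round newtype from the sketch above; RebroadcastGroup and the round-range map are stand-ins for whatever the actual secondary RaptorCast state looks like.

use std::collections::BTreeMap;

// Hypothetical receive-side lookup, not code from this commit.
struct RebroadcastGroup {
    end_round: Round, // first round this group no longer covers (exclusive)
    // ... peers to re-send chunks to, expiry, etc.
}

struct SecondaryGroups {
    // Groups joined so far, keyed by the first round they cover.
    by_start_round: BTreeMap<u64, RebroadcastGroup>,
}

impl SecondaryGroups {
    /// Select the group for an incoming chunk using the round carried in its
    /// header, rather than the node's own (possibly stale) local round.
    fn group_for_chunk(&self, header_round: Round) -> Option<&RebroadcastGroup> {
        self.by_start_round
            .range(..=header_round.0)
            .next_back()
            .map(|(_, group)| group)
            .filter(|group| header_round.0 < group.end_round.0)
    }
}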

monad-raptorcast/benches/encode_bench.rs

Lines changed: 4 additions & 3 deletions
@@ -22,11 +22,12 @@ use monad_crypto::certificate_signature::{CertificateSignature, CertificateSigna
 use monad_dataplane::udp::DEFAULT_SEGMENT_SIZE;
 use monad_raptorcast::{
     packet,
+    udp::GroupId,
     util::{BuildTarget, EpochValidators, Redundancy},
 };
 use monad_secp::SecpSignature;
 use monad_testutil::signing::get_key;
-use monad_types::{NodeId, Stake};
+use monad_types::{Epoch, NodeId, Stake};
 
 const NUM_NODES: usize = 100;
 
@@ -63,8 +64,8 @@ pub fn bench_build_messages(c: &mut Criterion, name: &str, message_size: usize,
         DEFAULT_SEGMENT_SIZE, // segment_size
         message.clone(),
         Redundancy::from_u8(2),
-        0, // epoch_no
-        0, // unix_ts_ms
+        GroupId::Primary(Epoch(0)), // epoch_no
+        0,                          // unix_ts_ms
         build_target.clone(),
         &known_addrs,
     );

monad-raptorcast/benches/raptor_bench.rs

Lines changed: 6 additions & 6 deletions
@@ -24,11 +24,11 @@ use monad_dataplane::udp::DEFAULT_SEGMENT_SIZE
 use monad_raptor::ManagedDecoder;
 use monad_raptorcast::{
     packet::build_messages,
-    udp::{parse_message, MAX_REDUNDANCY, SIGNATURE_CACHE_SIZE},
+    udp::{parse_message, GroupId, MAX_REDUNDANCY, SIGNATURE_CACHE_SIZE},
     util::{BuildTarget, EpochValidators, Redundancy},
 };
 use monad_secp::{KeyPair, SecpSignature};
-use monad_types::{NodeId, Stake};
+use monad_types::{Epoch, NodeId, Stake};
 
 #[allow(clippy::useless_vec)]
 pub fn criterion_benchmark(c: &mut Criterion) {
@@ -71,8 +71,8 @@ pub fn criterion_benchmark(c: &mut Criterion) {
         DEFAULT_SEGMENT_SIZE, // segment_size
         message.clone(),
         Redundancy::from_u8(2),
-        0, // epoch_no
-        0, // unix_ts_ms
+        GroupId::Primary(Epoch(0)), // epoch_no
+        0,                          // unix_ts_ms
         BuildTarget::Raptorcast(epoch_validators),
         &known_addresses,
     );
@@ -112,8 +112,8 @@ pub fn criterion_benchmark(c: &mut Criterion) {
         DEFAULT_SEGMENT_SIZE, // segment_size
         message.clone(),
         Redundancy::from_u8(2),
-        0, // epoch_no
-        0, // unix_ts_ms
+        GroupId::Primary(Epoch(0)), // epoch_no
+        0,                          // unix_ts_ms
         BuildTarget::Raptorcast(epoch_validators),
         &known_addresses,
     )
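
In both benches the former raw epoch_no argument to build_messages becomes a GroupId. One consequence of overloading a single header field, illustrated below with the GroupId sketch from above, is that the raw value alone is ambiguous: whether it names an epoch or a round is decided by which RaptorCast path (primary or secondary) the chunk belongs to.

// Hedged illustration, reusing the GroupId sketch above: the same raw header
// value can mean "epoch 5" (primary) or "round 5" (secondary), so the path,
// not the value, determines the interpretation.
let primary = GroupId::Primary(Epoch(5));
let secondary = GroupId::Secondary(Round(5));
assert_eq!(primary.header_value(), secondary.header_value());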
