Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
ce35620
Drop DRB calculation lock earlier in catchup (#3546)
ss-es Aug 26, 2025
205b9f5
Garbage collect view sync tasks on decide (#3544)
ss-es Aug 26, 2025
2d4c7f5
update
ss-es Aug 26, 2025
1d042a3
make sure we have 2 consecutive epochs
bfish713 Aug 26, 2025
6817bb5
move drb calculation outside of the catchup loop
ss-es Aug 27, 2025
5a18426
clippy
ss-es Aug 27, 2025
e02ab5b
fix for first epoch
bfish713 Aug 27, 2025
fa81bfe
sort the epoch roots
bfish713 Aug 27, 2025
cf73182
fix reload_stake()
imabdulbasit Aug 27, 2025
df39314
move drb calculation outside of the catchup loop
ss-es Aug 27, 2025
4855226
clippy
ss-es Aug 27, 2025
34cfbd1
Revert "fix reload_stake()"
rob-maron Aug 27, 2025
0807d99
properly signal catchup completion
ss-es Aug 28, 2025
eb35d00
Merge remote-tracking branch 'origin/main' into release-20250822-test1
rob-maron Aug 28, 2025
8b741c6
Merge remote-tracking branch 'origin/main' into release-20250822-test1
rob-maron Aug 28, 2025
73ccb5e
Merge remote-tracking branch 'origin/ss/dont-recalculate-unnecessary-…
rob-maron Aug 28, 2025
223cea4
don't get randomized committee in R/R
bfish713 Aug 28, 2025
e874b43
don't get randomized committee in R/R
bfish713 Aug 28, 2025
4c7aa32
update
ss-es Aug 28, 2025
3774f07
Merge remote-tracking branch 'origin/ss/dont-recalculate-unnecessary-…
rob-maron Aug 28, 2025
8bf48aa
use previous epoch in r/r if the current doesn't have stake table
bfish713 Aug 28, 2025
0cecd21
Merge remote-tracking branch 'origin/bf/rr' into release-20250822-test1
rob-maron Aug 28, 2025
c75c9d7
salman's revert
rob-maron Aug 28, 2025
c13cc10
Don't require next epoch stuff on DA when a new epoch is just starting
bfish713 Aug 28, 2025
bcbd92d
Merge remote-tracking branch 'origin/bf/da-fix' into release-20250822…
rob-maron Aug 28, 2025
7e324bf
fix deadlock
bfish713 Aug 28, 2025
9cc313b
Merge remote-tracking branch 'origin/bf/da-fix' into release-20250822…
rob-maron Aug 28, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion crates/hotshot/hotshot/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1259,7 +1259,9 @@ async fn load_start_epoch_info<TYPES: NodeType>(
.await
.set_first_epoch(first_epoch_number, INITIAL_DRB_RESULT);

for epoch_info in start_epoch_info {
let mut sorted_epoch_info = start_epoch_info.clone();
sorted_epoch_info.sort_by_key(|info| info.epoch);
for epoch_info in sorted_epoch_info {
if let Some(block_header) = &epoch_info.block_header {
tracing::info!("Calling add_epoch_root for epoch {}", epoch_info.epoch);

Expand Down
18 changes: 13 additions & 5 deletions crates/hotshot/task-impls/src/da.rs
Original file line number Diff line number Diff line change
Expand Up @@ -436,12 +436,20 @@ impl<TYPES: NodeType, I: NodeImplementation<TYPES>, V: Versions> DaTaskState<TYP
);
return Ok(());
}
let epoch_transition_indicator =
if self.consensus.read().await.is_high_qc_ge_root_block() {
EpochTransitionIndicator::InTransition
} else {
let consensus_reader = self.consensus.read().await;
let epoch_transition_indicator = if consensus_reader.is_high_qc_ge_root_block() {
if self.upgrade_lock.upgraded_drb_and_header(view_number).await
&& consensus_reader.is_high_qc_last_block()
{
EpochTransitionIndicator::NotInTransition
};
} else {
EpochTransitionIndicator::InTransition
}
} else {
EpochTransitionIndicator::NotInTransition
};
drop(consensus_reader);

let data: DaProposal2<TYPES> = DaProposal2 {
encoded_transactions: Arc::clone(encoded_transactions),
metadata: metadata.clone(),
Expand Down
8 changes: 8 additions & 0 deletions crates/hotshot/types/src/consensus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1394,6 +1394,14 @@ impl<TYPES: NodeType> Consensus<TYPES> {
let block_height = leaf.height();
is_ge_epoch_root(block_height, self.epoch_height)
}

/// Reports whether the block number recorded in the high QC passes the
/// `is_last_block` check for this instance's `epoch_height`.
///
/// Returns `false` (after logging a warning) when the high QC does not
/// carry a block number.
pub fn is_high_qc_last_block(&self) -> bool {
    match self.high_qc().data.block_number {
        Some(high_qc_height) => is_last_block(high_qc_height, self.epoch_height),
        None => {
            // Without a block number the check cannot be made; answer `false`.
            tracing::warn!("We don't have a block number for the high QC");
            false
        },
    }
}
}

/// Alias for the block payload commitment and the associated metadata. The primary data
Expand Down
103 changes: 64 additions & 39 deletions crates/hotshot/types/src/epoch_membership.rs
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,6 @@ where
) {
// We need to fetch the requested epoch, that's for sure
let mut fetch_epochs = vec![];
fetch_epochs.push((epoch, epoch_tx));

let mut try_epoch = TYPES::Epoch::new(epoch.saturating_sub(1));
let maybe_first_epoch = self.membership.read().await.first_epoch();
Expand All @@ -225,8 +224,14 @@ where
let has_stake_table = self.membership.read().await.has_stake_table(try_epoch);
if has_stake_table {
// We have this stake table but we need to make sure we have the epoch root of the requested epoch
// and we have the previous epoch as well
if try_epoch <= TYPES::Epoch::new(epoch.saturating_sub(2)) {
break;
let previous_epoch = TYPES::Epoch::new(try_epoch.saturating_sub(1));
if try_epoch <= first_epoch + 1
|| self.membership.read().await.has_stake_table(previous_epoch)
{
break;
}
}
try_epoch = TYPES::Epoch::new(try_epoch.saturating_sub(1));
} else {
Expand Down Expand Up @@ -264,49 +269,14 @@ where

// Iterate through the epochs we need to fetch in reverse, i.e. from the oldest to the newest
while let Some((current_fetch_epoch, tx)) = fetch_epochs.pop() {
let root_leaf = match self.fetch_stake_table(current_fetch_epoch).await {
Ok(root_leaf) => root_leaf,
match self.fetch_stake_table(current_fetch_epoch).await {
Ok(_) => {},
Err(err) => {
fetch_epochs.push((current_fetch_epoch, tx));
self.catchup_cleanup(epoch, fetch_epochs, err).await;
return;
},
};
match <TYPES::Membership as Membership<TYPES>>::get_epoch_drb(
self.membership.clone(),
epoch,
)
.await
{
Ok(drb_result) => {
self.membership
.write()
.await
.add_drb_result(current_fetch_epoch, drb_result);
},
Err(err) => {
tracing::warn!(
"DRB result for epoch {} missing from membership. Beginning catchup to \
recalculate it. Error: {}",
current_fetch_epoch,
err
);

if let Err(err) = self
.compute_drb_result(current_fetch_epoch, root_leaf)
.await
{
tracing::info!(
"DRB calculation for epoch {} failed . Error: {}",
current_fetch_epoch,
err
);
fetch_epochs.push((current_fetch_epoch, tx));
self.catchup_cleanup(epoch, fetch_epochs, err).await;
return;
}
},
};

// Signal the other tasks about the success
if let Ok(Some(res)) = tx.try_broadcast(Ok(EpochMembership {
Expand All @@ -323,6 +293,61 @@ where
// Remove the epoch from the catchup map to indicate that the catchup is complete
self.catchup_map.lock().await.remove(&current_fetch_epoch);
}

let root_leaf = match self.fetch_stake_table(epoch).await {
Ok(root_leaf) => root_leaf,
Err(err) => {
fetch_epochs.push((epoch, epoch_tx));
self.catchup_cleanup(epoch, fetch_epochs, err).await;
return;
},
};

match <TYPES::Membership as Membership<TYPES>>::get_epoch_drb(
self.membership.clone(),
epoch,
)
.await
{
Ok(drb_result) => {
self.membership
.write()
.await
.add_drb_result(epoch, drb_result);
},
Err(err) => {
tracing::warn!(
"DRB result for epoch {} missing from membership. Beginning catchup to \
recalculate it. Error: {}",
epoch,
err
);

if let Err(err) = self.compute_drb_result(epoch, root_leaf).await {
tracing::error!(
"DRB calculation for epoch {} failed . Error: {}",
epoch,
err
);
self.catchup_cleanup(epoch, fetch_epochs, err).await;
}
},
};

// Signal the other tasks about the success
if let Ok(Some(res)) = epoch_tx.try_broadcast(Ok(EpochMembership {
epoch: Some(epoch),
coordinator: self.clone(),
})) {
tracing::warn!(
"The catchup channel for epoch {} was overflown, dropped message {:?}",
epoch,
res.map(|em| em.epoch)
);
}

// Remove the epoch from the catchup map to indicate that the catchup is complete
self.catchup_map.lock().await.remove(&epoch);
}

/// Call this method if you think catchup is in progress for a given epoch
Expand Down
5 changes: 3 additions & 2 deletions sequencer/src/request_response/recipient_source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,12 @@ impl<I: NodeImplementation<SeqTypes>, V: Versions> RecipientSourceTrait<Request,
Ok(membership) => membership,
Err(e) => {
warn!(
"Failed to get membership for epoch {}: {e:#}. Failing over to genesis",
"Failed to get membership for epoch {}: {e:#}. Failing over to previous epoch",
epoch_number
);
let prev_epoch = epoch_number.saturating_sub(1);
self.memberships
.stake_table_for_epoch(Some(EpochNumber::genesis()))
.stake_table_for_epoch(Some(EpochNumber::new(prev_epoch)))
.await
.with_context(|| "failed to get stake table for epoch")?
},
Expand Down
Loading