diff --git a/crates/hotshot/hotshot/src/lib.rs b/crates/hotshot/hotshot/src/lib.rs index 2cf08d37e2..670f2939ce 100644 --- a/crates/hotshot/hotshot/src/lib.rs +++ b/crates/hotshot/hotshot/src/lib.rs @@ -1259,7 +1259,9 @@ async fn load_start_epoch_info( .await .set_first_epoch(first_epoch_number, INITIAL_DRB_RESULT); - for epoch_info in start_epoch_info { + let mut sorted_epoch_info = start_epoch_info.clone(); + sorted_epoch_info.sort_by_key(|info| info.epoch); + for epoch_info in sorted_epoch_info { if let Some(block_header) = &epoch_info.block_header { tracing::info!("Calling add_epoch_root for epoch {}", epoch_info.epoch); diff --git a/crates/hotshot/task-impls/src/da.rs b/crates/hotshot/task-impls/src/da.rs index 9ba07b85ad..e15fd93e14 100644 --- a/crates/hotshot/task-impls/src/da.rs +++ b/crates/hotshot/task-impls/src/da.rs @@ -436,12 +436,20 @@ impl, V: Versions> DaTaskState = DaProposal2 { encoded_transactions: Arc::clone(encoded_transactions), metadata: metadata.clone(), diff --git a/crates/hotshot/types/src/consensus.rs b/crates/hotshot/types/src/consensus.rs index 8e22139569..3a71bf83dc 100644 --- a/crates/hotshot/types/src/consensus.rs +++ b/crates/hotshot/types/src/consensus.rs @@ -1394,6 +1394,14 @@ impl Consensus { let block_height = leaf.height(); is_ge_epoch_root(block_height, self.epoch_height) } + + pub fn is_high_qc_last_block(&self) -> bool { + let Some(block_height) = self.high_qc().data.block_number else { + tracing::warn!("We don't have a block number for the high QC"); + return false; + }; + is_last_block(block_height, self.epoch_height) + } } /// Alias for the block payload commitment and the associated metadata. The primary data diff --git a/crates/hotshot/types/src/epoch_membership.rs b/crates/hotshot/types/src/epoch_membership.rs index 2476f30b16..d19dd8e8c8 100644 --- a/crates/hotshot/types/src/epoch_membership.rs +++ b/crates/hotshot/types/src/epoch_membership.rs @@ -208,7 +208,6 @@ where ) { // We need to fetch the requested epoch, that's for sure let mut fetch_epochs = vec![]; - fetch_epochs.push((epoch, epoch_tx)); let mut try_epoch = TYPES::Epoch::new(epoch.saturating_sub(1)); let maybe_first_epoch = self.membership.read().await.first_epoch(); @@ -225,8 +224,14 @@ where let has_stake_table = self.membership.read().await.has_stake_table(try_epoch); if has_stake_table { // We have this stake table but we need to make sure we have the epoch root of the requested epoch + // and we have the previous epoch as well if try_epoch <= TYPES::Epoch::new(epoch.saturating_sub(2)) { - break; + let previous_epoch = TYPES::Epoch::new(try_epoch.saturating_sub(1)); + if try_epoch <= first_epoch + 1 + || self.membership.read().await.has_stake_table(previous_epoch) + { + break; + } } try_epoch = TYPES::Epoch::new(try_epoch.saturating_sub(1)); } else { @@ -264,49 +269,14 @@ where // Iterate through the epochs we need to fetch in reverse, i.e. from the oldest to the newest while let Some((current_fetch_epoch, tx)) = fetch_epochs.pop() { - let root_leaf = match self.fetch_stake_table(current_fetch_epoch).await { - Ok(root_leaf) => root_leaf, + match self.fetch_stake_table(current_fetch_epoch).await { + Ok(_) => {}, Err(err) => { fetch_epochs.push((current_fetch_epoch, tx)); self.catchup_cleanup(epoch, fetch_epochs, err).await; return; }, }; - match >::get_epoch_drb( - self.membership.clone(), - epoch, - ) - .await - { - Ok(drb_result) => { - self.membership - .write() - .await - .add_drb_result(current_fetch_epoch, drb_result); - }, - Err(err) => { - tracing::warn!( - "DRB result for epoch {} missing from membership. Beginning catchup to \ - recalculate it. Error: {}", - current_fetch_epoch, - err - ); - - if let Err(err) = self - .compute_drb_result(current_fetch_epoch, root_leaf) - .await - { - tracing::info!( - "DRB calculation for epoch {} failed . Error: {}", - current_fetch_epoch, - err - ); - fetch_epochs.push((current_fetch_epoch, tx)); - self.catchup_cleanup(epoch, fetch_epochs, err).await; - return; - } - }, - }; // Signal the other tasks about the success if let Ok(Some(res)) = tx.try_broadcast(Ok(EpochMembership { @@ -323,6 +293,61 @@ where // Remove the epoch from the catchup map to indicate that the catchup is complete self.catchup_map.lock().await.remove(¤t_fetch_epoch); } + + let root_leaf = match self.fetch_stake_table(epoch).await { + Ok(root_leaf) => root_leaf, + Err(err) => { + fetch_epochs.push((epoch, epoch_tx)); + self.catchup_cleanup(epoch, fetch_epochs, err).await; + return; + }, + }; + + match >::get_epoch_drb( + self.membership.clone(), + epoch, + ) + .await + { + Ok(drb_result) => { + self.membership + .write() + .await + .add_drb_result(epoch, drb_result); + }, + Err(err) => { + tracing::warn!( + "DRB result for epoch {} missing from membership. Beginning catchup to \ + recalculate it. Error: {}", + epoch, + err + ); + + if let Err(err) = self.compute_drb_result(epoch, root_leaf).await { + tracing::error!( + "DRB calculation for epoch {} failed . Error: {}", + epoch, + err + ); + self.catchup_cleanup(epoch, fetch_epochs, err).await; + } + }, + }; + + // Signal the other tasks about the success + if let Ok(Some(res)) = epoch_tx.try_broadcast(Ok(EpochMembership { + epoch: Some(epoch), + coordinator: self.clone(), + })) { + tracing::warn!( + "The catchup channel for epoch {} was overflown, dropped message {:?}", + epoch, + res.map(|em| em.epoch) + ); + } + + // Remove the epoch from the catchup map to indicate that the catchup is complete + self.catchup_map.lock().await.remove(&epoch); } /// Call this method if you think catchup is in progress for a given epoch diff --git a/sequencer/src/request_response/recipient_source.rs b/sequencer/src/request_response/recipient_source.rs index af9fab82da..e291da72c8 100644 --- a/sequencer/src/request_response/recipient_source.rs +++ b/sequencer/src/request_response/recipient_source.rs @@ -52,11 +52,12 @@ impl, V: Versions> RecipientSourceTrait membership, Err(e) => { warn!( - "Failed to get membership for epoch {}: {e:#}. Failing over to genesis", + "Failed to get membership for epoch {}: {e:#}. Failing over to previous epoch", epoch_number ); + let prev_epoch = epoch_number.saturating_sub(1); self.memberships - .stake_table_for_epoch(Some(EpochNumber::genesis())) + .stake_table_for_epoch(Some(EpochNumber::new(prev_epoch))) .await .with_context(|| "failed to get stake table for epoch")? },