Skip to content

Commit 8fc8325

Browse files
authored
feat: report reorg if checkpoint index decreased but block height stayed the same or increased (#7212)
1 parent 7d6a86b commit 8fc8325

File tree

9 files changed

+299
-66
lines changed

9 files changed

+299
-66
lines changed

rust/main/agents/relayer/src/msg/db_loader.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -422,7 +422,7 @@ pub mod test {
422422
},
423423
};
424424
use hyperlane_core::{
425-
identifiers::UniqueIdentifier, test_utils::dummy_domain, GasPaymentKey,
425+
identifiers::UniqueIdentifier, test_utils::dummy_domain, CheckpointInfo, GasPaymentKey,
426426
InterchainGasPayment, InterchainGasPaymentMeta, MerkleTreeInsertion,
427427
PendingOperationStatus, H256,
428428
};
@@ -736,6 +736,9 @@ pub mod test {
736736
fn store_payload_uuids_by_message_id(&self, message_id: &H256, payload_uuids: Vec<UniqueIdentifier>) -> DbResult<()>;
737737

738738
fn retrieve_payload_uuids_by_message_id(&self, message_id: &H256) -> DbResult<Option<Vec<UniqueIdentifier>>>;
739+
740+
fn store_latest_checkpoint_info(&self, checkpoint_info: &CheckpointInfo) -> DbResult<()>;
741+
fn retrieve_latest_checkpoint_info(&self) -> DbResult<Option<CheckpointInfo>>;
739742
}
740743
}
741744

rust/main/agents/relayer/src/msg/pending_message.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1202,6 +1202,9 @@ mod test {
12021202
fn retrieve_highest_seen_message_nonce_number(&self) -> DbResult<Option<u32>>;
12031203
fn store_payload_uuids_by_message_id(&self, message_id: &H256, payload_uuids: Vec<UniqueIdentifier>) -> DbResult<()>;
12041204
fn retrieve_payload_uuids_by_message_id(&self, message_id: &H256) -> DbResult<Option<Vec<UniqueIdentifier>>>;
1205+
1206+
fn store_latest_checkpoint_info(&self, checkpoint_info: &CheckpointInfo) -> DbResult<()>;
1207+
fn retrieve_latest_checkpoint_info(&self) -> DbResult<Option<CheckpointInfo>>;
12051208
}
12061209
}
12071210

rust/main/agents/validator/src/submit.rs

Lines changed: 125 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ use hyperlane_core::{
1616
HyperlaneSignerExt, IncrementalMerkleAtBlock,
1717
};
1818
use hyperlane_core::{
19-
ChainResult, HyperlaneSigner, MerkleTreeHook, ReorgEvent, ReorgPeriod, SignedType,
19+
ChainResult, CheckpointInfo, HyperlaneSigner, MerkleTreeHook, ReorgEvent, ReorgPeriod,
20+
SignedType, H256,
2021
};
2122
use hyperlane_ethereum::{Signers, SingletonSignerHandle};
2223

@@ -118,6 +119,12 @@ impl ValidatorSubmitter {
118119
true
119120
};
120121

122+
let mut latest_seen_checkpoint = self
123+
.db
124+
.retrieve_latest_checkpoint_info()
125+
.unwrap_or_default()
126+
.unwrap_or_default();
127+
121128
loop {
122129
// Lag by reorg period because this is our correctness checkpoint.
123130
let latest_checkpoint = call_and_retry_indefinitely(|| {
@@ -127,6 +134,8 @@ impl ValidatorSubmitter {
127134
})
128135
.await;
129136

137+
self.verify_checkpoint(&tree, &latest_checkpoint, &latest_seen_checkpoint)
138+
.await;
130139
self.metrics
131140
.set_latest_checkpoint_observed(&latest_checkpoint);
132141

@@ -162,6 +171,24 @@ impl ValidatorSubmitter {
162171
// Set that initial consistency has been reached on first loop run. Subsequent runs are idempotent.
163172
self.metrics.reached_initial_consistency.set(1);
164173

174+
// Update latest seen valid checkpoint
175+
if let Some(block_height) = latest_checkpoint.block_height {
176+
tracing::debug!(
177+
?latest_checkpoint,
178+
?latest_seen_checkpoint,
179+
"Updating latest seen checkpoint index"
180+
);
181+
if block_height < latest_seen_checkpoint.block_height {
182+
tracing::warn!(
183+
?latest_checkpoint,
184+
?latest_seen_checkpoint,
185+
"Receive a checkpoint with a higher index, but lower block height"
186+
);
187+
}
188+
latest_seen_checkpoint.block_height = block_height;
189+
latest_seen_checkpoint.checkpoint_index = latest_checkpoint.index;
190+
}
191+
165192
sleep(self.interval).await;
166193
}
167194
}
@@ -238,41 +265,15 @@ impl ValidatorSubmitter {
238265
// If the tree's checkpoint doesn't match the correctness checkpoint, something went wrong
239266
// and we bail loudly.
240267
if checkpoint != correctness_checkpoint.checkpoint {
241-
let reorg_event = ReorgEvent::new(
268+
Self::panic_with_reorg(
269+
&self.reorg_reporter,
270+
&self.reorg_period,
271+
&self.checkpoint_syncer,
242272
tree.root(),
243-
correctness_checkpoint.root,
244-
checkpoint.index,
245-
chrono::Utc::now().timestamp() as u64,
246-
self.reorg_period.clone(),
247-
);
248-
error!(
249-
?checkpoint,
250-
?correctness_checkpoint,
251-
?reorg_event,
252-
"Incorrect tree root. Most likely a reorg has occurred. Please reach out for help, this is a potentially serious error impacting signed messages. Do NOT forcefully resume operation of this validator. Keep it crashlooping or shut down until you receive support."
253-
);
254-
255-
if let Some(height) = correctness_checkpoint.block_height {
256-
self.reorg_reporter.report_at_block(height).await;
257-
} else {
258-
info!("Blockchain does not support block height, reporting with reorg period");
259-
self.reorg_reporter
260-
.report_with_reorg_period(&self.reorg_period)
261-
.await;
262-
}
263-
264-
let mut panic_message = "Incorrect tree root. Most likely a reorg has occurred. Please reach out for help, this is a potentially serious error impacting signed messages. Do NOT forcefully resume operation of this validator. Keep it crashlooping or shut down until you receive support.".to_owned();
265-
if let Err(e) = self
266-
.checkpoint_syncer
267-
.write_reorg_status(&reorg_event)
268-
.await
269-
{
270-
panic_message.push_str(&format!(
271-
" Reorg troubleshooting details couldn't be written to checkpoint storage: {}",
272-
e
273-
));
274-
}
275-
panic!("{panic_message}");
273+
correctness_checkpoint,
274+
&checkpoint,
275+
)
276+
.await;
276277
}
277278

278279
tracing::info!(
@@ -296,6 +297,95 @@ impl ValidatorSubmitter {
296297
}
297298
}
298299

300+
/// Verify checkpoint is valid
301+
async fn verify_checkpoint(
302+
&self,
303+
tree: &IncrementalMerkle,
304+
latest_checkpoint: &CheckpointAtBlock,
305+
latest_seen_checkpoint: &CheckpointInfo,
306+
) {
307+
// if checkpoint has an index greater than last seen, then it is valid
308+
if latest_seen_checkpoint.checkpoint_index < latest_checkpoint.index {
309+
return;
310+
}
311+
312+
let block_height = match latest_checkpoint.block_height {
313+
Some(s) => s,
314+
None => return,
315+
};
316+
// if checkpoint has a block height greater than last seen, then it is valid
317+
if latest_seen_checkpoint.block_height < block_height {
318+
return;
319+
}
320+
321+
// otherwise, a reorg occurred when checkpoint has a lower index
322+
// but has the same or higher block height
323+
tracing::error!(
324+
?latest_checkpoint,
325+
?latest_seen_checkpoint,
326+
"Latest checkpoint index is lower than previously seen, but has a block height equal or greater.");
327+
328+
let checkpoint = self.checkpoint(tree);
329+
Self::panic_with_reorg(
330+
&self.reorg_reporter,
331+
&self.reorg_period,
332+
&self.checkpoint_syncer,
333+
tree.root(),
334+
latest_checkpoint,
335+
&checkpoint,
336+
)
337+
.await;
338+
}
339+
340+
async fn panic_with_reorg(
341+
reorg_reporter: &Arc<dyn ReorgReporter>,
342+
reorg_period: &ReorgPeriod,
343+
checkpoint_syncer: &Arc<dyn CheckpointSyncer>,
344+
tree_root: H256,
345+
correctness_checkpoint: &CheckpointAtBlock,
346+
incorrect_checkpoint: &Checkpoint,
347+
) {
348+
let reorg_event = ReorgEvent {
349+
local_merkle_root: tree_root,
350+
local_checkpoint_index: incorrect_checkpoint.index,
351+
canonical_merkle_root: correctness_checkpoint.root,
352+
canonical_checkpoint_index: correctness_checkpoint.index,
353+
unix_timestamp: chrono::Utc::now().timestamp() as u64,
354+
reorg_period: reorg_period.clone(),
355+
};
356+
error!(
357+
?incorrect_checkpoint,
358+
?correctness_checkpoint,
359+
?reorg_event,
360+
"Incorrect tree root. Most likely a reorg has occurred. Please reach out for help, this is a potentially serious error impacting signed messages. Do NOT forcefully resume operation of this validator. Keep it crashlooping or shut down until you receive support."
361+
);
362+
363+
Self::report_reorg_with_checkpoint(reorg_reporter, reorg_period, correctness_checkpoint)
364+
.await;
365+
366+
let mut panic_message = "Incorrect tree root. Most likely a reorg has occurred. Please reach out for help, this is a potentially serious error impacting signed messages. Do NOT forcefully resume operation of this validator. Keep it crashlooping or shut down until you receive support.".to_owned();
367+
if let Err(e) = checkpoint_syncer.write_reorg_status(&reorg_event).await {
368+
panic_message.push_str(&format!(
369+
" Reorg troubleshooting details couldn't be written to checkpoint storage: {}",
370+
e
371+
));
372+
}
373+
panic!("{panic_message}");
374+
}
375+
376+
async fn report_reorg_with_checkpoint(
377+
reorg_reporter: &Arc<dyn ReorgReporter>,
378+
reorg_period: &ReorgPeriod,
379+
correctness_checkpoint: &CheckpointAtBlock,
380+
) {
381+
if let Some(height) = correctness_checkpoint.block_height {
382+
reorg_reporter.report_at_block(height).await;
383+
} else {
384+
info!("Blockchain does not support block height, reporting with reorg period");
385+
reorg_reporter.report_with_reorg_period(reorg_period).await;
386+
}
387+
}
388+
299389
async fn sign_checkpoint(
300390
&self,
301391
checkpoint: CheckpointWithMessageId,

0 commit comments

Comments
 (0)