Add more tests

bkolad · bkolad · commit 68f7ecaa03ee · 2026-03-20T19:40:21.000+01:00
diff --git a/examples/demo-rollup/tests/replica/recovery.rs b/examples/demo-rollup/tests/replica/recovery.rs
@@ -95,3 +95,205 @@ async fn test_db_elected_leader_recovery_replica_keeps_running() {
     leader.shutdown().await.unwrap();
     setup.shutdown().await;
 }
+
+/// Test that when the replica enters recovery state (due to falling behind
+/// the deferred slots threshold), the leader continues to function and
+/// both nodes recover to normal operation.
+///
+/// Both polling loops are bounded by a deadline, so a replica that never
+/// leaves recovery fails the test with a message instead of hanging it.
+#[tokio::test(flavor = "multi_thread")]
+async fn test_db_elected_replica_recovery_leader_keeps_running() {
+    std::env::set_var("SOV_TEST_CONST_OVERRIDE_DEFERRED_SLOTS_COUNT", "40");
+
+    let Some(setup) = NodeDiscoveryTestSetup::new().await else {
+        return;
+    };
+    let key_and_address = read_private_key::<S>("tx_signer_private_key.json");
+
+    let node_1 = setup
+        .start_node("node_1", ConfiguredNodeRole::DbElected)
+        .await;
+    let node_2 = setup
+        .start_node("node_2", ConfiguredNodeRole::DbElected)
+        .await;
+    node_1.wait_for_sequencer_ready().await.unwrap();
+    node_2.wait_for_sequencer_ready().await.unwrap();
+
+    let (leader, replica) = establish_leader_and_replica(node_1, node_2).await;
+    let token_id = config_gas_token_id();
+    let receiver_addr = random_address();
+
+    // Pause only the replica's update_state loop to prevent it from processing batches
+    replica.pause_preferred_batches_for_node().await;
+
+    // Produce DA blocks while replica is paused to exceed the deferred slots threshold.
+    // With DEFERRED_SLOTS_COUNT=40, the 90% threshold triggers at ~26 blocks of lag.
+    for _ in 0..30 {
+        setup.da_service.produce_block_now().await.unwrap();
+    }
+
+    // Wait for the replica to sync the DA blocks
+    replica.wait_for_node_synced().await.unwrap();
+
+    // Resume batch processing; on the next state update the replica should enter recovery
+    replica.resume_preferred_batches_for_node().await;
+    setup.da_service.produce_block_now().await.unwrap();
+
+    // Wait until the replica is no longer ready (entered recovery)
+    let start = std::time::Instant::now();
+    while replica.is_sequencer_ready().await {
+        if start.elapsed() > Duration::from_secs(10) {
+            panic!("Timeout waiting for replica to enter recovery");
+        }
+        setup.da_service.produce_block_now().await.unwrap();
+        tokio::time::sleep(Duration::from_millis(30)).await;
+    }
+
+    // Keep producing blocks until the replica is ready again, recording whether the
+    // recovering state was observed. Bounded so a stuck replica fails instead of hanging.
+    let recovery_start = std::time::Instant::now();
+    let mut was_recovering = false;
+    while !replica.is_sequencer_ready().await {
+        if recovery_start.elapsed() > Duration::from_secs(30) {
+            panic!("Timeout waiting for replica to recover");
+        }
+        if replica.is_sequencer_recovering().await {
+            was_recovering = true;
+        }
+        setup.da_service.produce_block_now().await.unwrap();
+        tokio::time::sleep(Duration::from_millis(50)).await;
+    }
+    assert!(was_recovering, "replica was never observed in the recovering state");
+    replica.wait_for_sequencer_ready().await.unwrap();
+
+    // Verify the leader is still operational after recovery by sending a new transaction
+    let tx = build_transfer_token_tx::<S>(
+        &key_and_address.private_key,
+        token_id,
+        receiver_addr,
+        AMOUNT,
+        0,
+    );
+    leader.send_tx_to_sequencer(&tx).await.unwrap();
+
+    let mut event_subscription = replica
+        .api_client()
+        .subscribe_to_events_with_filter("Bank/*")
+        .await
+        .unwrap();
+    wait_for_all_events_with_timeout(Duration::from_millis(500), 0, &mut event_subscription).await;
+
+    replica.shutdown().await.unwrap();
+    leader.shutdown().await.unwrap();
+    setup.shutdown().await;
+}
+
+/// Test that when both the leader and replica enter recovery state (due to
+/// falling behind the deferred slots threshold), both recover to normal
+/// operation.
+///
+/// Every polling loop is bounded by a deadline, so a node that never
+/// leaves recovery fails the test with a message instead of hanging it.
+#[tokio::test(flavor = "multi_thread")]
+async fn test_db_elected_both_nodes_recovery() {
+    std::env::set_var("SOV_TEST_CONST_OVERRIDE_DEFERRED_SLOTS_COUNT", "40");
+
+    let Some(setup) = NodeDiscoveryTestSetup::new().await else {
+        return;
+    };
+
+    let key_and_address = read_private_key::<S>("tx_signer_private_key.json");
+
+    let node_1 = setup
+        .start_node("node_1", ConfiguredNodeRole::DbElected)
+        .await;
+    let node_2 = setup
+        .start_node("node_2", ConfiguredNodeRole::DbElected)
+        .await;
+    node_1.wait_for_sequencer_ready().await.unwrap();
+    node_2.wait_for_sequencer_ready().await.unwrap();
+
+    let (leader, replica) = establish_leader_and_replica(node_1, node_2).await;
+    let token_id = config_gas_token_id();
+    let receiver_addr = random_address();
+
+    // Pause both nodes' update_state loops to prevent batch processing
+    leader.pause_preferred_batches_for_node().await;
+    replica.pause_preferred_batches_for_node().await;
+
+    // Produce DA blocks while both nodes are paused to exceed the deferred slots threshold.
+    // With DEFERRED_SLOTS_COUNT=40, the 90% threshold triggers at ~26 blocks of lag.
+    for _ in 0..30 {
+        setup.da_service.produce_block_now().await.unwrap();
+    }
+
+    // Wait for both nodes to sync the DA blocks
+    leader.wait_for_node_synced().await.unwrap();
+    replica.wait_for_node_synced().await.unwrap();
+
+    // Resume batch processing on both; on the next state update both should enter recovery
+    leader.resume_preferred_batches_for_node().await;
+    replica.resume_preferred_batches_for_node().await;
+    setup.da_service.produce_block_now().await.unwrap();
+
+    // Wait until the leader is no longer ready (entered recovery)
+    let start = std::time::Instant::now();
+    while leader.is_sequencer_ready().await {
+        if start.elapsed() > Duration::from_secs(10) {
+            panic!("Timeout waiting for leader to enter recovery");
+        }
+        setup.da_service.produce_block_now().await.unwrap();
+        tokio::time::sleep(Duration::from_millis(30)).await;
+    }
+
+    // Keep producing blocks until the leader is ready again, recording whether the
+    // recovering state was observed. Bounded so a stuck leader fails instead of hanging.
+    let leader_recovery_start = std::time::Instant::now();
+    let mut leader_was_recovering = false;
+    while !leader.is_sequencer_ready().await {
+        if leader_recovery_start.elapsed() > Duration::from_secs(30) {
+            panic!("Timeout waiting for leader to recover");
+        }
+        if leader.is_sequencer_recovering().await {
+            leader_was_recovering = true;
+        }
+        setup.da_service.produce_block_now().await.unwrap();
+        tokio::time::sleep(Duration::from_millis(50)).await;
+    }
+    assert!(leader_was_recovering, "leader was never observed in the recovering state");
+    leader.wait_for_sequencer_ready().await.unwrap();
+
+    // Wait for the replica to also recover
+    let replica_recovery_start = std::time::Instant::now();
+    while !replica.is_sequencer_ready().await {
+        if replica_recovery_start.elapsed() > Duration::from_secs(30) {
+            panic!("Timeout waiting for replica to recover");
+        }
+        setup.da_service.produce_block_now().await.unwrap();
+        tokio::time::sleep(Duration::from_millis(50)).await;
+    }
+    replica.wait_for_sequencer_ready().await.unwrap();
+
+    // Verify the cluster is operational after both nodes recovered
+    let tx = build_transfer_token_tx::<S>(
+        &key_and_address.private_key,
+        token_id,
+        receiver_addr,
+        AMOUNT,
+        0,
+    );
+    leader.send_tx_to_sequencer(&tx).await.unwrap();
+
+    let mut event_subscription = replica
+        .api_client()
+        .subscribe_to_events_with_filter("Bank/*")
+        .await
+        .unwrap();
+
+    wait_for_all_events_with_timeout(Duration::from_millis(500), 0, &mut event_subscription).await;
+
+    replica.shutdown().await.unwrap();
+    leader.shutdown().await.unwrap();
+    setup.shutdown().await;
+}