Skip to content

Commit df5132b

Browse files
authored
Replace fixed timer by retry loop when TCP/UDP servers start (#3885)
## Motivation

Make e2e tests with TCP/UDP networks faster and more reliable. I wrote this while debugging #3881, but it turns out to be unrelated.

## Proposal

Use `RpcMessage::VersionInfoQuery` to detect when a simple server is running and build a retry loop.

## Test Plan

CI
1 parent 1820109 commit df5132b

File tree

1 file changed

+46
-14
lines changed

1 file changed

+46
-14
lines changed

linera-service/src/cli_wrappers/local_net.rs

Lines changed: 46 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -571,20 +571,20 @@ impl LocalNet {
571571
.args(["--genesis", "genesis.json"])
572572
.spawn_into()?;
573573

574+
let port = Self::proxy_port(validator);
575+
let nickname = format!("validator proxy {validator}");
574576
match self.network.external {
575577
Network::Grpc => {
576-
let port = Self::proxy_port(validator);
577-
let nickname = format!("validator proxy {validator}");
578578
Self::ensure_grpc_server_has_started(&nickname, port, "http").await?;
579579
}
580580
Network::Grpcs => {
581-
let port = Self::proxy_port(validator);
582-
let nickname = format!("validator proxy {validator}");
583581
Self::ensure_grpc_server_has_started(&nickname, port, "https").await?;
584582
}
585-
Network::Tcp | Network::Udp => {
586-
info!("Letting validator proxy {validator} start");
587-
linera_base::time::timer::sleep(Duration::from_secs(2)).await;
583+
Network::Tcp => {
584+
Self::ensure_simple_server_has_started(&nickname, port, "tcp").await?;
585+
}
586+
Network::Udp => {
587+
Self::ensure_simple_server_has_started(&nickname, port, "udp").await?;
588588
}
589589
}
590590
Ok(child)
@@ -658,6 +658,38 @@ impl LocalNet {
658658
bail!("Failed to start {nickname}");
659659
}
660660

661+
pub async fn ensure_simple_server_has_started(
662+
nickname: &str,
663+
port: usize,
664+
protocol: &str,
665+
) -> Result<()> {
666+
use linera_core::node::ValidatorNode as _;
667+
668+
let options = linera_rpc::NodeOptions {
669+
send_timeout: Duration::from_secs(5),
670+
recv_timeout: Duration::from_secs(5),
671+
retry_delay: Duration::from_secs(1),
672+
max_retries: 1,
673+
};
674+
let provider = linera_rpc::simple::SimpleNodeProvider::new(options);
675+
let address = format!("{protocol}:127.0.0.1:{port}");
676+
// All "simple" services (i.e. proxy and "server") are based on `RpcMessage` and
677+
// support `VersionInfoQuery`.
678+
let node = provider.make_node(&address)?;
679+
linera_base::time::timer::sleep(Duration::from_millis(100)).await;
680+
for i in 0..10 {
681+
linera_base::time::timer::sleep(Duration::from_millis(i * 500)).await;
682+
let result = node.get_version_info().await;
683+
if result.is_ok() {
684+
info!("Successfully started {nickname}");
685+
return Ok(());
686+
} else {
687+
warn!("Waiting for {nickname} to start");
688+
}
689+
}
690+
bail!("Failed to start {nickname}");
691+
}
692+
661693
async fn initialize_storage(&mut self, validator: usize) -> Result<()> {
662694
let namespace = format!("{}_server_{}_db", self.common_namespace, validator);
663695
let storage_config = self.common_storage_config.clone();
@@ -706,20 +738,20 @@ impl LocalNet {
706738
.args(self.cross_chain_config.to_args());
707739
let child = command.spawn_into()?;
708740

741+
let port = Self::shard_port(validator, shard);
742+
let nickname = format!("validator server {validator}:{shard}");
709743
match self.network.internal {
710744
Network::Grpc => {
711-
let port = Self::shard_port(validator, shard);
712-
let nickname = format!("validator server {validator}:{shard}");
713745
Self::ensure_grpc_server_has_started(&nickname, port, "http").await?;
714746
}
715747
Network::Grpcs => {
716-
let port = Self::shard_port(validator, shard);
717-
let nickname = format!("validator server {validator}:{shard}");
718748
Self::ensure_grpc_server_has_started(&nickname, port, "https").await?;
719749
}
720-
Network::Tcp | Network::Udp => {
721-
info!("Letting validator server {validator}:{shard} start");
722-
linera_base::time::timer::sleep(Duration::from_secs(2)).await;
750+
Network::Tcp => {
751+
Self::ensure_simple_server_has_started(&nickname, port, "tcp").await?;
752+
}
753+
Network::Udp => {
754+
Self::ensure_simple_server_has_started(&nickname, port, "udp").await?;
723755
}
724756
}
725757
Ok(child)

0 commit comments

Comments
 (0)