diff --git a/Cargo.lock b/Cargo.lock index f5a51ab819b..56a462c9cf0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -26,6 +26,16 @@ dependencies = [ "generic-array 0.14.9", ] +[[package]] +name = "aead" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d122413f284cf2d62fb1b7db97e02edb8cda96d769b16e443a4f6195e35662b0" +dependencies = [ + "crypto-common", + "generic-array 0.14.9", +] + [[package]] name = "aes" version = "0.6.0" @@ -34,7 +44,18 @@ checksum = "884391ef1066acaa41e766ba8f596341b96e93ce34f9a43e7d24bf0a0eaf0561" dependencies = [ "aes-soft", "aesni", - "cipher", + "cipher 0.2.5", +] + +[[package]] +name = "aes" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0" +dependencies = [ + "cfg-if", + "cipher 0.4.4", + "cpufeatures", ] [[package]] @@ -43,11 +64,25 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5278b5fabbb9bd46e24aa69b2fdea62c99088e0a950a9be40e3e0101298f88da" dependencies = [ - "aead", - "aes", - "cipher", - "ctr", - "ghash", + "aead 0.3.2", + "aes 0.6.0", + "cipher 0.2.5", + "ctr 0.6.0", + "ghash 0.3.1", + "subtle", +] + +[[package]] +name = "aes-gcm" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "831010a0f742e1209b3bcea8fab6a8e149051ba6099432c8cb2cc117dec3ead1" +dependencies = [ + "aead 0.5.2", + "aes 0.8.4", + "cipher 0.4.4", + "ctr 0.9.2", + "ghash 0.5.1", "subtle", ] @@ -57,7 +92,7 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be14c7498ea50828a38d0e24a765ed2effe92a705885b57d029cd67d45744072" dependencies = [ - "cipher", + "cipher 0.2.5", "opaque-debug", ] @@ -67,7 +102,7 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea2e11f5e94c2f7d386164cc2aa1f97823fed6f259e486940a71c174dd01b0ce" 
dependencies = [ - "cipher", + "cipher 0.2.5", "opaque-debug", ] @@ -104,6 +139,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "alloca" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5a7d05ea6aea7e9e64d25b9156ba2fee3fdd659e34e41063cd2fc7cd020d7f4" +dependencies = [ + "cc", +] + [[package]] name = "allocator-api2" version = "0.2.21" @@ -2365,6 +2409,31 @@ dependencies = [ "zeroize", ] +[[package]] +name = "bon" +version = "3.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "234655ec178edd82b891e262ea7cf71f6584bcd09eff94db786be23f1821825c" +dependencies = [ + "bon-macros", + "rustversion", +] + +[[package]] +name = "bon-macros" +version = "3.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89ec27229c38ed0eb3c0feee3d2c1d6a4379ae44f418a29a658890e062d8f365" +dependencies = [ + "darling 0.21.3", + "ident_case", + "prettyplease", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.111", +] + [[package]] name = "borsh" version = "1.6.0" @@ -2690,6 +2759,30 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" +[[package]] +name = "chacha20" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3613f74bd2eac03dad61bd53dbe620703d4371614fe0bc3b9f04dd36fe4e818" +dependencies = [ + "cfg-if", + "cipher 0.4.4", + "cpufeatures", +] + +[[package]] +name = "chacha20poly1305" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10cd79432192d1c0f4e1a0fef9527696cc039165d729fb41b3f4f4f354c2dc35" +dependencies = [ + "aead 0.5.2", + "chacha20", + "cipher 0.4.4", + "poly1305", + "zeroize", +] + [[package]] name = "chrono" version = "0.4.42" @@ -2740,6 +2833,17 @@ dependencies = [ "generic-array 0.14.9", ] +[[package]] +name = "cipher" +version = "0.4.4" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" +dependencies = [ + "crypto-common", + "inout", + "zeroize", +] + [[package]] name = "circular-buffer" version = "0.1.9" @@ -2871,6 +2975,31 @@ dependencies = [ "vbs", ] +[[package]] +name = "cliquenet" +version = "0.1.0" +dependencies = [ + "bimap", + "bon", + "bs58", + "bytes 1.11.0", + "criterion 0.8.1", + "ed25519-compact", + "hotshot-types", + "nohash-hasher", + "parking_lot", + "portpicker", + "quickcheck", + "rand 0.9.2", + "serde", + "serde_bytes", + "snow", + "thiserror 2.0.17", + "tokio", + "tracing", + "tracing-subscriber 0.3.22", +] + [[package]] name = "cmake" version = "0.1.54" @@ -3197,7 +3326,7 @@ version = "0.14.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "03a5d7b21829bc7b4bf4754a978a241ae54ea55a40f92bb20216e54096f4b951" dependencies = [ - "aes-gcm", + "aes-gcm 0.8.0", "base64 0.13.1", "hkdf 0.10.0", "hmac 0.10.1", @@ -3288,7 +3417,7 @@ dependencies = [ "cast", "ciborium", "clap 4.5.53", - "criterion-plot", + "criterion-plot 0.5.0", "is-terminal", "itertools 0.10.5", "num-traits", @@ -3304,6 +3433,31 @@ dependencies = [ "walkdir", ] +[[package]] +name = "criterion" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d883447757bb0ee46f233e9dc22eb84d93a9508c9b868687b274fc431d886bf" +dependencies = [ + "alloca", + "anes", + "cast", + "ciborium", + "clap 4.5.53", + "criterion-plot 0.8.1", + "itertools 0.13.0", + "num-traits", + "oorandom", + "page_size", + "plotters", + "rayon", + "regex", + "serde", + "serde_json", + "tinytemplate", + "walkdir", +] + [[package]] name = "criterion-plot" version = "0.5.0" @@ -3314,6 +3468,16 @@ dependencies = [ "itertools 0.10.5", ] +[[package]] +name = "criterion-plot" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ed943f81ea2faa8dcecbbfa50164acf95d555afec96a27871663b300e387b2e4" +dependencies = [ + "cast", + "itertools 0.13.0", +] + [[package]] name = "critical-section" version = "1.2.0" @@ -3423,13 +3587,28 @@ dependencies = [ "memchr", ] +[[package]] +name = "ct-codecs" +version = "1.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b10589d1a5e400d61f9f38f12f884cfd080ff345de8f17efda36fe0e4a02aa8" + [[package]] name = "ctr" version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fb4a30d54f7443bf3d6191dcd486aca19e67cb3c49fa7a06a319966346707e7f" dependencies = [ - "cipher", + "cipher 0.2.5", +] + +[[package]] +name = "ctr" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0369ee1ad671834580515889b80f2ea915f23b8be8d0daa4bbaf2ac5c7590835" +dependencies = [ + "cipher 0.4.4", ] [[package]] @@ -3996,6 +4175,16 @@ dependencies = [ "signature", ] +[[package]] +name = "ed25519-compact" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ce99a9e19c84beb4cc35ece85374335ccc398240712114c85038319ed709bd" +dependencies = [ + "ct-codecs", + "getrandom 0.3.4", +] + [[package]] name = "ed25519-dalek" version = "2.2.0" @@ -4119,6 +4308,16 @@ dependencies = [ "log", ] +[[package]] +name = "env_logger" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3" +dependencies = [ + "log", + "regex", +] + [[package]] name = "env_logger" version = "0.11.8" @@ -4888,7 +5087,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97304e4cd182c3846f7575ced3890c53012ce534ad9114046b0a9e00bb30a375" dependencies = [ "opaque-debug", - "polyval", + "polyval 0.4.5", +] + +[[package]] +name = "ghash" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f0d8a4362ccb29cb0b265253fb0a2728f592895ee6854fd9bc13f2ffda266ff1" +dependencies = [ + "opaque-debug", + "polyval 0.6.2", ] [[package]] @@ -5304,10 +5513,12 @@ dependencies = [ "bimap", "bincode", "blake3", + "bytes 1.11.0", "cdn-broker", "cdn-client", "cdn-marshal", "chrono", + "cliquenet", "committable", "dashmap", "derive_more 2.1.0", @@ -6466,6 +6677,15 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "64e9829a50b42bb782c1df523f78d332fe371b10c661e78b7a3c34b0198e9fac" +[[package]] +name = "inout" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01" +dependencies = [ + "generic-array 0.14.9", +] + [[package]] name = "input_buffer" version = "0.4.0" @@ -8137,6 +8357,12 @@ dependencies = [ "vbs", ] +[[package]] +name = "nohash-hasher" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bf50223579dc7cdcfb3bfcacf7069ff68243f8c363f62ffa99cf000a6b9c451" + [[package]] name = "nom" version = "7.1.3" @@ -8493,6 +8719,16 @@ dependencies = [ "rayon", ] +[[package]] +name = "page_size" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30d5b2194ed13191c1999ae0704b7839fb18384fa22e49b57eeaa97d79ce40da" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "parity-scale-codec" version = "3.7.5" @@ -8843,6 +9079,17 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "poly1305" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8159bd90725d2df49889a078b54f4f79e87f1f8a8444194cdca81d38f5393abf" +dependencies = [ + "cpufeatures", + "opaque-debug", + "universal-hash 0.5.1", +] + [[package]] name = "polyval" version = "0.4.5" @@ -8851,7 +9098,19 @@ checksum = "eebcc4aa140b9abd2bc40d9c3f7ccec842679cd79045ac3a7ac698c1a064b7cd" dependencies = [ "cpuid-bool", 
"opaque-debug", - "universal-hash", + "universal-hash 0.4.0", +] + +[[package]] +name = "polyval" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d1fe60d06143b2430aa532c94cfe9e29783047f06c0d7fd359a9a51b729fa25" +dependencies = [ + "cfg-if", + "cpufeatures", + "opaque-debug", + "universal-hash 0.5.1", ] [[package]] @@ -9237,6 +9496,17 @@ dependencies = [ "parking_lot", ] +[[package]] +name = "quickcheck" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6" +dependencies = [ + "env_logger 0.8.4", + "log", + "rand 0.8.5", +] + [[package]] name = "quinn" version = "0.11.9" @@ -10972,6 +11242,23 @@ dependencies = [ "syn 2.0.111", ] +[[package]] +name = "snow" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "599b506ccc4aff8cf7844bc42cf783009a434c1e26c964432560fb6d6ad02d82" +dependencies = [ + "aes-gcm 0.10.3", + "blake2", + "chacha20poly1305", + "curve25519-dalek", + "getrandom 0.3.4", + "ring 0.17.14", + "rustc_version 0.4.1", + "sha2 0.10.9", + "subtle", +] + [[package]] name = "socket2" version = "0.4.10" @@ -11705,7 +11992,7 @@ version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37d53ac171c92a39e4769491c4b4dde7022c60042254b5fc044ae409d34a24d4" dependencies = [ - "env_logger", + "env_logger 0.11.8", "test-log-macros", "tracing-subscriber 0.3.22", ] @@ -12039,6 +12326,7 @@ dependencies = [ "mio", "parking_lot", "pin-project-lite 0.2.16", + "signal-hook-registry", "socket2 0.6.1", "tokio-macros", "tracing", @@ -12597,6 +12885,16 @@ dependencies = [ "subtle", ] +[[package]] +name = "universal-hash" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc1de2c688dc15305988b563c3854064043356019f97a4b46276fe734c4f07ea" +dependencies = [ + "crypto-common", + "subtle", +] + 
[[package]] name = "unsigned-varint" version = "0.7.2" @@ -12777,7 +13075,7 @@ dependencies = [ "ark-serialize 0.5.0", "ark-std 0.5.0", "base64-bytes", - "criterion", + "criterion 0.5.1", "displaydoc", "jf-crhf", "jf-merkle-tree", diff --git a/Cargo.toml b/Cargo.toml index 3c58ddef7df..b12251333fe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,7 @@ members = [ "contracts/rust/diff-test", "contracts/rust/gen-vk-contract", "crates/builder", + "crates/cliquenet", "crates/hotshot-builder/legacy", "crates/hotshot-builder/refactored", "crates/hotshot/builder-api", @@ -56,6 +57,7 @@ default-members = [ "contracts/rust/diff-test", "contracts/rust/gen-vk-contract", "crates/builder", + "crates/cliquenet", "crates/hotshot/builder-api", "crates/hotshot/example-types", "crates/hotshot/examples", @@ -118,14 +120,18 @@ async-once-cell = "0.5" async-trait = "0.1" base64 = "0.22" base64-bytes = "0.1" +bimap = "0.6.3" bincode = "1.3.3" bitvec = { version = "1", features = ["serde"] } blake3 = "1.5" +bytes = { version = "1.11.0", features = ["serde"] } +bon = "3.8.2" cbor4ii = { version = "1.0", features = ["serde1"] } chrono = { version = "0.4", features = ["serde"] } circular-buffer = "0.1.9" clap = { version = "4.4", features = ["derive", "env", "string"] } cld = "0.5" +cliquenet = { path = "crates/cliquenet" } dashmap = "6" derive_builder = "0.20" derive_more = { version = "2.0", features = ["full"] } @@ -144,8 +150,10 @@ libp2p-swarm-derive = { version = "0.35" } moka = { version = "0.12.12", features = ["future"] } memoize = { version = "0.4", features = ["full"] } multiaddr = { version = "0.18" } +nohash-hasher = "0.2.0" num_cpus = "1" parking_lot = { version = "0.12", features = ["send_guard"] } +quickcheck = "1.0" semver = "1" serde-inline-default = "0.2" serde_bytes = { version = "0.11" } diff --git a/crates/cliquenet/Cargo.toml b/crates/cliquenet/Cargo.toml new file mode 100644 index 00000000000..78bab15f173 --- /dev/null +++ b/crates/cliquenet/Cargo.toml @@ -0,0 
+1,36 @@ +[package] +name = "cliquenet" +version = "0.1.0" +edition = "2024" +description = "A fully connected mesh network" + +[features] +metrics = ["dep:hotshot-types"] + +[dependencies] +bon = { workspace = true } +bytes = { workspace = true } +bimap = "0.6.3" +bs58 = "0.5.1" +ed25519-compact = "2.2.0" +nohash-hasher = { workspace = true } +parking_lot = { workspace = true } +rand = "0.9.2" +serde = { workspace = true } +serde_bytes = { workspace = true } +snow = { version = "0.10.0", features = ["ring-accelerated"] } +thiserror = { workspace = true } +tokio = { workspace = true, features = ["full"] } +tracing = { workspace = true } +# optional: +hotshot-types = { workspace = true, optional = true } + +[dev-dependencies] +criterion = "0.8.1" +portpicker = { workspace = true } +quickcheck = { workspace = true } +tracing-subscriber = { workspace = true } + +[[bench]] +name = "bench1" +harness = false diff --git a/crates/cliquenet/README.md b/crates/cliquenet/README.md new file mode 100644 index 00000000000..2e41b7951a8 --- /dev/null +++ b/crates/cliquenet/README.md @@ -0,0 +1,82 @@ +# Networking + +The networking layer operates over TCP, using the [Noise protocol framework][noise] +to create secure, authenticated links between parties. + +Creating a network requires + +- a `Keypair` whose public key identifies a party, +- an address to accept inbound connections on, and +- a set of `(PublicKey, Address)` pairs, comprising all parties that want to communicate. + +Once created, binary data can be sent to individual parties, addressed by their +`PublicKey`, or to all parties. Applications can also await receiving data from a party. +For details regarding the API, see [`Network`]. + +## Architecture + +When a [`Network`] is created it spawns a server task, that binds a TCP listener to the +provided address and starts accepting connections. It also immediately creates tasks +to connect to each party (except to itself), using the provided address. 
It then +enters the main event loop which handles task creation and termination. Connections +undergo a series of stages. + +### Accepting an inbound connection + +If the TCP listener accepts a new inbound connection it creates a handshake task which +attempts to perform a Noise handshake which involves a Diffie-Hellman key exchange and +-- if successful -- results in an authenticated and secure link with an identified +peer. + +### Connect task + +A connect task will indefinitely try to establish a TCP connection to a single peer. +Between connection attempts it waits for an increasing amount of time, but no more +than 30s. If the connection has been established, the task will also perform a Noise +handshake with the remote party. + +If either the handshake task or the connect task finishes successfully, the connection +is ready to be used for the actual exchange of application data. + +### IP address check + +If a party's address is an IP address, we also check that the remote peer address is +actually the one given. For domain names, no such check takes place. + +### Simultaneous connects + +Given that all parties try to connect to each other, a network node may accept a +connection it has already established through its own connect task, or vice versa. +A node uses the order of public keys to decide which connection to keep, should +two connections exist at the same time, i.e. given two connections to the same +peer a node drops the one whose associated public key is smaller than its own. + +### I/O tasks + +After successful connection establishment, two tasks are created, one to continuously +read incoming data and one to send application data. The data is split and encrypted +into frames of 64 KiB (the maximum size of a Noise message) or less. Failure of either +task results in the termination of both and a new connect task is created to +re-establish the connection.
+ +### Heartbeats and latency measurements + +In addition to application data, a network node periodically sends a PING frame and +expects a PONG frame. When a PONG is received, the embedded timestamp is used to +measure the network RTT. In addition, whenever a PING frame has been sent, a countdown +timer is started (if not already running) which will cause the connection to be dropped +if it expires. Any data that is subsequently received will stop the countdown. This +mechanism is used like a heartbeat to ensure the remote peer is alive and responding. + +### Channels + +Communication between the various tasks proceeds over MPSC (multi-producer, single- +consumer) channels. When application code wishes to send data, it sends it over +the channel to the main event loop, which will forward the data over another MPSC +channel to the respective write task. The capacity of every channel is bounded. +If the one the application uses is full, backpressure is exercised, i.e. the +application has to wait. This can happen, for example, if no connection is available +for some time. The channel to an I/O write task is also bounded, but if full, the +oldest item will be dropped.
+ +[noise]: https://noiseprotocol.org/ diff --git a/crates/cliquenet/benches/bench1.rs b/crates/cliquenet/benches/bench1.rs new file mode 100644 index 00000000000..6a739d50b4d --- /dev/null +++ b/crates/cliquenet/benches/bench1.rs @@ -0,0 +1,196 @@ +use std::{collections::HashMap, io, net::Ipv4Addr, sync::LazyLock, time::Duration}; + +use cliquenet::{Address, Keypair, MAX_MESSAGE_SIZE, NetConf, NetworkError, PublicKey, Retry}; +use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main}; +#[cfg(feature = "metrics")] +use hotshot_types::traits::metrics::NoMetrics; +use rand::RngCore; +use tokio::{ + io::{AsyncReadExt, AsyncWriteExt}, + net::{TcpListener, TcpStream}, + runtime::Runtime, + time::sleep, +}; + +const A: u8 = 0; +const B: u8 = 1; + +const SIZES: &[usize] = &[ + 128 * 1024, + 512 * 1024, + 1024 * 1024, + 5 * 1024 * 1024, + MAX_MESSAGE_SIZE, +]; + +static DATA: LazyLock>> = LazyLock::new(|| { + let mut g = rand::rng(); + HashMap::from_iter(SIZES.iter().map(|n| { + let mut v = vec![0; *n]; + g.fill_bytes(&mut v); + (*n, v) + })) +}); + +async fn setup_tcp() -> (TcpStream, TcpStream) { + let l = TcpListener::bind("127.0.0.1:0").await.unwrap(); + let a = l.local_addr().unwrap(); + let (a, b) = tokio::join!(l.accept(), TcpStream::connect(a)); + let a = a.unwrap().0; + let b = b.unwrap(); + a.set_nodelay(true).unwrap(); + b.set_nodelay(true).unwrap(); + (a, b) +} + +async fn setup_cliquenet() -> (Retry, Retry) { + let a = Keypair::generate().unwrap(); + let b = Keypair::generate().unwrap(); + + let all: [(u8, PublicKey, Address); 2] = [ + ( + A, + a.public_key(), + Address::from(( + Ipv4Addr::from([127, 0, 0, 1]), + portpicker::pick_unused_port().unwrap(), + )), + ), + ( + B, + b.public_key(), + Address::from(( + Ipv4Addr::from([127, 0, 0, 1]), + portpicker::pick_unused_port().unwrap(), + )), + ), + ]; + + let net_a = Retry::create({ + let cfg = NetConf::builder() + .name("bench") + .label(A) + .keypair(a) + .bind(all[0].2.clone()) + 
.parties(all.clone()); + #[cfg(not(feature = "metrics"))] + { + cfg.build() + } + #[cfg(feature = "metrics")] + { + cfg.metrics(Box::new(NoMetrics)).build() + } + }) + .await + .unwrap(); + + let net_b = Retry::create({ + let cfg = NetConf::builder() + .name("bench") + .label(B) + .keypair(b) + .bind(all[1].2.clone()) + .parties(all.clone()); + #[cfg(not(feature = "metrics"))] + { + cfg.build() + } + #[cfg(feature = "metrics")] + { + cfg.metrics(Box::new(NoMetrics)).build() + } + }) + .await + .unwrap(); + + (net_a, net_b) +} + +async fn tcp(size: usize, srv: &mut TcpStream, clt: &mut TcpStream) { + async fn echo_server(stream: &mut TcpStream) -> io::Result<()> { + let len = stream.read_u32().await?; + let mut v = vec![0; len as usize]; + stream.read_exact(&mut v).await?; + stream.write_u32(len).await?; + stream.write_all(&v).await + } + + async fn echo_client(stream: &mut TcpStream, d: Vec) -> io::Result<()> { + stream.write_u32(d.len() as u32).await?; + stream.write_all(&d).await?; + let len = stream.read_u32().await?; + assert_eq!(len as usize, d.len()); + let mut v = vec![0; len as usize]; + stream.read_exact(&mut v).await?; + assert_eq!(&*v, &*d); + Ok(()) + } + + let dat = DATA[&size].clone(); + let (ra, rb) = tokio::join!(echo_server(srv), echo_client(clt, dat)); + ra.unwrap(); + rb.unwrap(); +} + +async fn cliquenet(to: u8, size: usize, srv: &mut Retry, clt: &mut Retry) { + async fn echo_server(net: &mut Retry) -> Result<(), NetworkError> { + let (src, data) = net.receive().await?; + let _ = net.unicast(src, 0, data.into()).await?; + Ok(()) + } + + async fn echo_client(to: u8, net: &mut Retry, d: Vec) -> Result<(), NetworkError> { + let _ = net.unicast(to, 0, d.clone()).await?; + let (src, data) = net.receive().await?; + assert_eq!(src, to); + assert_eq!(&*data, &*d); + Ok(()) + } + + let dat = DATA[&size].clone(); + let fa = echo_server(srv); + let fb = echo_client(to, clt, dat); + let (ra, rb) = tokio::join!(fa, fb); + ra.unwrap(); + rb.unwrap(); +} + +fn 
bench_tcp(c: &mut Criterion) { + let rt = Runtime::new().unwrap(); + let (mut srv, mut clt) = rt.block_on(setup_tcp()); + let mut group = c.benchmark_group("tcp"); + for n in SIZES { + group + .throughput(Throughput::Bytes(*n as u64)) + .bench_with_input( + BenchmarkId::from_parameter(format!("{}k", n / 1024)), + n, + |b, n| b.iter(|| rt.block_on(tcp(*n, &mut srv, &mut clt))), + ); + } + group.finish(); +} + +fn bench_cliquenet(c: &mut Criterion) { + let rt = Runtime::new().unwrap(); + let (mut srv, mut clt) = rt.block_on(async { + let (a, b) = setup_cliquenet().await; + sleep(Duration::from_secs(3)).await; + (a, b) + }); + let mut group = c.benchmark_group("cliquenet"); + for n in SIZES { + group + .throughput(Throughput::Bytes(*n as u64)) + .bench_with_input( + BenchmarkId::from_parameter(format!("{}k", n / 1024)), + n, + |b, n| b.iter(|| rt.block_on(cliquenet(A, *n, &mut srv, &mut clt))), + ); + } + group.finish(); +} + +criterion_group!(benches, bench_tcp, bench_cliquenet); +criterion_main!(benches); diff --git a/crates/cliquenet/src/addr.rs b/crates/cliquenet/src/addr.rs new file mode 100644 index 00000000000..f468598555e --- /dev/null +++ b/crates/cliquenet/src/addr.rs @@ -0,0 +1,183 @@ +use std::{ + fmt, + net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr}, +}; + +use serde::{Deserialize, Deserializer, Serialize, Serializer, de}; + +/// A network address. +/// +/// Either an IP address and port number or else a hostname and port number. +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum Address { + Inet(IpAddr, u16), + Name(String, u16), +} + +impl Address { + /// Get the port number of an address. + pub fn port(&self) -> u16 { + match self { + Self::Inet(_, p) => *p, + Self::Name(_, p) => *p, + } + } + + /// Set the address port. 
+ pub fn set_port(&mut self, p: u16) { + match self { + Self::Inet(_, o) => *o = p, + Self::Name(_, o) => *o = p, + } + } + + pub fn with_port(mut self, p: u16) -> Self { + match self { + Self::Inet(ip, _) => self = Self::Inet(ip, p), + Self::Name(hn, _) => self = Self::Name(hn, p), + } + self + } + + pub fn with_offset(mut self, o: u16) -> Self { + match self { + Self::Inet(ip, p) => self = Self::Inet(ip, p + o), + Self::Name(hn, p) => self = Self::Name(hn, p + o), + } + self + } + + pub fn is_ip(&self) -> bool { + matches!(self, Self::Inet(..)) + } +} + +impl fmt::Display for Address { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Inet(a, p) => write!(f, "{a}:{p}"), + Self::Name(h, p) => write!(f, "{h}:{p}"), + } + } +} + +impl From<(&str, u16)> for Address { + fn from((h, p): (&str, u16)) -> Self { + Self::Name(h.to_string(), p) + } +} + +impl From<(String, u16)> for Address { + fn from((h, p): (String, u16)) -> Self { + Self::Name(h, p) + } +} + +impl From<(IpAddr, u16)> for Address { + fn from((ip, p): (IpAddr, u16)) -> Self { + Self::Inet(ip, p) + } +} + +impl From<(Ipv4Addr, u16)> for Address { + fn from((ip, p): (Ipv4Addr, u16)) -> Self { + Self::Inet(IpAddr::V4(ip), p) + } +} + +impl From<(Ipv6Addr, u16)> for Address { + fn from((ip, p): (Ipv6Addr, u16)) -> Self { + Self::Inet(IpAddr::V6(ip), p) + } +} + +impl From for Address { + fn from(a: SocketAddr) -> Self { + Self::Inet(a.ip(), a.port()) + } +} + +impl std::str::FromStr for Address { + type Err = InvalidAddress; + + fn from_str(s: &str) -> Result { + let parse = |a: &str, p: Option<&str>| { + let p: u16 = if let Some(p) = p { + p.parse().map_err(|_| InvalidAddress(()))? 
+ } else { + 0 + }; + IpAddr::from_str(a) + .map(|a| Self::Inet(a, p)) + .or_else(|_| Ok(Self::Name(a.to_string(), p))) + }; + match s.rsplit_once(':') { + None => parse(s, None), + Some((a, p)) => parse(a, Some(p)), + } + } +} + +impl TryFrom<&str> for Address { + type Error = InvalidAddress; + + fn try_from(val: &str) -> Result { + val.parse() + } +} + +#[derive(Debug, Clone, thiserror::Error)] +#[error("invalid address")] +pub struct InvalidAddress(()); + +impl Serialize for Address { + fn serialize(&self, s: S) -> Result { + self.to_string().serialize(s) + } +} + +impl<'de> Deserialize<'de> for Address { + fn deserialize>(d: D) -> Result { + let s = String::deserialize(d)?; + let a = s.parse().map_err(de::Error::custom)?; + Ok(a) + } +} + +#[cfg(test)] +mod tests { + use std::net::IpAddr; + + use super::Address; + + #[test] + fn test_parse() { + let a: Address = "127.0.0.1:1234".parse().unwrap(); + let Address::Inet(a, p) = a else { + unreachable!() + }; + assert_eq!(IpAddr::from([127, 0, 0, 1]), a); + assert_eq!(1234, p); + + let a: Address = "::1:1234".parse().unwrap(); + let Address::Inet(a, p) = a else { + unreachable!() + }; + assert_eq!("::1".parse::().unwrap(), a); + assert_eq!(1234, p); + + let a: Address = "localhost:1234".parse().unwrap(); + let Address::Name(h, p) = a else { + unreachable!() + }; + assert_eq!("localhost", &h); + assert_eq!(1234, p); + + let a: Address = "sub.domain.com:1234".parse().unwrap(); + let Address::Name(h, p) = a else { + unreachable!() + }; + assert_eq!("sub.domain.com", &h); + assert_eq!(1234, p); + } +} diff --git a/crates/cliquenet/src/chan.rs b/crates/cliquenet/src/chan.rs new file mode 100644 index 00000000000..93a440cfb08 --- /dev/null +++ b/crates/cliquenet/src/chan.rs @@ -0,0 +1,101 @@ +//! A channel implementation that keeps only a single copy of an item, as +//! identified by its Id. 
+ +use std::{collections::VecDeque, sync::Arc}; + +use nohash_hasher::IntSet; +use parking_lot::Mutex; +use tokio::sync::Notify; + +use crate::Id; + +#[derive(Debug)] +pub struct Sender(Arc>); + +#[derive(Debug)] +pub struct Receiver(Arc>); + +#[derive(Debug)] +struct Chan { + /// Channel capacity. + cap: usize, + /// Notifier for receivers that are waiting for items. + sig: Notify, + /// The items currently in flight. + buf: Mutex>, +} + +#[derive(Debug)] +struct Buf { + /// Ordered queue of items. + xs: VecDeque<(Option, T)>, + /// The set of Ids in the queue. + ids: IntSet, +} + +impl Clone for Sender { + fn clone(&self) -> Self { + Self(self.0.clone()) + } +} + +pub fn channel(cap: usize) -> (Sender, Receiver) { + let chan = Arc::new(Chan { + cap, + sig: Notify::new(), + buf: Mutex::new(Buf { + xs: VecDeque::new(), + ids: IntSet::default(), + }), + }); + (Sender(chan.clone()), Receiver(chan)) +} + +impl Sender { + pub fn send(&self, id: Option, val: T) { + if let Some(id) = id { + let mut buf = self.0.buf.lock(); + if buf.ids.contains(&id) { + return; + } + if buf.xs.len() == self.0.cap + && let Some((Some(id), _)) = buf.xs.pop_front() + { + buf.ids.remove(&id); + } + buf.xs.push_back((Some(id), val)); + buf.ids.insert(id); + } else { + let mut buf = self.0.buf.lock(); + if buf.xs.len() == self.0.cap + && let Some((Some(id), _)) = buf.xs.pop_front() + { + buf.ids.remove(&id); + } + buf.xs.push_back((None, val)); + } + self.0.sig.notify_waiters(); + } + + pub fn capacity(&self) -> usize { + self.0.cap + } +} + +impl Receiver { + pub async fn recv(&self) -> Option { + loop { + let future = self.0.sig.notified(); + { + let mut buf = self.0.buf.lock(); + if let Some((id, val)) = buf.xs.pop_front() { + if let Some(id) = id { + buf.ids.remove(&id); + } + return Some(val); + } + } + future.await; + } + } +} diff --git a/crates/cliquenet/src/error.rs b/crates/cliquenet/src/error.rs new file mode 100644 index 00000000000..d12d8a705d5 --- /dev/null +++ 
b/crates/cliquenet/src/error.rs @@ -0,0 +1,53 @@ +use std::io; + +use thiserror::Error; + +use crate::{Address, frame::InvalidHeader}; + +/// The empty type has no values. +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub(crate) enum Empty {} + +/// The various errors that can occur during networking. +#[derive(Debug, Error)] +pub enum NetworkError { + /// Generic I/O error. + #[error("i/o error: {0}")] + Io(#[from] io::Error), + + /// Bind error. + #[error("error binding to address {0}: {1}")] + Bind(Address, #[source] io::Error), + + /// The received frame header is not valid. + #[error("invalid frame header: {0}")] + InvalidFrameHeader(#[from] InvalidHeader), + + /// The received frame has an unknown type. + #[error("unknown frame type: {0}")] + UnknownFrameType(u8), + + /// Generic Noise error. + #[error("noise error: {0}")] + Noise(#[from] snow::Error), + + /// The Noise handshake message is not valid. + #[error("invalid handshake message")] + InvalidHandshakeMessage, + + /// The total message size exceeds the allowed maximum. + #[error("message too large")] + MessageTooLarge, + + /// An MPSC channel is unexpectedly closed. + #[error("channel closed")] + ChannelClosed, + + /// A receive budget has unexpectedly closed. + #[error("receive budget closed")] + BudgetClosed, + + /// An operation timed out. + #[error("timeout")] + Timeout, +} diff --git a/crates/cliquenet/src/frame.rs b/crates/cliquenet/src/frame.rs new file mode 100644 index 00000000000..d64dceaf5a2 --- /dev/null +++ b/crates/cliquenet/src/frame.rs @@ -0,0 +1,195 @@ +//! # Frame header +//! +//! The unit of data exchanged over the network is called a `Frame` and consists of +//! a 4-byte header and a body of variable size. The header has the following +//! structure: +//! +//! ```text +//! 0 1 2 3 +//! 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +//! +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +//! | | |P| | | +//! 
|Version| Type |a| Reserved | Payload length | +//! | | |r| | | +//! | | |t| | | +//! +-------+-------+-+-------------+-------------------------------+ +//! ``` +//! +//! where +//! +//! - Version (4 bits) +//! - Type (4 bits) +//! - Data (0) +//! - Ping (1) +//! - Pong (2) +//! - Partial (1 bit) +//! - Reserved (7 bits) +//! - Payload length (16 bits) +//! +//! If the partial bit is set, the frame is only a part of the message and the read task +//! will assemble all frames to produce the final message. The maximum total message size +//! is configurable (the `max_message_size` setting of `NetConf`) and defaults to +//! `MAX_MESSAGE_SIZE` (8 MiB). + +use std::fmt; + +/// The header of a frame. +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct Header(u32); + +impl Header { + /// Size of an encoded header in bytes. + pub const SIZE: usize = 4; + + /// Create a data header with the given payload length. + pub fn data(len: u16) -> Self { + Self(len as u32) + } + + /// Create a ping header with the given payload length. + pub fn ping(len: u16) -> Self { + Self(0x1000000 | len as u32) + } + + /// Create a pong header with the given payload length. + pub fn pong(len: u16) -> Self { + Self(0x2000000 | len as u32) + } + + /// The type of the frame following this header. + /// + /// An unrecognized type value is returned in the `Err` variant. + pub fn frame_type(self) -> Result { + match (self.0 & 0xF000000) >> 24 { + 0 => Ok(Type::Data), + 1 => Ok(Type::Ping), + 2 => Ok(Type::Pong), + t => Err(t as u8), + } + } + + /// Set the partial flag to indicate that more frames follow. + pub fn partial(self) -> Self { + Self(self.0 | 0x800000) + } + + /// Is this a data frame header? + pub fn is_data(self) -> bool { + self.0 & 0xF000000 == 0 + } + + /// Is this a ping frame header? + pub fn is_ping(self) -> bool { + self.0 & 0xF000000 == 0x1000000 + } + + /// Is this a pong frame header? + pub fn is_pong(self) -> bool { + self.0 & 0xF000000 == 0x2000000 + } + + /// Is this a partial frame? + pub fn is_partial(self) -> bool { + self.0 & 0x800000 == 0x800000 + } + + /// Get the payload length.
+ pub fn len(self) -> u16 { + (self.0 & 0xFFFF) as u16 + } + + /// Convert this header into a byte array. + pub fn to_bytes(self) -> [u8; Self::SIZE] { + self.0.to_be_bytes() + } +} + +/// The type of a frame. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Type { + Data, + Ping, + Pong, +} + +impl From
for [u8; Header::SIZE] { + fn from(val: Header) -> Self { + val.to_bytes() + } +} + +impl TryFrom<&[u8]> for Header { + type Error = InvalidHeader; + + fn try_from(val: &[u8]) -> Result { + let n = <[u8; Self::SIZE]>::try_from(val) + .map_err(|_| InvalidHeader("4-byte slice required"))?; + Ok(Self(u32::from_be_bytes(n))) + } +} + +impl TryFrom<[u8; Header::SIZE]> for Header { + type Error = InvalidHeader; + + fn try_from(val: [u8; Self::SIZE]) -> Result { + Ok(Self(u32::from_be_bytes(val))) + } +} + +impl fmt::Display for Header { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Header") + .field("type", &self.frame_type()) + .field("len", &self.len()) + .field("partial", &self.is_partial()) + .finish() + } +} + +#[derive(Debug, thiserror::Error)] +#[error("invalid header: {0}")] +pub struct InvalidHeader(&'static str); + +#[cfg(test)] +mod tests { + use quickcheck::quickcheck; + + use super::{Header, Type}; + + quickcheck! { + fn data(len: u16) -> bool { + let hdr = Header::data(len); + hdr.is_data() && !hdr.is_partial() && hdr.frame_type() == Ok(Type::Data) + } + + fn ping(len: u16) -> bool { + let hdr = Header::ping(len); + hdr.is_ping() && !hdr.is_partial() && hdr.frame_type() == Ok(Type::Ping) + } + + fn pong(len: u16) -> bool { + let hdr = Header::pong(len); + hdr.is_pong() && !hdr.is_partial() && hdr.frame_type() == Ok(Type::Pong) + } + + fn partial_data(len: u16) -> bool { + Header::data(len).partial().is_partial() + } + + fn partial_ping(len: u16) -> bool { + Header::ping(len).partial().is_partial() + } + + fn partial_pong(len: u16) -> bool { + Header::pong(len).partial().is_partial() + } + + fn data_len(len: u16) -> bool { + Header::data(len).len() == len + } + + fn ping_len(len: u16) -> bool { + Header::ping(len).len() == len + } + + fn pong_len(len: u16) -> bool { + Header::pong(len).len() == len + } + } +} diff --git a/crates/cliquenet/src/id.rs b/crates/cliquenet/src/id.rs new file mode 100644 index 
00000000000..4dac4ec3376 --- /dev/null +++ b/crates/cliquenet/src/id.rs @@ -0,0 +1,22 @@ +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub struct Id(pub(crate) u64); + +impl From for Id { + fn from(n: u64) -> Self { + Self(n) + } +} + +impl From for u64 { + fn from(n: Id) -> Self { + n.0 + } +} + +impl std::hash::Hash for Id { + fn hash(&self, h: &mut H) { + h.write_u64(self.0) + } +} + +impl nohash_hasher::IsEnabled for Id {} diff --git a/crates/cliquenet/src/lib.rs b/crates/cliquenet/src/lib.rs new file mode 100644 index 00000000000..bac64c764cc --- /dev/null +++ b/crates/cliquenet/src/lib.rs @@ -0,0 +1,104 @@ +mod addr; +mod chan; +mod error; +mod frame; +mod id; +mod net; +mod time; +mod x25519; + +#[cfg(feature = "metrics")] +mod metrics; + +pub mod retry; + +use std::sync::Arc; + +pub use addr::{Address, InvalidAddress}; +use bon::Builder; +pub use error::NetworkError; +#[cfg(feature = "metrics")] +use hotshot_types::traits::metrics::Metrics; +pub use id::Id; +pub use net::Network; +pub use retry::Retry; +use tokio::sync::Semaphore; +pub use x25519::{ + InvalidKeypair, InvalidPublicKey, InvalidSecretKey, Keypair, PublicKey, SecretKey, +}; + +/// Max. number of bytes for a message (potentially consisting of several frames). +pub const MAX_MESSAGE_SIZE: usize = 8 * 1024 * 1024; + +const NUM_DELAYS: usize = 5; +const LAST_DELAY: usize = NUM_DELAYS - 1; + +/// Network peer role. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum Role { + /// Active peers receive broadcast messages. + Active, + /// Passive peers are excluded from broadcasts. + /// + /// Note however that passive peers can be addressed directly in + /// unicast or multicast operations. + Passive, +} + +impl Role { + pub fn is_active(self) -> bool { + matches!(self, Self::Active) + } +} + +#[derive(Debug, Builder)] +pub struct NetConf { + /// Network name. + name: &'static str, + + /// Network public key. 
+ label: K, + + /// DH keypair + keypair: Keypair, + + /// Address to bind to. + bind: Address, + + /// Committee members with key material and bind address. + #[builder(with = <_>::from_iter)] + parties: Vec<(K, PublicKey, Address)>, + + /// Total egress channel capacity. + #[builder(default = 64 * parties.len())] + total_capacity_egress: usize, + + /// Total ingress channel capacity. + #[builder(default = 32 * parties.len())] + total_capacity_ingress: usize, + + /// Egress channel capacity per peer. + #[builder(default = 64)] + peer_capacity_egress: usize, + + /// Ingress channel capacity per peer. + #[builder(default = 32)] + peer_capacity_ingress: usize, + + /// Max. number of bytes per message to send or receive. + #[builder(default = MAX_MESSAGE_SIZE)] + max_message_size: usize, + + /// Default retry delays in seconds. + #[builder(default = [1, 3, 5, 15, 30])] + retry_delays: [u8; NUM_DELAYS], + + #[cfg(feature = "metrics")] + metrics: Box, +} + +impl NetConf { + fn new_budget(&self) -> Arc { + Arc::new(Semaphore::new(self.peer_capacity_ingress)) + } +} diff --git a/crates/cliquenet/src/metrics.rs b/crates/cliquenet/src/metrics.rs new file mode 100644 index 00000000000..41402902f5b --- /dev/null +++ b/crates/cliquenet/src/metrics.rs @@ -0,0 +1,164 @@ +use std::{collections::HashMap, fmt::Display, hash::Hash, sync::Arc, time::Duration}; + +use hotshot_types::traits::metrics::{Counter, CounterFamily, Gauge, GaugeFamily, Metrics}; + +const CONNECT_ATTEMPTS: &str = "connect_attempts"; +const LATENCY: &str = "latency_ms"; +const PEER_OQUEUE_CAP: &str = "peer_oqueue_cap"; +const PEER_IQUEUE_CAP: &str = "peer_iqueue_cap"; + +#[derive(Debug, Clone)] +#[non_exhaustive] +pub struct NetworkMetrics { + pub connections: Box, + pub iqueue: Box, + pub oqueue: Box, + peer_counter_fams: HashMap<&'static str, Arc>, + peer_gauge_fams: HashMap<&'static str, Arc>, + connects: HashMap>, + latencies: HashMap>, + peer_oqueues: HashMap>, + peer_iqueues: HashMap>, +} + +impl 
NetworkMetrics +where + K: Display + Eq + Hash + Clone, +{ + pub fn new

(label: &str, metrics: &dyn Metrics, parties: P) -> Self + where + P: IntoIterator, + { + let group = metrics.subgroup(format!("cliquenet_{label})")); + + let peers = vec!["peers".into()]; + + let mut cf: HashMap<&'static str, Arc> = HashMap::new(); + cf.insert( + CONNECT_ATTEMPTS, + group + .counter_family(CONNECT_ATTEMPTS.into(), peers.clone()) + .into(), + ); + + let mut gf: HashMap<&'static str, Arc> = HashMap::new(); + gf.insert( + LATENCY, + group.gauge_family(LATENCY.into(), peers.clone()).into(), + ); + gf.insert( + PEER_OQUEUE_CAP, + group + .gauge_family(PEER_OQUEUE_CAP.into(), peers.clone()) + .into(), + ); + gf.insert( + PEER_IQUEUE_CAP, + group.gauge_family(PEER_IQUEUE_CAP.into(), peers).into(), + ); + + let connects = peer_counters(&*cf[CONNECT_ATTEMPTS], parties); + + Self { + connections: group.create_gauge("connections".into(), None), + iqueue: group.create_gauge("iqueue_cap".into(), None), + oqueue: group.create_gauge("oqueue_cap".into(), None), + latencies: peer_gauges(&*gf[LATENCY], connects.keys().cloned()), + peer_oqueues: peer_gauges(&*gf[PEER_OQUEUE_CAP], connects.keys().cloned()), + peer_iqueues: peer_gauges(&*gf[PEER_IQUEUE_CAP], connects.keys().cloned()), + connects, + peer_counter_fams: cf, + peer_gauge_fams: gf, + } + } + + pub fn add_connect_attempt(&self, k: &K) { + if let Some(c) = self.connects.get(k) { + c.add(1) + } + } + + pub fn set_latency(&self, k: &K, d: Duration) { + if let Some(g) = self.latencies.get(k) { + g.set(d.as_millis() as usize) + } + } + + pub fn set_peer_oqueue_cap(&self, k: &K, n: usize) { + if let Some(g) = self.peer_oqueues.get(k) { + g.set(n) + } + } + + pub fn set_peer_iqueue_cap(&self, k: &K, n: usize) { + if let Some(g) = self.peer_iqueues.get(k) { + g.set(n) + } + } + + pub fn add_parties

(&mut self, parties: P) + where + P: IntoIterator, + { + for k in parties { + if !self.connects.contains_key(&k) { + let c = self.peer_counter_fams[CONNECT_ATTEMPTS].create(vec![k.to_string()]); + self.connects.insert(k.clone(), c); + } + if !self.latencies.contains_key(&k) { + let g = self.peer_gauge_fams[LATENCY].create(vec![k.to_string()]); + self.latencies.insert(k.clone(), g); + } + if !self.peer_oqueues.contains_key(&k) { + let g = self.peer_gauge_fams[PEER_OQUEUE_CAP].create(vec![k.to_string()]); + self.peer_oqueues.insert(k.clone(), g); + } + if !self.peer_iqueues.contains_key(&k) { + let g = self.peer_gauge_fams[PEER_IQUEUE_CAP].create(vec![k.to_string()]); + self.peer_iqueues.insert(k, g); + } + } + } + + pub fn remove_parties<'a, P>(&mut self, parties: P) + where + P: IntoIterator, + K: 'a, + { + // TODO: Counters and gauges should be de-registered. + for k in parties { + self.connects.remove(k); + self.latencies.remove(k); + self.peer_oqueues.remove(k); + self.peer_iqueues.remove(k); + } + } +} + +fn peer_counters(fam: &dyn CounterFamily, peers: P) -> HashMap> +where + P: IntoIterator, + K: Display + Eq + Hash + Clone, +{ + peers + .into_iter() + .map(|k| { + let c = fam.create(vec![k.to_string()]); + (k, c) + }) + .collect() +} + +fn peer_gauges(fam: &dyn GaugeFamily, peers: P) -> HashMap> +where + P: IntoIterator, + K: Display + Eq + Hash + Clone, +{ + peers + .into_iter() + .map(|k| { + let c = fam.create(vec![k.to_string()]); + (k, c) + }) + .collect() +} diff --git a/crates/cliquenet/src/net.rs b/crates/cliquenet/src/net.rs new file mode 100644 index 00000000000..b1925cf0e64 --- /dev/null +++ b/crates/cliquenet/src/net.rs @@ -0,0 +1,1226 @@ +#![doc = include_str!("../README.md")] + +use std::{ + collections::HashMap, + fmt::Display, + future::pending, + hash::Hash, + iter::{once, repeat}, + sync::Arc, + time::Duration, +}; + +use bimap::BiHashMap; +use bytes::{Bytes, BytesMut}; +use parking_lot::Mutex; +use snow::{Builder, HandshakeState, 
TransportState}; +use tokio::{ + io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}, + net::{TcpListener, TcpStream}, + spawn, + sync::{ + Mutex as AsyncMutex, OwnedSemaphorePermit, Semaphore, + mpsc::{self, Receiver, Sender}, + }, + task::{self, AbortHandle, JoinHandle, JoinSet}, + time::{Interval, MissedTickBehavior, sleep, timeout}, +}; +use tracing::{debug, error, info, trace, warn}; + +#[cfg(feature = "metrics")] +use crate::metrics::NetworkMetrics; +use crate::{ + Address, Id, Keypair, LAST_DELAY, NUM_DELAYS, NetConf, NetworkError, PublicKey, Role, chan, + error::Empty, + frame::{Header, Type}, + time::{Countdown, Timestamp}, +}; + +type Budget = Arc; +type Result = std::result::Result; + +/// Max. message size using noise handshake. +const MAX_NOISE_HANDSHAKE_SIZE: usize = 1024; + +/// Max. message size using noise protocol. +const MAX_NOISE_MESSAGE_SIZE: usize = 64 * 1024; + +/// Max. number of bytes for payload data. +const MAX_PAYLOAD_SIZE: usize = MAX_NOISE_MESSAGE_SIZE - 32; + +/// Noise parameters to initialize the builders. +const NOISE_PARAMS: &str = "Noise_IK_25519_AESGCM_BLAKE2s"; + +/// Interval between ping protocol. +const PING_INTERVAL: Duration = Duration::from_secs(15); + +/// Max. allowed duration of a single TCP connect attempt. +const CONNECT_TIMEOUT: Duration = Duration::from_secs(30); + +/// Max. allowed duration of a Noise handshake. +const HANDSHAKE_TIMEOUT: Duration = Duration::from_secs(10); + +/// Max. allowed duration to wait for a peer to answer. +/// +/// This is started when we have sent a ping. Unless we receive +/// some data back within this duration, the connection times +/// out and is dropped. +const REPLY_TIMEOUT: Duration = Duration::from_secs(30); + +/// `Network` is the API facade of this crate. +#[derive(Debug)] +pub struct Network { + /// Name of this network. + name: &'static str, + + /// Log label. + label: K, + + /// The network participants. 
+ parties: Mutex>, + + /// MPSC sender of server task instructions. + tx: Sender>, + + /// MPSC receiver of messages from a remote party. + /// + /// The public key identifies the remote. + rx: AsyncMutex)>>, + + /// Handle of the server task that has been spawned by `Network`. + srv: JoinHandle>, + + /// Max. number of bytes per message. + max_message_size: usize, +} + +impl Drop for Network { + fn drop(&mut self) { + self.srv.abort() + } +} + +/// Server task instructions. +#[derive(Debug)] +pub(crate) enum Command { + /// Add the given peers. + Add(Vec<(K, PublicKey, Address)>), + /// Remove the given peers. + Remove(Vec), + /// Assign a `Role` to the given peers. + Assign(Role, Vec), + /// Send a message to one peer. + Unicast(K, Option, Bytes), + /// Send a message to some peers. + Multicast(Vec, Option, Bytes), + /// Send a message to all peers with `Role::Active`. + Broadcast(Option, Bytes), +} + +/// The `Server` is accepting connections and also establishing and +/// maintaining connections with all parties. +#[derive(Debug)] +struct Server { + conf: NetConf, + + /// This server's role. + role: Role, + + /// MPSC sender for messages received over a connection to a party. + /// + /// (see `Network` for the accompanying receiver). + ibound: Sender<(K, Bytes, Option)>, + + /// MPSC receiver for server task instructions. + /// + /// (see `Network` for the accompanying sender). + obound: Receiver>, + + /// All parties of the network and their addresses. + peers: HashMap, + + /// Bi-directional mapping of signing key and X25519 keys to identify + /// remote parties. + index: BiHashMap, + + /// Find the public key given a tokio task ID. + task2key: HashMap, + + /// Currently active connect attempts. + connecting: HashMap, + + /// Currently active connections (post handshake). + active: HashMap, + + /// Tasks performing a handshake with a remote party. + handshake_tasks: JoinSet>, + + /// Tasks connecting to a remote party and performing a handshake. 
+ connect_tasks: JoinSet<(TcpStream, TransportState)>, + + /// Active I/O tasks, exchanging data with remote parties. + io_tasks: JoinSet>, + + /// Interval at which to ping peers. + ping_interval: Interval, + + /// For gathering network metrics. + #[cfg(feature = "metrics")] + metrics: Arc>, +} + +#[derive(Debug)] +struct Peer { + addr: Address, + role: Role, + budget: Budget, +} + +/// A connect task. +#[derive(Debug)] +struct ConnectTask { + h: AbortHandle, +} + +// Make sure the task is stopped when `ConnectTask` is dropped. +impl Drop for ConnectTask { + fn drop(&mut self) { + self.h.abort(); + } +} + +/// An I/O task, reading data from and writing data to a remote party. +#[derive(Debug)] +struct IoTask { + /// Abort handle of the read-half of the connection. + rh: AbortHandle, + + /// Abort handle of the write-half of the connection. + wh: AbortHandle, + + /// MPSC sender of outgoing messages to the remote. + tx: chan::Sender, +} + +// Make sure all tasks are stopped when `IoTask` is dropped. +impl Drop for IoTask { + fn drop(&mut self) { + self.rh.abort(); + self.wh.abort(); + } +} + +/// Unify the various data types we want to send to the writer task. 
+#[derive(Debug)] +enum Message { + Data(Bytes), + Ping(Timestamp), + Pong(Timestamp), +} + +impl Network +where + K: Eq + Ord + Clone + Display + Hash + Send + Sync + 'static, +{ + pub async fn create(cfg: NetConf) -> Result { + let listener = TcpListener::bind(cfg.bind.to_string()) + .await + .map_err(|e| NetworkError::Bind(cfg.bind.clone(), e))?; + + debug!( + name = %cfg.name, + node = %cfg.label, + addr = %listener.local_addr()?, + "listening" + ); + + let mut parties = HashMap::new(); + let mut peers = HashMap::new(); + let mut index = BiHashMap::new(); + + for (k, x, a) in cfg.parties.iter().cloned() { + parties.insert(k.clone(), Role::Active); + index.insert(k.clone(), x); + peers.insert( + k, + Peer { + addr: a, + role: Role::Active, + budget: cfg.new_budget(), + }, + ); + } + + // Command channel from application to network. + let (otx, orx) = mpsc::channel(cfg.total_capacity_egress); + + // Channel of messages from peers to the application. + let (itx, irx) = mpsc::channel(cfg.total_capacity_ingress); + + let mut interval = tokio::time::interval(PING_INTERVAL); + interval.set_missed_tick_behavior(MissedTickBehavior::Delay); + + let name = cfg.name; + let label = cfg.label.clone(); + let mmsze = cfg.max_message_size; + + #[cfg(feature = "metrics")] + let metrics = { + let it = parties.keys().filter(|k| **k != label).cloned(); + NetworkMetrics::new(name, &*cfg.metrics, it) + }; + + let server = Server { + conf: cfg, + role: Role::Active, + ibound: itx, + obound: orx, + peers, + index, + connecting: HashMap::new(), + active: HashMap::new(), + task2key: HashMap::new(), + handshake_tasks: JoinSet::new(), + connect_tasks: JoinSet::new(), + io_tasks: JoinSet::new(), + ping_interval: interval, + #[cfg(feature = "metrics")] + metrics: Arc::new(metrics), + }; + + Ok(Self { + name, + label, + parties: Mutex::new(parties), + rx: AsyncMutex::new(irx), + tx: otx, + srv: spawn(server.run(listener)), + max_message_size: mmsze, + }) + } + + pub fn public_key(&self) -> &K 
{ + &self.label + } + + pub fn name(&self) -> &str { + self.name + } + + pub fn parties(&self, r: Role) -> Vec { + self.parties + .lock() + .iter() + .filter(|&(_, x)| r == *x) + .map(|(k, _)| k.clone()) + .collect() + } + + /// Send a message to a party, identified by the given public key. + pub async fn unicast(&self, to: K, msg: Bytes) -> Result<()> { + if msg.len() > self.max_message_size { + warn!( + name = %self.name, + node = %self.label, + to = %to, + len = %msg.len(), + max = %self.max_message_size, + "message too large to send" + ); + return Err(NetworkError::MessageTooLarge); + } + self.tx + .send(Command::Unicast(to, None, msg)) + .await + .map_err(|_| NetworkError::ChannelClosed) + } + + /// Send a message to all parties. + pub async fn broadcast(&self, msg: Bytes) -> Result<()> { + if msg.len() > self.max_message_size { + warn!( + name = %self.name, + node = %self.label, + len = %msg.len(), + max = %self.max_message_size, + "message too large to broadcast" + ); + return Err(NetworkError::MessageTooLarge); + } + self.tx + .send(Command::Broadcast(None, msg)) + .await + .map_err(|_| NetworkError::ChannelClosed) + } + + /// Receive a message from a remote party. + pub async fn receive(&self) -> Result<(K, Bytes)> { + let mut rx = self.rx.lock().await; + let (k, b, _) = rx.recv().await.ok_or(NetworkError::ChannelClosed)?; + Ok((k, b)) + } + + /// Add the given peers to the network. + /// + /// NB that peers added here are passive. See `Network::assign` for + /// giving peers a different `Role`. + pub async fn add(&self, peers: Vec<(K, PublicKey, Address)>) -> Result<()> { + self.parties + .lock() + .extend(peers.iter().map(|(p, ..)| (p.clone(), Role::Passive))); + self.tx + .send(Command::Add(peers)) + .await + .map_err(|_| NetworkError::ChannelClosed) + } + + /// Remove the given peers from the network. 
+ pub async fn remove(&self, peers: Vec) -> Result<()> { + { + let mut parties = self.parties.lock(); + for p in &peers { + parties.remove(p); + } + } + self.tx + .send(Command::Remove(peers)) + .await + .map_err(|_| NetworkError::ChannelClosed) + } + + /// Assign the given role to the given peers. + pub async fn assign(&self, r: Role, peers: Vec) -> Result<()> { + { + let mut parties = self.parties.lock(); + for p in &peers { + if let Some(role) = parties.get_mut(p) { + *role = r + } + } + } + self.tx + .send(Command::Assign(r, peers)) + .await + .map_err(|_| NetworkError::ChannelClosed) + } + + /// Get a clone of the MPSC sender. + pub(crate) fn sender(&self) -> Sender> { + self.tx.clone() + } +} + +impl Server +where + K: Eq + Ord + Clone + Display + Hash + Send + Sync + 'static, +{ + /// Runs the main loop of this network node. + /// + /// This function: + /// + /// - Tries to connect to each remote peer in the committee. + /// - Handles tasks that have been completed or terminated. + /// - Processes new messages we received on the network. + async fn run(mut self, listener: TcpListener) -> Result { + self.handshake_tasks.spawn(pending()); + self.io_tasks.spawn(pending()); + + // Connect to all peers. + for k in self + .peers + .keys() + .filter(|k| **k != self.conf.label) + .cloned() + .collect::>() + { + self.spawn_connect(k) + } + + loop { + trace!( + name = %self.conf.name, + node = %self.conf.label, + active = %self.active.len(), + connects = %self.connect_tasks.len(), + handshakes = %self.handshake_tasks.len().saturating_sub(1), // -1 for `pending()` + io_tasks = %self.io_tasks.len().saturating_sub(1), // -1 for `pending()` + tasks_ids = %self.task2key.len(), + iqueue = %self.ibound.capacity(), + oqueue = %self.obound.capacity(), + ); + + #[cfg(feature = "metrics")] + { + self.metrics.iqueue.set(self.ibound.capacity()); + self.metrics.oqueue.set(self.obound.capacity()); + } + + tokio::select! { + // Accepted a new connection. 
+ i = listener.accept() => match i { + Ok((s, a)) => { + debug!( + name = %self.conf.name, + node = %self.conf.label, + addr = %a, + "accepted connection" + ); + self.spawn_handshake(s) + } + Err(e) => { + warn!( + name = %self.conf.name, + node = %self.conf.label, + err = %e, + "error accepting connection" + ) + } + }, + // The handshake of an inbound connection completed. + Some(h) = self.handshake_tasks.join_next() => match h { + Ok(Ok((s, t))) => { + let Some((k, peer)) = self.lookup_peer(&t) else { + info!( + name = %self.conf.name, + node = %self.conf.label, + peer = ?t.get_remote_static().and_then(|k| PublicKey::try_from(k).ok()), + addr = ?s.peer_addr().ok(), + "unknown peer" + ); + continue + }; + if !self.is_valid_ip(&k, &s) { + warn!( + name = %self.conf.name, + node = %self.conf.label, + peer = %k, + addr = ?s.peer_addr().ok(), "invalid peer ip addr" + ); + continue + } + // We only accept connections whose party has a public key that + // is larger than ours, or if we do not have a connection for + // that key at the moment. + if k > self.conf.label || !self.active.contains_key(&k) { + self.spawn_io(k, s, t, peer.budget.clone()) + } else { + debug!( + name = %self.conf.name, + node = %self.conf.label, + peer = %k, + "dropping accepted connection" + ); + } + } + Ok(Err(e)) => { + warn!( + name = %self.conf.name, + node = %self.conf.label, + err = %e, + "handshake failed" + ) + } + Err(e) => { + if !e.is_cancelled() { + error!( + name = %self.conf.name, + node = %self.conf.label, + err = %e, + "handshake task panic" + ) + } + } + }, + // One of our connection attempts completed. 
+ Some(tt) = self.connect_tasks.join_next_with_id() => { + match tt { + Ok((id, (s, t))) => { + self.on_connect_task_end(id); + let Some((k, peer)) = self.lookup_peer(&t) else { + warn!( + name = %self.conf.name, + node = %self.conf.label, + peer = ?t.get_remote_static().and_then(|k| PublicKey::try_from(k).ok()), + addr = ?s.peer_addr().ok(), + "connected to unknown peer" + ); + continue + }; + // We only keep the connection if our key is larger than the remote, + // or if we do not have a connection for that key at the moment. + if k < self.conf.label || !self.active.contains_key(&k) { + self.spawn_io(k, s, t, peer.budget.clone()) + } else { + debug!( + name = %self.conf.name, + node = %self.conf.label, + peer = %k, + "dropping new connection" + ) + } + } + Err(e) => { + if !e.is_cancelled() { + error!( + name = %self.conf.name, + node = %self.conf.label, + err = %e, + "connect task panic" + ) + } + self.on_connect_task_end(e.id()); + } + } + }, + // A read or write task completed. + Some(io) = self.io_tasks.join_next_with_id() => { + match io { + Ok((id, r)) => { + if let Err(e) = r { + warn!( + name = %self.conf.name, + node = %self.conf.label, + err = %e, + "i/o error" + ) + } + self.on_io_task_end(id); + } + Err(e) => { + if e.is_cancelled() { + // If one half completes we cancel the other, so there is + // nothing else to do here, except to remove the cancelled + // tasks's ID. Same if we kill the connection, both tasks + // get cancelled. + self.task2key.remove(&e.id()); + continue + } + // If the task has not been cancelled, it must have panicked. 
+ error!( + name = %self.conf.name, + node = %self.conf.label, + err = %e, + "i/o task panic" + ); + self.on_io_task_end(e.id()) + } + }; + }, + cmd = self.obound.recv() => match cmd { + Some(Command::Add(peers)) => { + #[cfg(feature = "metrics")] + Arc::make_mut(&mut self.metrics).add_parties(peers.iter().map(|(k, ..)| k).cloned()); + for (k, x, a) in peers { + if self.peers.contains_key(&k) { + warn!( + name = %self.conf.name, + node = %self.conf.label, + peer = %k, + "peer to add already exists" + ); + continue + } + info!( + name = %self.conf.name, + node = %self.conf.label, + peer = %k, + "adding peer" + ); + let p = Peer { + addr: a, + role: Role::Passive, + budget: self.conf.new_budget() + }; + self.peers.insert(k.clone(), p); + self.index.insert(k.clone(), x); + self.spawn_connect(k) + } + } + Some(Command::Remove(peers)) => { + for k in &peers { + info!( + name = %self.conf.name, + node = %self.conf.label, + peer = %k, + "removing peer" + ); + self.peers.remove(k); + self.index.remove_by_left(k); + self.connecting.remove(k); + self.active.remove(k); + } + #[cfg(feature = "metrics")] + Arc::make_mut(&mut self.metrics).remove_parties(&peers) + } + Some(Command::Assign(role, peers)) => { + for k in &peers { + if let Some(p) = self.peers.get_mut(k) { + p.role = role + } else { + warn!( + name = %self.conf.name, + node = %self.conf.label, + peer = %k, + role = ?role, + "peer to assign role to not found" + ); + } + } + } + Some(Command::Unicast(to, id, m)) => { + if to == self.conf.label { + trace!( + name = %self.conf.name, + node = %self.conf.label, + to = %to, + len = %m.len(), + queue = self.ibound.capacity(), + "sending message" + ); + if let Err(err) = self.ibound.try_send((self.conf.label.clone(), m, None)) { + warn!( + name = %self.conf.name, + node = %self.conf.label, + err = %err, + cap = %self.ibound.capacity(), + "channel full => dropping unicast message" + ) + } + continue + } + if let Some(task) = self.active.get(&to) { + trace!( + name = 
%self.conf.name, + node = %self.conf.label, + to = %to, + len = %m.len(), + queue = task.tx.capacity(), + "sending message" + ); + #[cfg(feature = "metrics")] + self.metrics.set_peer_oqueue_cap(&to, task.tx.capacity()); + task.tx.send(id, Message::Data(m)) + } + } + Some(Command::Multicast(peers, id, m)) => { + if peers.contains(&self.conf.label) { + trace!( + name = %self.conf.name, + node = %self.conf.label, + to = %self.conf.label, + len = %m.len(), + queue = self.ibound.capacity(), + "sending message" + ); + if let Err(err) = self.ibound.try_send((self.conf.label.clone(), m.clone(), None)) { + warn!( + name = %self.conf.name, + node = %self.conf.label, + err = %err, + cap = %self.ibound.capacity(), + "channel full => dropping multicast message" + ) + } + } + for (to, task) in &self.active { + if !peers.contains(to) { + continue + } + trace!( + name = %self.conf.name, + node = %self.conf.label, + to = %to, + len = %m.len(), + queue = task.tx.capacity(), + "sending message" + ); + #[cfg(feature = "metrics")] + self.metrics.set_peer_oqueue_cap(to, task.tx.capacity()); + task.tx.send(id, Message::Data(m.clone())) + } + } + Some(Command::Broadcast(id, m)) => { + if self.role.is_active() { + trace!( + name = %self.conf.name, + node = %self.conf.label, + to = %self.conf.label, + len = %m.len(), + queue = self.ibound.capacity(), + "sending message" + ); + if let Err(err) = self.ibound.try_send((self.conf.label.clone(), m.clone(), None)) { + warn!( + name = %self.conf.name, + node = %self.conf.label, + err = %err, + cap = %self.ibound.capacity(), + "channel full => dropping broadcast message" + ) + } + } + for (to, task) in &self.active { + if Some(Role::Active) != self.peers.get(to).map(|p| p.role) { + continue + } + trace!( + name = %self.conf.name, + node = %self.conf.label, + to = %to, + len = %m.len(), + queue = task.tx.capacity(), + "sending message" + ); + #[cfg(feature = "metrics")] + self.metrics.set_peer_oqueue_cap(to, task.tx.capacity()); + task.tx.send(id, 
Message::Data(m.clone())) + } + } + None => { + return Err(NetworkError::ChannelClosed) + } + }, + _ = self.ping_interval.tick() => { + let now = Timestamp::now(); + for task in self.active.values() { + task.tx.send(None, Message::Ping(now)) + } + } + } + } + } + + /// Handles a completed connect task. + fn on_connect_task_end(&mut self, id: task::Id) { + let Some(k) = self.task2key.remove(&id) else { + error!(name = %self.conf.name, node = %self.conf.label, "no key for connect task"); + return; + }; + self.connecting.remove(&k); + } + + /// Handles a completed I/O task. + /// + /// This function will get the public key of the task that was terminated + /// and then cleanly removes the associated I/O task data and re-connects + /// to the peer node it was interacting with. + /// + /// If the task ID matches neither half of the currently active connection + /// for that peer, the connection was already replaced and is left alone. + fn on_io_task_end(&mut self, id: task::Id) { + let Some(k) = self.task2key.remove(&id) else { + error!(name = %self.conf.name, node = %self.conf.label, "no key for i/o task"); + return; + }; + let Some(task) = self.active.get(&k) else { + return; + }; + if task.rh.id() == id { + debug!( + name = %self.conf.name, + node = %self.conf.label, + peer = %k, + "read-half closed => dropping connection" + ); + self.active.remove(&k); + // Keep the connections gauge in sync with `active`; it is otherwise + // only updated when `spawn_io` starts a connection. + #[cfg(feature = "metrics")] + self.metrics.connections.set(self.active.len()); + self.spawn_connect(k) + } else if task.wh.id() == id { + debug!( + name = %self.conf.name, + node = %self.conf.label, + peer = %k, + "write-half closed => dropping connection" + ); + self.active.remove(&k); + // Same gauge refresh as for the read-half above. + #[cfg(feature = "metrics")] + self.metrics.connections.set(self.active.len()); + self.spawn_connect(k) + } else { + debug!( + name = %self.conf.name, + node = %self.conf.label, + peer = %k, + "i/o task was previously replaced" + ); + } + } + + /// Spawns a new connection task to a peer identified by public key. + /// + /// This function will look up the x25519 public key of the ed25519 key + /// and the remote address and then spawn a connection task.
+ fn spawn_connect(&mut self, k: K) { + if self.connecting.contains_key(&k) { + debug!( + name = %self.conf.name, + node = %self.conf.label, + peer = %k, + "connect task already started" + ); + return; + } + let x = self.index.get_by_left(&k).expect("known public key"); + let p = self.peers.get(&k).expect("known peer"); + let h = self.connect_tasks.spawn(connect( + self.conf.name, + (self.conf.label.clone(), self.conf.keypair.clone()), + (k.clone(), *x), + p.addr.clone(), + self.conf.retry_delays, + #[cfg(feature = "metrics")] + self.metrics.clone(), + )); + assert!(self.task2key.insert(h.id(), k.clone()).is_none()); + self.connecting.insert(k, ConnectTask { h }); + } + + /// Spawns a new `Noise` responder handshake task using the IK pattern. + /// + /// This function will create the responder handshake machine using its + /// own private key and then spawn a task that awaits an initiator handshake + /// to which it will respond. + fn spawn_handshake(&mut self, s: TcpStream) { + let h = Builder::new(NOISE_PARAMS.parse().expect("valid noise params")) + .local_private_key(&self.conf.keypair.secret_key().as_bytes()) + .expect("valid private key") + .prologue(self.conf.name.as_bytes()) + .expect("1st time we set the prologue") + .build_responder() + .expect("valid noise params yield valid handshake state"); + self.handshake_tasks.spawn(async move { + timeout(HANDSHAKE_TIMEOUT, on_handshake(h, s)) + .await + .or(Err(NetworkError::Timeout))? + }); + } + + /// Spawns a new I/O task for handling communication with a remote peer over + /// a TCP connection using the noise framework to create an authenticated + /// secure link. 
+    fn spawn_io(&mut self, k: K, s: TcpStream, t: TransportState, b: Budget) {
+        debug!(
+            name = %self.conf.name,
+            node = %self.conf.label,
+            peer = %k,
+            addr = ?s.peer_addr().ok(),
+            "starting i/o tasks"
+        );
+
+        // Outbound queue for this peer; the receive loop gets its own sender
+        // so it can hand replies to the send loop.
+        let (to_remote, from_remote) = chan::channel(self.conf.peer_capacity_egress);
+        let (reader, writer) = s.into_split();
+
+        // Both halves share the transport state and the countdown.
+        let recv_state = Arc::new(Mutex::new(t));
+        let send_state = recv_state.clone();
+        let countdown = Countdown::new();
+
+        let rh = self.io_tasks.spawn(recv_loop(
+            self.conf.name,
+            k.clone(),
+            reader,
+            recv_state,
+            self.ibound.clone(),
+            to_remote.clone(),
+            #[cfg(feature = "metrics")]
+            self.metrics.clone(),
+            b,
+            countdown.clone(),
+            self.conf.max_message_size,
+        ));
+        let wh = self
+            .io_tasks
+            .spawn(send_loop(writer, send_state, from_remote, countdown));
+
+        // Both task ids map back to the same peer key.
+        assert!(self.task2key.insert(rh.id(), k.clone()).is_none());
+        assert!(self.task2key.insert(wh.id(), k.clone()).is_none());
+
+        self.active.insert(
+            k,
+            IoTask {
+                rh,
+                wh,
+                tx: to_remote,
+            },
+        );
+        #[cfg(feature = "metrics")]
+        self.metrics.connections.set(self.active.len());
+    }
+
+    /// Resolves the peer behind an established transport.
+    ///
+    /// Takes the remote static X25519 key from the transport state, maps it
+    /// to the party's key via the index and returns that key with its peer
+    /// entry. Returns `None` if any of these steps fails.
+    fn lookup_peer(&self, t: &TransportState) -> Option<(K, &Peer)> {
+        let raw = t.get_remote_static()?;
+        let remote = PublicKey::try_from(raw).ok()?;
+        let key = self.index.get_by_right(&remote)?;
+        let peer = self.peers.get(key)?;
+        Some((key.clone(), peer))
+    }
+
+    /// Checks that the socket's remote IP matches the address configured for `k`.
+    ///
+    /// Unknown peers are rejected. Peers whose configured address is not an
+    /// `Address::Inet` are accepted without comparing IPs.
+    fn is_valid_ip(&self, k: &K, s: &TcpStream) -> bool {
+        let Some(p) = self.peers.get(k) else {
+            return false;
+        };
+        match p.addr {
+            Address::Inet(ip, _) => s.peer_addr().ok().map(|a| a.ip()) == Some(ip),
+            _ => true,
+        }
+    }
+}
+
+/// Connect to the given socket address.
+///
+/// This function will only return, when a connection has been established and the handshake
+/// has been completed.
+// NOTE(review): the type arguments in this signature look truncated in this
+// copy of the patch (no `<K>` after `connect`, `Arc>` for `metrics`) —
+// confirm against the repository before applying.
+async fn connect(
+    name: &'static str,
+    this: (K, Keypair),
+    to: (K, PublicKey),
+    addr: Address,
+    delays: [u8; NUM_DELAYS],
+    #[cfg(feature = "metrics")] metrics: Arc>,
+) -> (TcpStream, TransportState)
+where
+    K: Eq + Hash + Display + Clone,
+{
+    use rand::prelude::*;
+
+    // A fresh initiator state is needed for every attempt because a handshake
+    // state is consumed by `handshake`.
+    let new_handshake_state = || {
+        Builder::new(NOISE_PARAMS.parse().expect("valid noise params"))
+            .local_private_key(this.1.secret_key().as_slice())
+            .expect("valid private key")
+            .remote_public_key(to.1.as_slice())
+            .expect("valid remote pub key")
+            .prologue(name.as_bytes())
+            .expect("1st time we set the prologue")
+            .build_initiator()
+            .expect("valid noise params yield valid handshake state")
+    };
+
+    // Delay schedule in milliseconds: a random initial delay of 0..=1000 ms,
+    // then each configured delay (given in seconds), then the delay at index
+    // `LAST_DELAY` repeated forever.
+    let delays = once(rand::rng().random_range(0..=1000))
+        .chain(delays.into_iter().map(|d| u64::from(d) * 1000))
+        .chain(repeat(u64::from(delays[LAST_DELAY]) * 1000));
+
+    let addr = addr.to_string();
+
+    for d in delays {
+        sleep(Duration::from_millis(d)).await;
+        debug!(%name, node = %this.0, peer = %to.0, %addr, "connecting");
+        #[cfg(feature = "metrics")]
+        metrics.add_connect_attempt(&to.0);
+        // Both the TCP connect and the handshake are bounded by their own
+        // timeout; every failure is logged and followed by the next delay.
+        match timeout(CONNECT_TIMEOUT, TcpStream::connect(&addr)).await {
+            Ok(Ok(s)) => {
+                if let Err(err) = s.set_nodelay(true) {
+                    error!(%name, node = %this.0, %err, "failed to set NO_DELAY socket option");
+                    continue;
+                }
+                match timeout(HANDSHAKE_TIMEOUT, handshake(new_handshake_state(), s)).await {
+                    Ok(Ok(x)) => {
+                        debug!(%name, node = %this.0, peer = %to.0, %addr, "connection established");
+                        return x;
+                    },
+                    Ok(Err(err)) => {
+                        warn!(%name, node = %this.0, peer = %to.0, %addr, %err, "handshake failure");
+                    },
+                    Err(_) => {
+                        warn!(%name, node = %this.0, peer = %to.0, %addr, "handshake timeout");
+                    },
+                }
+            },
+            Ok(Err(err)) => {
+                warn!(%name, node = %this.0, peer = %to.0, %addr, %err, "failed to connect");
+            },
+            Err(_) => {
+                warn!(%name, node = %this.0, peer = %to.0, %addr, "connect timeout");
+            },
+        }
+    }
+
+    // The delay iterator ends in `repeat(..)`, so the loop above never finishes.
+    unreachable!("for loop repeats forever")
+}
+
+/// Perform a noise
handshake as initiator with the remote party.
+///
+/// Sends the first handshake message, waits for a single complete data frame
+/// in reply and switches to transport mode.
+async fn handshake(
+    mut hs: HandshakeState,
+    mut stream: TcpStream,
+) -> Result<(TcpStream, TransportState)> {
+    // One scratch buffer: first the outgoing frame, then the decrypted reply.
+    let mut scratch = vec![0; MAX_NOISE_HANDSHAKE_SIZE];
+
+    // Our message goes behind the bytes reserved for the frame header.
+    let written = hs.write_message(&[], &mut scratch[Header::SIZE..])?;
+    let frame_end = Header::SIZE + written;
+    send_frame(
+        &mut stream,
+        Header::data(written as u16),
+        &mut scratch[..frame_end],
+    )
+    .await?;
+
+    // The reply must be one complete data frame.
+    let (reply_hdr, reply) = recv_frame(&mut stream).await?;
+    if reply_hdr.is_partial() || !reply_hdr.is_data() {
+        return Err(NetworkError::InvalidHandshakeMessage);
+    }
+    hs.read_message(&reply, &mut scratch)?;
+
+    let transport = hs.into_transport_mode()?;
+    Ok((stream, transport))
+}
+
+/// Perform a noise handshake as responder with a remote party.
+///
+/// Mirror image of `handshake`: reads the initiator's message first, then
+/// answers it and switches to transport mode.
+async fn on_handshake(
+    mut hs: HandshakeState,
+    mut stream: TcpStream,
+) -> Result<(TcpStream, TransportState)> {
+    stream.set_nodelay(true)?;
+
+    // The initiator's message must be one complete data frame.
+    let (request_hdr, request) = recv_frame(&mut stream).await?;
+    if request_hdr.is_partial() || !request_hdr.is_data() {
+        return Err(NetworkError::InvalidHandshakeMessage);
+    }
+
+    let mut scratch = vec![0; MAX_NOISE_HANDSHAKE_SIZE];
+    hs.read_message(&request, &mut scratch)?;
+
+    // Our answer goes behind the bytes reserved for the frame header.
+    let written = hs.write_message(&[], &mut scratch[Header::SIZE..])?;
+    let frame_end = Header::SIZE + written;
+    send_frame(
+        &mut stream,
+        Header::data(written as u16),
+        &mut scratch[..frame_end],
+    )
+    .await?;
+
+    let transport = hs.into_transport_mode()?;
+    Ok((stream, transport))
+}
+
+/// Read messages from the remote by assembling frames together.
+///
+/// Once complete the message will be handed over to the given MPSC sender.
+///
+/// Each iteration first acquires a permit from `budget`, then reads frames
+/// until a complete message has been assembled:
+///
+/// - ping frames are answered by queueing a pong on `to_writer`,
+/// - pong frames are only used to record latency (with the `metrics` feature),
+/// - data frames are decrypted and appended to the current message; a frame
+///   that is not marked partial completes the message.
+///
+/// Any received frame stops `countdown`; if `countdown` completes first the
+/// loop fails with `NetworkError::Timeout`.
+///
+/// # Errors
+///
+/// Returns `NetworkError::MessageTooLarge` as soon as a data frame would make
+/// the assembled message longer than `max_message_size`. This applies to the
+/// final frame as well, so an oversized message is never delivered and the
+/// buffer never grows past the limit. Read, decryption and unknown-frame-type
+/// errors are returned as they occur. The loop ends with `Ok(())` when
+/// `to_deliver` is closed.
+// NOTE(review): the type arguments in this signature look truncated in this
+// copy of the patch (`Arc>`, `Option)`, `chan::Sender`, `Arc`, missing
+// `<R, K>`) — they are reproduced as found; confirm against the repository.
+#[allow(clippy::too_many_arguments)]
+async fn recv_loop(
+    name: &'static str,
+    id: K,
+    mut reader: R,
+    state: Arc>,
+    to_deliver: Sender<(K, Bytes, Option)>,
+    to_writer: chan::Sender,
+    #[cfg(feature = "metrics")] metrics: Arc>,
+    budget: Arc,
+    mut countdown: Countdown,
+    max_message_size: usize,
+) -> Result<()>
+where
+    R: AsyncRead + Unpin,
+    K: Eq + Hash + Display + Clone,
+{
+    // Decryption target, reused for every frame.
+    let mut buf = vec![0; MAX_NOISE_MESSAGE_SIZE];
+    loop {
+        #[cfg(feature = "metrics")]
+        metrics.set_peer_iqueue_cap(&id, budget.available_permits());
+        // The permit travels with the delivered message.
+        let permit = budget
+            .clone()
+            .acquire_owned()
+            .await
+            .map_err(|_| NetworkError::BudgetClosed)?;
+        let mut msg = BytesMut::new();
+        loop {
+            tokio::select! {
+                val = recv_frame(&mut reader) => {
+                    // Any frame from the peer counts as a sign of life.
+                    countdown.stop();
+                    match val {
+                        Ok((h, f)) => {
+                            match h.frame_type() {
+                                Ok(Type::Ping) => {
+                                    // Received ping message; sending pong to writer
+                                    let n = state.lock().read_message(&f, &mut buf)?;
+                                    if let Some(ping) = Timestamp::try_from_slice(&buf[..n]) {
+                                        to_writer.send(None, Message::Pong(ping))
+                                    }
+                                }
+                                Ok(Type::Pong) => {
+                                    // Received pong message; measure elapsed time
+                                    let _n = state.lock().read_message(&f, &mut buf)?;
+                                    #[cfg(feature = "metrics")]
+                                    if let Some(ping) = Timestamp::try_from_slice(&buf[.._n])
+                                        && let Some(delay) = Timestamp::now().diff(ping)
+                                    {
+                                        metrics.set_latency(&id, delay)
+                                    }
+                                }
+                                Ok(Type::Data) => {
+                                    let n = state.lock().read_message(&f, &mut buf)?;
+                                    // Enforce the limit for every data frame,
+                                    // the final one included, and before the
+                                    // buffer grows.
+                                    if msg.len().saturating_add(n) > max_message_size {
+                                        return Err(NetworkError::MessageTooLarge);
+                                    }
+                                    msg.extend_from_slice(&buf[..n]);
+                                    if !h.is_partial() {
+                                        break;
+                                    }
+                                }
+                                Err(t) => return Err(NetworkError::UnknownFrameType(t)),
+                            }
+                        }
+                        Err(e) => return Err(e)
+                    }
+                },
+                () = &mut countdown => {
+                    warn!(%name, node = %id, "timeout waiting for peer");
+                    return Err(NetworkError::Timeout)
+                }
+            }
+        }
+        // A closed delivery channel ends the loop without an error.
+        if to_deliver
+            .send((id.clone(), msg.freeze(), Some(permit)))
+            .await
+            .is_err()
+        {
+            break;
+        }
+    }
+    Ok(())
+}
+
+/// Consume messages to be
delivered to remote parties and send them.
+///
+/// The function automatically splits large messages into chunks that fit into
+/// a noise package.
+///
+/// Returns `Ok(())` once `rx.recv()` yields `None`; an encryption or write
+/// error ends the loop with that error.
+// NOTE(review): the type arguments in this signature look truncated in this
+// copy of the patch (`Arc>`, `chan::Receiver`, missing `<W>`) — confirm
+// against the repository before applying.
+async fn send_loop(
+    mut writer: W,
+    state: Arc>,
+    rx: chan::Receiver,
+    countdown: Countdown,
+) -> Result<()>
+where
+    W: AsyncWrite + Unpin,
+{
+    // Frame buffer reused for every message: the first `Header::SIZE` bytes
+    // are left free for the header, the ciphertext is written behind them.
+    let mut buf = vec![0; MAX_NOISE_MESSAGE_SIZE];
+
+    while let Some(msg) = rx.recv().await {
+        match msg {
+            Message::Ping(ping) => {
+                let n = state
+                    .lock()
+                    .write_message(&ping.to_bytes()[..], &mut buf[Header::SIZE..])?;
+                let h = Header::ping(n as u16);
+                send_frame(&mut writer, h, &mut buf[..Header::SIZE + n]).await?;
+                // After a ping we expect to hear from the peer within `REPLY_TIMEOUT`.
+                countdown.start(REPLY_TIMEOUT)
+            },
+            Message::Pong(pong) => {
+                let n = state
+                    .lock()
+                    .write_message(&pong.to_bytes()[..], &mut buf[Header::SIZE..])?;
+                let h = Header::pong(n as u16);
+                send_frame(&mut writer, h, &mut buf[..Header::SIZE + n]).await?
+            },
+            Message::Data(msg) => {
+                // Every chunk except the last is sent as a partial frame.
+                // NOTE(review): an empty `msg` presumably yields no chunks, in
+                // which case nothing is written — confirm this is intended.
+                let mut it = msg.chunks(MAX_PAYLOAD_SIZE).peekable();
+                while let Some(m) = it.next() {
+                    let n = state.lock().write_message(m, &mut buf[Header::SIZE..])?;
+                    let h = if it.peek().is_some() {
+                        Header::data(n as u16).partial()
+                    } else {
+                        Header::data(n as u16)
+                    };
+                    send_frame(&mut writer, h, &mut buf[..Header::SIZE + n]).await?
+                }
+            },
+        }
+    }
+    Ok(())
+}
+
+/// Read a single frame (header + payload) from the remote.
+///
+/// Reads 4 header bytes, parses them into a `Header` and then reads exactly
+/// as many payload bytes as the header's length field states.
+// NOTE(review): `recv_frame(` and `Vec)` look truncated (presumably a reader
+// type parameter and a byte vector) — confirm against the repository.
+async fn recv_frame(r: &mut R) -> Result<(Header, Vec)>
+where
+    R: AsyncRead + Unpin,
+{
+    // `read_u32` decodes big-endian, so `to_be_bytes` restores the wire bytes.
+    let b = r.read_u32().await?;
+    let h = Header::try_from(b.to_be_bytes())?;
+    let mut v = vec![0; h.len().into()];
+    r.read_exact(&mut v).await?;
+    Ok((h, v))
+}
+
+/// Write a single frame (header + payload) to the remote.
+///
+/// The header is serialised into the first 4 bytes of `msg`. It is the
+/// caller's responsibility to ensure there is room at the beginning.
+// NOTE(review): the writer type parameter after `send_frame` looks truncated
+// in this copy of the patch — confirm against the repository.
+async fn send_frame(w: &mut W, hdr: Header, msg: &mut [u8]) -> Result<()>
+where
+    W: AsyncWrite + Unpin,
+{
+    debug_assert!(msg.len() <= MAX_NOISE_MESSAGE_SIZE);
+    // Overwrite the reserved prefix with the header so that header and
+    // payload go out in a single write.
+    msg[..Header::SIZE].copy_from_slice(&hdr.to_bytes());
+    w.write_all(msg).await?;
+    Ok(())
+}
diff --git a/crates/cliquenet/src/retry.rs b/crates/cliquenet/src/retry.rs
new file mode 100644
index 00000000000..3e0400b6e50
--- /dev/null
+++ b/crates/cliquenet/src/retry.rs
@@ -0,0 +1,429 @@
+use std::{
+    collections::BTreeMap,
+    convert::Infallible,
+    fmt::{self, Display},
+    hash::Hash,
+    sync::{
+        Arc,
+        atomic::{AtomicU64, Ordering},
+    },
+};
+
+use bytes::{Bytes, BytesMut};
+use nohash_hasher::IntMap;
+use parking_lot::Mutex;
+use tokio::{
+    spawn,
+    sync::mpsc::{Sender, error::TrySendError},
+    task::JoinHandle,
+    time::{self, Duration, Instant},
+};
+use tracing::warn;
+
+use crate::{
+    Address, Id, NUM_DELAYS, NetConf, Network, NetworkError, PublicKey, Role, net::Command,
+};
+
+// NOTE(review): presumably `Result<T> = std::result::Result<T, NetworkError>`;
+// the type arguments look truncated in this copy of the patch.
+type Result = std::result::Result;
+
+/// The largest possible bucket number (`u64::MAX`).
+pub const MAX_BUCKET: Bucket = Bucket(u64::MAX);
+
+/// `Retry` wraps a [`Network`] and returns acknowledgements to senders.
+///
+/// It also retries messages until either an acknowledgement has been received
+/// or client code has indicated that the messages are no longer of interest
+/// by invoking `Retry::gc`.
+///
+/// Each message that is sent has a trailer appended that contains the bucket
+/// number and ID of the message. Receivers will send this trailer back. The
+/// sender then stops retrying the corresponding message.
+///
+/// Note that if malicious parties modify the trailer and have it point to a
+/// different message, they can only remove themselves from the set of parties
+/// the sender is expecting an acknowledgement from.
+///
+/// Clones are cheap and share the same state. The background retry task is
+/// stopped when the last clone is dropped.
+// NOTE(review): the type arguments in this part of the patch look truncated
+// (`Arc>`, `Sender>`, `Mutex>>`, `Vec,`, missing `<K>` parameters). They are
+// reproduced as found; confirm against the repository before applying.
+#[derive(Debug, Clone)]
+pub struct Retry {
+    inner: Arc>,
+}
+
+/// State shared by all clones of a `Retry`.
+#[derive(Debug)]
+struct Inner {
+    /// Our own key, used as the node label in log output.
+    this: K,
+    /// The wrapped network.
+    net: Network,
+    /// Command channel into the network, used for sends and acknowledgements.
+    sender: Sender>,
+    /// Source of message IDs.
+    id: AtomicU64,
+    /// Messages that are still awaiting acknowledgements.
+    buffer: Buffer,
+    /// Handle of the background task that re-sends buffered messages.
+    retry: JoinHandle,
+    /// Received data whose acknowledgement could not be queued yet.
+    pending: Mutex>>,
+}
+
+// `Drop` lives on the shared state rather than on `Retry`. `Retry` is `Clone`,
+// and a `Drop` impl on the handle would abort the retry task as soon as *any*
+// clone went away, silently disabling retries for the remaining clones.
+impl Drop for Inner {
+    fn drop(&mut self) {
+        self.retry.abort()
+    }
+}
+
+/// Buckets conceptually contain messages.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct Bucket(u64);
+
+/// Messages are associated with IDs and put into buckets.
+///
+/// Bucket numbers are given to us by clients which also garbage collect
+/// explicitly by specifying the bucket up to which to remove messages.
+/// Buckets often correspond to rounds elsewhere.
+#[derive(Debug, Clone)]
+#[allow(clippy::type_complexity)]
+struct Buffer(Arc>>>>);
+
+impl Default for Buffer {
+    fn default() -> Self {
+        Self(Default::default())
+    }
+}
+
+/// A buffered message together with its retry bookkeeping.
+#[derive(Debug)]
+struct Message {
+    /// The message bytes to (re-)send.
+    data: Bytes,
+    /// The time we started sending this message.
+    time: Instant,
+    /// The number of times we have sent this message.
+    retries: usize,
+    /// The parties that still have to acknowledge the message.
+    remaining: Vec,
+}
+
+/// Meta information appended at the end of a message.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+struct Trailer {
+    /// The bucket number the message corresponds to.
+    bucket: Bucket,
+    /// The message ID.
+    id: Id,
+}
+
+/// Data we have received but could not acknowledge yet.
+#[derive(Debug)] +struct Pending { + src: K, + data: Bytes, + trailer: Bytes, +} + +enum Target { + Single(K), + Multi(Vec), + All, +} + +impl Retry +where + K: Eq + Ord + Clone + Display + Hash + Send + Sync + 'static, +{ + pub async fn create(mut cfg: NetConf) -> Result { + cfg.max_message_size += Trailer::SIZE; + let delays = cfg.retry_delays; + let net = Network::create(cfg).await?; + let buffer = Buffer::default(); + let retry = spawn(retry(buffer.clone(), net.sender(), delays)); + Ok(Self { + inner: Arc::new(Inner { + this: net.public_key().clone(), + sender: net.sender(), + net, + buffer, + id: AtomicU64::new(0), + retry, + pending: Mutex::new(BTreeMap::new()), + }), + }) + } + + pub async fn broadcast(&self, b: B, data: Vec) -> Result + where + B: Into, + { + self.send(b.into(), Target::All, data).await + } + + pub async fn multicast(&self, to: Vec, b: B, data: Vec) -> Result + where + B: Into, + { + self.send(b.into(), Target::Multi(to), data).await + } + + pub async fn unicast(&self, to: K, b: B, data: Vec) -> Result + where + B: Into, + { + self.send(b.into(), Target::Single(to), data).await + } + + pub async fn add(&self, peers: Vec<(K, PublicKey, Address)>) -> Result<()> { + self.inner.net.add(peers).await + } + + pub async fn remove(&self, peers: Vec) -> Result<()> { + self.inner.net.remove(peers).await + } + + pub async fn assign(&self, r: Role, peers: Vec) -> Result<()> { + self.inner.net.assign(r, peers).await + } + + pub async fn receive(&self) -> Result<(K, Bytes)> { + let pending = self.inner.pending.lock().pop_first(); + if let Some((_, Pending { src, data, trailer })) = pending { + self.inner + .sender + .send(Command::Unicast(src.clone(), None, trailer.clone())) + .await + .map_err(|_| NetworkError::ChannelClosed)?; + return Ok((src, data)); + } + loop { + let (src, mut bytes) = self.inner.net.receive().await?; + + let Some((trailer, trailer_bytes)) = Trailer::from_bytes(&mut bytes) else { + warn!(node = %self.inner.this, "invalid trailer"); 
+ continue; + }; + + if !bytes.is_empty() { + // Send the trailer back as acknowledgement: + match self + .inner + .sender + .try_send(Command::Unicast(src.clone(), None, trailer_bytes)) + { + Ok(()) => return Ok((src, bytes)), + Err(TrySendError::Closed(_)) => return Err(NetworkError::ChannelClosed), + Err(TrySendError::Full(Command::Unicast(src, _, trailer_bytes))) => { + // Save received data for cancellation safety: + self.inner.pending.lock().insert( + trailer, + Pending { + src: src.clone(), + data: bytes.clone(), + trailer: trailer_bytes.clone(), + }, + ); + self.inner + .sender + .send(Command::Unicast(src.clone(), None, trailer_bytes)) + .await + .map_err(|_| NetworkError::ChannelClosed)?; + self.inner.pending.lock().remove(&trailer); + return Ok((src, bytes)); + }, + Err(TrySendError::Full(_)) => { + unreachable!( + "We tried sending a Command::Unicast so this is what we get back." + ) + }, + } + } + + let mut messages = self.inner.buffer.0.lock(); + + if let Some(buckets) = messages.get_mut(&trailer.bucket) + && let Some(m) = buckets.get_mut(&trailer.id) + { + m.remaining.retain(|k| *k != src); + if m.remaining.is_empty() { + buckets.remove(&trailer.id); + } + } + } + } + + pub fn gc>(&self, bucket: B) { + let bucket = bucket.into(); + self.inner.buffer.0.lock().retain(|b, _| *b >= bucket); + } + + pub fn rm>(&self, bucket: B, id: Id) { + let bucket = bucket.into(); + if let Some(messages) = self.inner.buffer.0.lock().get_mut(&bucket) { + messages.remove(&id); + } + } + + async fn send(&self, b: Bucket, to: Target, data: Vec) -> Result { + let id = self.next_id(); + + let trailer = Trailer { bucket: b, id }; + + let mut msg = BytesMut::from(Bytes::from(data)); + msg.extend_from_slice(&trailer.to_bytes()); + let msg = msg.freeze(); + + let now = Instant::now(); + + let rem = match to { + Target::Single(to) => { + self.inner + .sender + .send(Command::Unicast(to.clone(), Some(id), msg.clone())) + .await + .map_err(|_| NetworkError::ChannelClosed)?; + 
vec![to] + }, + Target::Multi(peers) => { + self.inner + .sender + .send(Command::Multicast(peers.clone(), Some(id), msg.clone())) + .await + .map_err(|_| NetworkError::ChannelClosed)?; + peers + }, + Target::All => { + self.inner + .sender + .send(Command::Broadcast(Some(id), msg.clone())) + .await + .map_err(|_| NetworkError::ChannelClosed)?; + self.inner.net.parties(Role::Active) + }, + }; + + self.inner.buffer.0.lock().entry(b).or_default().insert( + id, + Message { + data: msg, + time: now, + retries: 0, + remaining: rem, + }, + ); + + Ok(id) + } + + fn next_id(&self) -> Id { + Id::from(self.inner.id.fetch_add(1, Ordering::Relaxed)) + } +} + +async fn retry(buf: Buffer, net: Sender>, delays: [u8; NUM_DELAYS]) -> Infallible +where + K: Clone, +{ + let mut i = time::interval(Duration::from_secs(1)); + i.set_missed_tick_behavior(time::MissedTickBehavior::Skip); + + let mut buckets = Vec::new(); + let mut ids = Vec::new(); + + loop { + let now = i.tick().await; + + debug_assert!(buckets.is_empty()); + buckets.extend(buf.0.lock().keys().copied()); + + for b in buckets.drain(..) { + debug_assert!(ids.is_empty()); + ids.extend( + buf.0 + .lock() + .get(&b) + .into_iter() + .flat_map(|m| m.keys().copied()), + ); + + for id in ids.drain(..) 
{ + let message; + let remaining; + + { + let mut buf = buf.0.lock(); + let Some(m) = buf.get_mut(&b).and_then(|m| m.get_mut(&id)) else { + continue; + }; + + let delay = delays + .get(m.retries) + .copied() + .or_else(|| delays.last().copied()) + .unwrap_or(30); + + if now.saturating_duration_since(m.time) < Duration::from_secs(delay.into()) { + continue; + } + + m.time = now; + m.retries = m.retries.saturating_add(1); + + message = m.data.clone(); + remaining = m.remaining.clone(); + } + + let _ = net + .send(Command::Multicast(remaining, Some(id), message.clone())) + .await; + } + } + } +} + +impl From for Bucket { + fn from(val: u64) -> Self { + Self(val) + } +} + +impl From for u64 { + fn from(val: Bucket) -> Self { + val.0 + } +} + +impl fmt::Display for Bucket { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.0.fmt(f) + } +} + +impl Trailer { + const SIZE: usize = 16; + + fn from_bytes(bytes: &mut Bytes) -> Option<(Self, Bytes)> { + if bytes.len() < Self::SIZE { + return None; + } + let slice = bytes.split_off(bytes.len() - Self::SIZE); + let both = u128::from_be_bytes(slice[..].try_into().ok()?); + let this = Self { + bucket: ((both >> 64) as u64).into(), + id: (both as u64).into(), + }; + Some((this, slice)) + } + + fn to_bytes(self) -> [u8; Self::SIZE] { + (u128::from(self.bucket.0) << 64 | u128::from(self.id.0)).to_be_bytes() + } +} + +#[cfg(test)] +mod tests { + use bytes::Bytes; + use quickcheck::quickcheck; + + use super::Trailer; + + quickcheck! 
{ + fn to_from_bytes(b: u64, i: u64) -> bool { + let a = Trailer { + bucket: b.into(), + id: i.into() + }; + let mut bytes = Bytes::copy_from_slice(&a.to_bytes()); + let (b, _) = Trailer::from_bytes(&mut bytes).unwrap(); + a == b + } + } +} diff --git a/crates/cliquenet/src/time.rs b/crates/cliquenet/src/time.rs new file mode 100644 index 00000000000..14bdd97d691 --- /dev/null +++ b/crates/cliquenet/src/time.rs @@ -0,0 +1,173 @@ +use std::{ + future::Future, + pin::Pin, + sync::{Arc, LazyLock}, + task::{Context, Poll, Waker}, +}; + +use parking_lot::Mutex; +use tokio::time::{Duration, Instant, Sleep, sleep}; + +/// An unspecified epoch for use with `Timestamp`. +static EPOCH: LazyLock = LazyLock::new(Instant::now); + +/// Time measured as duration in µs since an unspecified epoch. +#[derive(Debug, Copy, Clone)] +pub struct Timestamp(u64); + +impl Timestamp { + pub fn now() -> Self { + Self(Instant::now().saturating_duration_since(*EPOCH).as_micros() as u64) + } + + pub fn from_bytes(bytes: [u8; 8]) -> Self { + Self(u64::from_be_bytes(bytes)) + } + + pub fn to_bytes(self) -> [u8; 8] { + self.0.to_be_bytes() + } + + pub fn try_from_slice(b: &[u8]) -> Option { + let bytes = b.try_into().ok()?; + Some(Self::from_bytes(bytes)) + } + + #[allow(unused)] + pub fn diff(self, other: Self) -> Option { + self.0.checked_sub(other.0).map(Duration::from_micros) + } +} + +/// A countdown timer that can be reset. +#[derive(Debug, Clone)] +pub struct Countdown { + inner: Arc>, +} + +#[derive(Debug)] +struct Inner { + // The actual future to await. + sleep: Option>>, + + // Is this countdown running? + // + // We could utilise the `sleep` `Option` for same purpose, and arguably + // it would be cleaner to use `Some` as the running state, and `None` as + // the opposite. However we would like to avoid the allocation every time + // the countdown is (re-)started, hence this flag. + stopped: bool, + + /// Waker to call when a stopped `Countdown` should be polled again. 
+ waker: Option, +} + +impl Default for Countdown { + fn default() -> Self { + Self::new() + } +} + +impl Countdown { + /// Create a new countdown. + /// + /// When ready, use `Countdown::start` to begin. + pub fn new() -> Self { + Self { + inner: Arc::new(Mutex::new(Inner { + sleep: None, + stopped: true, + waker: None, + })), + } + } + + /// Start the countdown. + /// + /// Once started, a countdown can not be started again, unless + /// `Countdown::stop` is invoked first. + pub fn start(&self, timeout: Duration) { + let mut inner = self.inner.lock(); + if !inner.stopped { + // The countdown is already running. + return; + } + inner.stopped = false; + if let Some(sleep) = &mut inner.sleep { + sleep.as_mut().reset(Instant::now() + timeout) + } else { + inner.sleep = Some(Box::pin(sleep(timeout))) + } + if let Some(w) = inner.waker.take() { + w.wake() + } + } + + /// Stop this countdown. + pub fn stop(&self) { + self.inner.lock().stopped = true + } +} + +impl Future for Countdown { + type Output = (); + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + let mut inner = self.inner.lock(); + if inner.stopped { + if let Some(w) = inner.waker.as_mut() { + // Update existing waker: + w.clone_from(cx.waker()) + } else { + inner.waker = Some(cx.waker().clone()) + } + return Poll::Pending; + } + debug_assert!(inner.waker.is_none()); + let sleep = inner.sleep.as_mut().expect("!stopped => sleep future"); + sleep.as_mut().poll(cx) + } +} + +#[cfg(test)] +mod tests { + use tokio::time::{Duration, Instant, sleep, timeout}; + + use super::{Countdown, Timestamp}; + + #[tokio::test] + async fn duration() { + let d = Duration::from_millis(50); + let a = Timestamp::now(); + sleep(d).await; + let b = Timestamp::now(); + let x = b.diff(a).unwrap(); + assert!(x - d < Duration::from_millis(5)) + } + + #[tokio::test] + async fn countdown() { + let mut c = Countdown::new(); + + let now = Instant::now(); + c.start(Duration::from_secs(1)); + (&mut c).await; + 
assert!(now.elapsed() >= Duration::from_secs(1)); + + // Once finished, the countdown stays finished: + let now = Instant::now(); + (&mut c).await; + assert!(now.elapsed() < Duration::from_millis(1)); + + // If stopped it does not end: + c.start(Duration::from_secs(1)); + c.stop(); + assert!(timeout(Duration::from_secs(2), &mut c).await.is_err()); + + // until started again: + c.start(Duration::from_secs(1)); + let now = Instant::now(); + (&mut c).await; + assert!(now.elapsed() >= Duration::from_secs(1)); + } +} diff --git a/crates/cliquenet/src/x25519.rs b/crates/cliquenet/src/x25519.rs new file mode 100644 index 00000000000..d5fabd5ff71 --- /dev/null +++ b/crates/cliquenet/src/x25519.rs @@ -0,0 +1,183 @@ +use std::{cmp::Ordering, fmt}; + +use ed25519_compact::x25519; + +#[derive(Clone, PartialEq, Eq, Hash)] +pub struct Keypair { + pair: x25519::KeyPair, +} + +#[derive(Copy, Clone, PartialEq, Eq, Hash)] +pub struct PublicKey { + key: x25519::PublicKey, +} + +#[derive(Clone, PartialEq, Eq, Hash)] +pub struct SecretKey { + key: x25519::SecretKey, +} + +impl Keypair { + pub fn generate() -> Result { + let pair = x25519::KeyPair::generate(); + if pair.validate().is_err() { + return Err(InvalidKeypair(())); + } + Ok(Self { pair }) + } + + pub fn public_key(&self) -> PublicKey { + PublicKey { key: self.pair.pk } + } + + pub fn secret_key(&self) -> SecretKey { + SecretKey { + key: self.pair.sk.clone(), + } + } +} + +impl PublicKey { + pub fn as_bytes(&self) -> [u8; 32] { + *self.key + } + + pub fn as_slice(&self) -> &[u8] { + &self.key[..] + } +} + +impl SecretKey { + pub fn public_key(&self) -> PublicKey { + let key = self.key.recover_public_key().expect("valid public key"); + PublicKey { key } + } + + pub fn as_bytes(&self) -> [u8; 32] { + *self.key + } + + pub fn as_slice(&self) -> &[u8] { + &self.key[..] 
+ } +} + +impl From for Keypair { + fn from(k: SecretKey) -> Self { + let p = k.public_key(); + Self { + pair: x25519::KeyPair { + sk: k.key, + pk: p.key, + }, + } + } +} + +impl From for PublicKey { + fn from(k: SecretKey) -> Self { + k.public_key() + } +} + +impl Ord for PublicKey { + fn cmp(&self, other: &Self) -> Ordering { + self.key[..].cmp(&other.key[..]) + } +} + +impl PartialOrd for PublicKey { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl fmt::Debug for SecretKey { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("SecretKey") + } +} + +impl fmt::Debug for Keypair { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Keypair") + .field("public_key", &self.public_key()) + .field("secret_key", &"SecretKey") + .finish() + } +} + +impl fmt::Debug for PublicKey { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", bs58::encode(&self.as_bytes()).into_string()) + } +} + +impl fmt::Display for PublicKey { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + ::fmt(self, f) + } +} + +impl TryFrom<&[u8]> for PublicKey { + type Error = InvalidPublicKey; + + fn try_from(value: &[u8]) -> Result { + let key = x25519::PublicKey::from_slice(value).map_err(|_| InvalidPublicKey(()))?; + Ok(Self { key }) + } +} + +impl TryFrom<&[u8]> for SecretKey { + type Error = InvalidSecretKey; + + fn try_from(s: &[u8]) -> Result { + let k = x25519::SecretKey::from_slice(s).map_err(|_| InvalidSecretKey(()))?; + if k.recover_public_key().is_err() { + return Err(InvalidSecretKey(())); + } + Ok(Self { key: k }) + } +} + +impl TryFrom<&str> for PublicKey { + type Error = InvalidPublicKey; + + fn try_from(s: &str) -> Result { + bs58::decode(s) + .into_vec() + .map_err(|_| InvalidPublicKey(())) + .and_then(|v| PublicKey::try_from(v.as_slice())) + } +} + +impl TryFrom<&str> for SecretKey { + type Error = InvalidSecretKey; + + fn try_from(s: &str) -> Result { + 
bs58::decode(s) + .into_vec() + .map_err(|_| InvalidSecretKey(())) + .and_then(|v| SecretKey::try_from(v.as_slice())) + } +} + +impl From<[u8; 32]> for SecretKey { + fn from(bytes: [u8; 32]) -> Self { + SecretKey { + key: x25519::SecretKey::new(bytes), + } + } +} + +#[derive(Debug, thiserror::Error)] +#[error("invalid keypair")] +pub struct InvalidKeypair(()); + +#[derive(Debug, thiserror::Error)] +#[error("invalid secret key")] +pub struct InvalidSecretKey(()); + +#[derive(Debug, thiserror::Error)] +#[error("invalid public key")] +pub struct InvalidPublicKey(()); diff --git a/crates/cliquenet/tests/frame-handling.rs b/crates/cliquenet/tests/frame-handling.rs new file mode 100644 index 00000000000..6aedc8702b6 --- /dev/null +++ b/crates/cliquenet/tests/frame-handling.rs @@ -0,0 +1,84 @@ +use std::{collections::HashMap, net::Ipv4Addr}; + +use bytes::Bytes; +use cliquenet::{Address, Keypair, NetConf, Retry}; +#[cfg(feature = "metrics")] +use hotshot_types::traits::metrics::NoMetrics; +use rand::RngCore; + +/// Send and receive messages of various sizes between 1 byte and 5 MiB. 
+#[tokio::test] +async fn multiple_frames() { + let _ = tracing_subscriber::fmt::try_init(); + + const PARTIES: u16 = 30; + + let parties = (0..PARTIES) + .map(|i| { + ( + i, + Keypair::generate().unwrap(), + Address::from((Ipv4Addr::LOCALHOST, 50000 + i)), + ) + }) + .collect::>(); + + let mut networks = HashMap::new(); + for (k, x, a) in parties.clone() { + networks.insert( + k, + Retry::create({ + let cfg = NetConf::builder() + .name("frames") + .keypair(x) + .label(k) + .bind(a) + .parties( + parties + .iter() + .map(|(i, x, a)| (*i, x.public_key(), a.clone())), + ); + #[cfg(not(feature = "metrics"))] + { + cfg.build() + } + #[cfg(feature = "metrics")] + { + cfg.metrics(Box::new(NoMetrics)).build() + } + }) + .await + .unwrap(), + ); + } + + let mut counters: HashMap> = HashMap::new(); + + for b in 0..10 { + for net in networks.values_mut() { + net.broadcast(b, gen_message()).await.unwrap(); + } + loop { + for (k, net) in &mut networks { + if counters.get(k).map(|m| m.len()).unwrap_or(0) == usize::from(PARTIES) { + continue; + } + let (_, data) = net.receive().await.unwrap(); + *counters.entry(*k).or_default().entry(data).or_default() += 1 + } + if counters.values().all(|m| m.len() == usize::from(PARTIES)) { + break; + } + } + for net in networks.values_mut() { + net.gc(b) + } + } +} + +fn gen_message() -> Vec { + let mut g = rand::rng(); + let mut v = vec![0; 5 * 1024 * 1024]; + g.fill_bytes(&mut v); + v +} diff --git a/crates/hotshot/example-types/src/membership/fetcher.rs b/crates/hotshot/example-types/src/membership/fetcher.rs index 85c46a04fe9..2de5daae65f 100644 --- a/crates/hotshot/example-types/src/membership/fetcher.rs +++ b/crates/hotshot/example-types/src/membership/fetcher.rs @@ -10,12 +10,15 @@ use anyhow::Context; use async_broadcast::{Receiver, RecvError}; use hotshot::traits::NodeImplementation; use hotshot_types::{ - data::Leaf2, + data::{Leaf2, ViewNumber}, event::{Event, EventType}, message::{Message, MessageKind}, traits::{ - 
block_contents::BlockHeader, network::ConnectedNetwork, node_implementation::NodeType, + block_contents::BlockHeader, + network::ConnectedNetwork, + node_implementation::{ConsensusTime, NodeType}, }, + vote::HasViewNumber, }; use tokio::task::JoinHandle; use vbs::{bincode_serializer::BincodeSerializer, version::StaticVersion, BinarySerializer}; @@ -34,7 +37,11 @@ pub type RecvMessageFn = std::sync::Arc BoxFuture<'static, anyhow::Result>> + Send + Sync>; pub type DirectMessageFn = std::sync::Arc< - dyn Fn(Vec, ::SignatureKey) -> BoxFuture<'static, anyhow::Result<()>> + dyn Fn( + ViewNumber, + Vec, + ::SignatureKey, + ) -> BoxFuture<'static, anyhow::Result<()>> + Send + Sync, >; @@ -46,11 +53,12 @@ pub struct NetworkFunctions { pub async fn direct_message_impl>( network: Arc<>::Network>, + view: ViewNumber, message: Vec, recipient: ::SignatureKey, ) -> anyhow::Result<()> { network - .direct_message(message, recipient.clone()) + .direct_message(view, message, recipient.clone()) .await .context(format!("Failed to send message to recipient {recipient}")) } @@ -58,9 +66,11 @@ pub async fn direct_message_impl>( pub fn direct_message_fn>( network: Arc<>::Network>, ) -> DirectMessageFn { - Arc::new(move |message, recipient| { + Arc::new(move |view, message, recipient| { let network = network.clone(); - Box::pin(direct_message_impl::(network, message, recipient)) + Box::pin(direct_message_impl::( + network, view, message, recipient, + )) }) } @@ -101,7 +111,7 @@ impl Leaf2Fetcher { loop { match network_receiver.recv_direct().await { Ok(Event { - view_number: _, + view_number: view, event: EventType::ExternalMessageReceived { sender: _, data }, }) => { let (requested_height, requester): (u64, TYPES::SignatureKey) = @@ -148,9 +158,12 @@ impl Leaf2Fetcher { BincodeSerializer::>::serialize(&leaf_response) .expect("Failed to serialize leaf response"); - if let Err(e) = - (network_functions.direct_message)(serialized_leaf_response, requester) - .await + if let Err(e) = 
(network_functions.direct_message)( + view.u64().into(), + serialized_leaf_response, + requester, + ) + .await { tracing::error!( "Failed to send leaf response in test membership fetcher: {e}, \ @@ -183,6 +196,7 @@ impl Leaf2Fetcher { .expect("Failed to serialize leaf request"), ), }; + let view = leaf_request.view_number(); let leaves: BTreeMap> = self .storage @@ -217,8 +231,12 @@ impl Leaf2Fetcher { BincodeSerializer::>::serialize(&leaf_request) .expect("Failed to serialize leaf request"); - if let Err(e) = - (self.network_functions.direct_message)(serialized_leaf_request, source).await + if let Err(e) = (self.network_functions.direct_message)( + view.u64().into(), + serialized_leaf_request, + source, + ) + .await { tracing::error!("Failed to send leaf request in test membership fetcher: {e}"); }; diff --git a/crates/hotshot/example-types/src/node_types.rs b/crates/hotshot/example-types/src/node_types.rs index b2e1057343e..818aae13da1 100644 --- a/crates/hotshot/example-types/src/node_types.rs +++ b/crates/hotshot/example-types/src/node_types.rs @@ -9,7 +9,7 @@ use std::{ }; use hotshot::traits::{ - implementations::{CombinedNetworks, Libp2pNetwork, MemoryNetwork, PushCdnNetwork}, + implementations::{Cliquenet, CombinedNetworks, Libp2pNetwork, MemoryNetwork, PushCdnNetwork}, NodeImplementation, }; use hotshot_types::{ @@ -290,6 +290,10 @@ pub struct MemoryImpl; #[derive(Clone, Debug, Deserialize, Serialize, Hash, Eq, PartialEq)] pub struct Libp2pImpl; +/// Cliquenet network implementation +#[derive(Clone, Debug, Deserialize, Serialize, Hash, Eq, PartialEq)] +pub struct CliquenetImpl; + /// Web server network implementation #[derive(Clone, Debug, Deserialize, Serialize, Hash, Eq, PartialEq)] pub struct WebImpl; @@ -318,6 +322,11 @@ impl NodeImplementation for Libp2pImpl { type Storage = TestStorage; } +impl NodeImplementation for CliquenetImpl { + type Network = Cliquenet; + type Storage = TestStorage; +} + #[derive(Clone, Debug, Copy)] pub struct TestVersions {} 
diff --git a/crates/hotshot/hotshot/Cargo.toml b/crates/hotshot/hotshot/Cargo.toml index a7e8ca5c1d2..57157129c47 100644 --- a/crates/hotshot/hotshot/Cargo.toml +++ b/crates/hotshot/hotshot/Cargo.toml @@ -27,10 +27,12 @@ async-trait = { workspace = true } bimap = "0.6" bincode = { workspace = true } blake3 = { workspace = true } +bytes = { workspace = true } cdn-broker = { workspace = true } cdn-client = { workspace = true } cdn-marshal = { workspace = true } chrono = { workspace = true } +cliquenet = { workspace = true, features = ["metrics"] } committable = { workspace = true } dashmap = { workspace = true } derive_more = { workspace = true } diff --git a/crates/hotshot/hotshot/src/lib.rs b/crates/hotshot/hotshot/src/lib.rs index e16646c0ec6..8af9e5681b3 100644 --- a/crates/hotshot/hotshot/src/lib.rs +++ b/crates/hotshot/hotshot/src/lib.rs @@ -621,6 +621,7 @@ impl, V: Versions> SystemContext { + net: Retry, +} + +impl Cliquenet { + pub async fn create( + name: &'static str, + key: T::SignatureKey, + keypair: Keypair, + addr: A, + parties: P, + metrics: M, + ) -> Result + where + A: Into

, + B: Into
, + P: IntoIterator, + M: Metrics + 'static, + { + let cfg = NetConf::builder() + .name(name) + .label(key) + .keypair(keypair) + .bind(addr.into()) + .parties(parties.into_iter().map(|(k, x, a)| (k, x, a.into()))) + .metrics(Box::new(metrics)) + .build(); + let net = Retry::create(cfg) + .await + .map_err(|e| NetworkError::ListenError(format!("cliquenet creation failed: {e}")))?; + Ok(Self { net }) + } +} + +pub fn derive_keypair(k: &K::PrivateKey) -> Keypair { + SecretKey::from(blake3::derive_key("cliquenet key", &k.to_bytes())).into() +} + +#[async_trait] +impl ConnectedNetwork for Cliquenet { + async fn broadcast_message( + &self, + v: ViewNumber, + m: Vec, + _: Topic, + _: BroadcastDelay, + ) -> Result<(), NetworkError> { + self.net.broadcast(*v, m).await.map_err(|e| { + NetworkError::MessageSendError(format!("cliquenet broadcast error: {e}")) + })?; + Ok(()) + } + + async fn da_broadcast_message( + &self, + v: ViewNumber, + m: Vec, + recipients: Vec, + _: BroadcastDelay, + ) -> Result<(), NetworkError> { + self.net.multicast(recipients, *v, m).await.map_err(|e| { + NetworkError::MessageSendError(format!("cliquenet da_broadcast error: {e}")) + })?; + Ok(()) + } + + async fn direct_message( + &self, + v: ViewNumber, + m: Vec, + recipient: T::SignatureKey, + ) -> Result<(), NetworkError> { + self.net + .unicast(recipient, *v, m) + .await + .map_err(|e| NetworkError::MessageSendError(format!("cliquenet unicast error: {e}")))?; + Ok(()) + } + + async fn recv_message(&self) -> Result, NetworkError> { + let (_src, data) = + self.net.receive().await.map_err(|e| { + NetworkError::MessageSendError(format!("cliquenet receive error: {e}")) + })?; + Ok(Vec::from(&data[..])) + } + + async fn update_view( + &self, + v: ViewNumber, + _: Option, + _: EpochMembershipCoordinator, + ) where + U: NodeType, + { + self.net.gc(*v) + } + + async fn wait_for_ready(&self) {} + + fn pause(&self) { + unimplemented!("Pausing not implemented for cliquenet"); + } + + fn resume(&self) { + 
unimplemented!("Resuming not implemented for cliquenet"); + } + + fn shut_down<'a, 'b>(&'a self) -> BoxSyncFuture<'b, ()> + where + 'a: 'b, + Self: 'b, + { + boxed_sync(ready(())) + } +} + +#[cfg(feature = "hotshot-testing")] +impl TestableNetworkingImplementation for Cliquenet { + fn generator( + expected_node_count: usize, + _num_bootstrap: usize, + _network_id: usize, + _da_committee_size: usize, + _reliability_config: Option>, + _secondary_network_delay: Duration, + ) -> AsyncGenerator> { + let mut parties = Vec::new(); + for i in 0..expected_node_count { + use std::net::Ipv4Addr; + + use cliquenet::Address; + + let secret = T::SignatureKey::generated_from_seed_indexed([0u8; 32], i as u64).1; + let public = T::SignatureKey::from_private(&secret); + let kpair = derive_keypair::<::SignatureKey>(&secret); + let port = portpicker::pick_unused_port().expect("an unused port is available"); + let addr = Address::Inet(Ipv4Addr::LOCALHOST.into(), port); + + parties.push((kpair, public, addr)); + } + + let parties = Arc::new(parties); + + Box::pin(move |i| { + let parties = parties.clone(); + let future = async move { + use hotshot_types::traits::metrics::NoMetrics; + + let (s, k, a) = &parties[i as usize]; + let it = parties + .iter() + .map(|(s, k, a)| (k.clone(), s.public_key(), a.clone())); + let net = Cliquenet::create("test", k.clone(), s.clone(), a.clone(), it, NoMetrics) + .await + .unwrap(); + Arc::new(net) + }; + Box::pin(future) + }) + } + + fn in_flight_message_count(&self) -> Option { + None + } +} diff --git a/crates/hotshot/hotshot/src/traits/networking/combined_network.rs b/crates/hotshot/hotshot/src/traits/networking/combined_network.rs index 622a937fad6..af0dcad8961 100644 --- a/crates/hotshot/hotshot/src/traits/networking/combined_network.rs +++ b/crates/hotshot/hotshot/src/traits/networking/combined_network.rs @@ -31,7 +31,7 @@ use hotshot_types::{ COMBINED_NETWORK_CACHE_SIZE, COMBINED_NETWORK_DELAY_DURATION, COMBINED_NETWORK_MIN_PRIMARY_FAILURES, 
COMBINED_NETWORK_PRIMARY_CHECK_INTERVAL, }, - data::ViewNumber, + data::{EpochNumber, ViewNumber}, epoch_membership::EpochMembershipCoordinator, traits::{ network::{BroadcastDelay, ConnectedNetwork, Topic}, @@ -350,6 +350,7 @@ impl ConnectedNetwork for CombinedNetworks async fn broadcast_message( &self, + view: ViewNumber, message: Vec, topic: Topic, broadcast_delay: BroadcastDelay, @@ -362,12 +363,12 @@ impl ConnectedNetwork for CombinedNetworks message, async move { primary - .broadcast_message(primary_message, topic, BroadcastDelay::None) + .broadcast_message(view, primary_message, topic, BroadcastDelay::None) .await }, async move { secondary - .broadcast_message(secondary_message, topic, BroadcastDelay::None) + .broadcast_message(view, secondary_message, topic, BroadcastDelay::None) .await }, broadcast_delay, @@ -377,6 +378,7 @@ impl ConnectedNetwork for CombinedNetworks async fn da_broadcast_message( &self, + view: ViewNumber, message: Vec, recipients: Vec, broadcast_delay: BroadcastDelay, @@ -390,12 +392,17 @@ impl ConnectedNetwork for CombinedNetworks message, async move { primary - .da_broadcast_message(primary_message, primary_recipients, BroadcastDelay::None) + .da_broadcast_message( + view, + primary_message, + primary_recipients, + BroadcastDelay::None, + ) .await }, async move { secondary - .da_broadcast_message(secondary_message, recipients, BroadcastDelay::None) + .da_broadcast_message(view, secondary_message, recipients, BroadcastDelay::None) .await }, broadcast_delay, @@ -405,6 +412,7 @@ impl ConnectedNetwork for CombinedNetworks async fn direct_message( &self, + view: ViewNumber, message: Vec, recipient: TYPES::SignatureKey, ) -> Result<(), NetworkError> { @@ -417,10 +425,14 @@ impl ConnectedNetwork for CombinedNetworks message, async move { primary - .direct_message(primary_message, primary_recipient) + .direct_message(view, primary_message, primary_recipient) + .await + }, + async move { + secondary + .direct_message(view, secondary_message, 
recipient) .await }, - async move { secondary.direct_message(secondary_message, recipient).await }, BroadcastDelay::None, ) .await @@ -428,7 +440,7 @@ impl ConnectedNetwork for CombinedNetworks async fn vid_broadcast_message( &self, - messages: HashMap>, + messages: HashMap)>, ) -> Result<(), NetworkError> { self.networks.0.vid_broadcast_message(messages).await } @@ -469,20 +481,20 @@ impl ConnectedNetwork for CombinedNetworks self.secondary().queue_node_lookup(view_number, pk) } - async fn update_view<'a, T>( - &'a self, - view: u64, - epoch: Option, + async fn update_view( + &self, + view: ViewNumber, + epoch: Option, membership: EpochMembershipCoordinator, ) where - T: NodeType + 'a, + T: NodeType, { let delayed_tasks_channels = Arc::clone(&self.delayed_tasks_channels); spawn(async move { let mut map_lock = delayed_tasks_channels.write().await; while let Some((first_view, _)) = map_lock.first_key_value() { // Broadcast a cancelling signal to all the tasks related to each view older than the new one - if *first_view < view { + if *first_view < *view { if let Some((_, (sender, _))) = map_lock.pop_first() { let _ = sender.try_broadcast(()); } else { diff --git a/crates/hotshot/hotshot/src/traits/networking/libp2p_network.rs b/crates/hotshot/hotshot/src/traits/networking/libp2p_network.rs index fc7dfca67fe..7ef8a5bdcce 100644 --- a/crates/hotshot/hotshot/src/traits/networking/libp2p_network.rs +++ b/crates/hotshot/hotshot/src/traits/networking/libp2p_network.rs @@ -51,7 +51,7 @@ use hotshot_types::traits::network::{ use hotshot_types::{ boxed_sync, constants::LOOK_AHEAD, - data::ViewNumber, + data::{EpochNumber, ViewNumber}, network::NetworkConfig, traits::{ metrics::{Counter, Gauge, Metrics, NoMetrics}, @@ -786,6 +786,7 @@ impl ConnectedNetwork for Libp2pNetwork { #[instrument(name = "Libp2pNetwork::broadcast_message", skip_all)] async fn broadcast_message( &self, + _: ViewNumber, message: Vec, topic: Topic, _broadcast_delay: BroadcastDelay, @@ -843,6 +844,7 @@ 
impl ConnectedNetwork for Libp2pNetwork { #[instrument(name = "Libp2pNetwork::da_broadcast_message", skip_all)] async fn da_broadcast_message( &self, + view: ViewNumber, message: Vec, recipients: Vec, _broadcast_delay: BroadcastDelay, @@ -864,7 +866,7 @@ impl ConnectedNetwork for Libp2pNetwork { let future_results = recipients .into_iter() - .map(|r| self.direct_message(message.clone(), r)); + .map(|r| self.direct_message(view, message.clone(), r)); let results = join_all(future_results).await; let errors: Vec<_> = results.into_iter().filter_map(|r| r.err()).collect(); @@ -879,6 +881,7 @@ impl ConnectedNetwork for Libp2pNetwork { #[instrument(name = "Libp2pNetwork::direct_message", skip_all)] async fn direct_message( &self, + _: ViewNumber, message: Vec, recipient: T::SignatureKey, ) -> Result<(), NetworkError> { @@ -988,16 +991,16 @@ impl ConnectedNetwork for Libp2pNetwork { /// So the logic with libp2p is to prefetch upcoming leaders libp2p address to /// save time when we later need to direct message the leader our vote. Hence the /// use of the future view and leader to queue the lookups. 
- async fn update_view<'a, TYPES>( - &'a self, - view: u64, - epoch: Option, + async fn update_view( + &self, + view: ViewNumber, + epoch: Option, membership_coordinator: EpochMembershipCoordinator, ) where - TYPES: NodeType + 'a, + TYPES: NodeType, { - let future_view = ::View::new(view) + LOOK_AHEAD; - let epoch = epoch.map(::Epoch::new); + let future_view = ::View::new(*view) + LOOK_AHEAD; + let epoch = epoch.map(|e| ::Epoch::new(*e)); let membership = match membership_coordinator.membership_for_epoch(epoch).await { Ok(m) => m, diff --git a/crates/hotshot/hotshot/src/traits/networking/memory_network.rs b/crates/hotshot/hotshot/src/traits/networking/memory_network.rs index 7d0e2ca5804..f7cb95b5bf2 100644 --- a/crates/hotshot/hotshot/src/traits/networking/memory_network.rs +++ b/crates/hotshot/hotshot/src/traits/networking/memory_network.rs @@ -23,6 +23,7 @@ use async_trait::async_trait; use dashmap::DashMap; use hotshot_types::{ boxed_sync, + data::ViewNumber, traits::{ network::{ AsyncGenerator, BroadcastDelay, ConnectedNetwork, TestableNetworkingImplementation, @@ -245,6 +246,7 @@ impl ConnectedNetwork for MemoryNetwork { #[instrument(name = "MemoryNetwork::broadcast_message")] async fn broadcast_message( &self, + _: ViewNumber, message: Vec, topic: Topic, _broadcast_delay: BroadcastDelay, @@ -295,6 +297,7 @@ impl ConnectedNetwork for MemoryNetwork { #[instrument(name = "MemoryNetwork::da_broadcast_message")] async fn da_broadcast_message( &self, + _: ViewNumber, message: Vec, recipients: Vec, _broadcast_delay: BroadcastDelay, @@ -347,7 +350,12 @@ impl ConnectedNetwork for MemoryNetwork { } #[instrument(name = "MemoryNetwork::direct_message")] - async fn direct_message(&self, message: Vec, recipient: K) -> Result<(), NetworkError> { + async fn direct_message( + &self, + _: ViewNumber, + message: Vec, + recipient: K, + ) -> Result<(), NetworkError> { // debug!(?message, ?recipient, "Sending direct message"); // Bincode the message trace!("Message bincoded, 
finding recipient"); diff --git a/crates/hotshot/hotshot/src/traits/networking/push_cdn_network.rs b/crates/hotshot/hotshot/src/traits/networking/push_cdn_network.rs index df0dc77130b..61e9efa5f49 100644 --- a/crates/hotshot/hotshot/src/traits/networking/push_cdn_network.rs +++ b/crates/hotshot/hotshot/src/traits/networking/push_cdn_network.rs @@ -519,6 +519,7 @@ impl ConnectedNetwork for PushCdnNetwork { /// - If we fail to send the broadcast message. async fn broadcast_message( &self, + _: ViewNumber, message: Vec, topic: HotShotTopic, _broadcast_delay: BroadcastDelay, @@ -544,6 +545,7 @@ impl ConnectedNetwork for PushCdnNetwork { /// - If we fail to send the broadcast message. async fn da_broadcast_message( &self, + _: ViewNumber, message: Vec, _recipients: Vec, _broadcast_delay: BroadcastDelay, @@ -566,7 +568,12 @@ impl ConnectedNetwork for PushCdnNetwork { /// /// - If we fail to serialize the message /// - If we fail to send the direct message - async fn direct_message(&self, message: Vec, recipient: K) -> Result<(), NetworkError> { + async fn direct_message( + &self, + _: ViewNumber, + message: Vec, + recipient: K, + ) -> Result<(), NetworkError> { // If the message is to ourselves, just add it to the internal queue if recipient == self.public_key { self.internal_queue.lock().push_back(message); diff --git a/crates/hotshot/hotshot/src/types/handle.rs b/crates/hotshot/hotshot/src/types/handle.rs index b1009dde43e..a341f0a66c0 100644 --- a/crates/hotshot/hotshot/src/types/handle.rs +++ b/crates/hotshot/hotshot/src/types/handle.rs @@ -26,9 +26,10 @@ use hotshot_types::{ traits::{ consensus_api::ConsensusApi, network::{BroadcastDelay, ConnectedNetwork, Topic}, - node_implementation::NodeType, + node_implementation::{ConsensusTime, NodeType}, signature_key::SignatureKey, }, + vote::HasViewNumber, }; use tracing::instrument; @@ -106,22 +107,33 @@ impl + 'static, V: Versions> sender: self.public_key().clone(), kind: MessageKind::External(msg), }; + let view: 
TYPES::View = message.view_number(); let serialized_message = self.hotshot.upgrade_lock.serialize(&message).await?; match recipients { RecipientList::Broadcast => { self.network - .broadcast_message(serialized_message, Topic::Global, BroadcastDelay::None) + .broadcast_message( + view.u64().into(), + serialized_message, + Topic::Global, + BroadcastDelay::None, + ) .await?; }, RecipientList::Direct(recipient) => { self.network - .direct_message(serialized_message, recipient) + .direct_message(view.u64().into(), serialized_message, recipient) .await?; }, RecipientList::Many(recipients) => { self.network - .da_broadcast_message(serialized_message, recipients, BroadcastDelay::None) + .da_broadcast_message( + view.u64().into(), + serialized_message, + recipients, + BroadcastDelay::None, + ) .await?; }, } diff --git a/crates/hotshot/task-impls/src/network.rs b/crates/hotshot/task-impls/src/network.rs index f610874b1b5..ea9aa41632c 100644 --- a/crates/hotshot/task-impls/src/network.rs +++ b/crates/hotshot/task-impls/src/network.rs @@ -856,6 +856,7 @@ impl< )), } }; + let view = message.view_number(); let serialized_message = match self.upgrade_lock.serialize(&message).await { Ok(serialized) => serialized, Err(e) => { @@ -864,7 +865,7 @@ impl< }, }; - messages.insert(recipient, serialized_message); + messages.insert(recipient, (view.u64().into(), serialized_message)); } let net = Arc::clone(&self.network); @@ -1399,10 +1400,10 @@ impl< let keep_view = TYPES::View::new(view.saturating_sub(1)); self.cancel_tasks(keep_view); let net = Arc::clone(&self.network); - let epoch = self.epoch.map(|x| x.u64()); + let epoch = self.epoch.map(|x| x.u64().into()); let membership_coordinator = self.membership_coordinator.clone(); spawn(async move { - net.update_view::(*keep_view, epoch, membership_coordinator) + net.update_view::(keep_view.u64().into(), epoch, membership_coordinator) .await; }); None @@ -1603,16 +1604,24 @@ impl< let transmit_result = match transmit { 
TransmitType::Direct(recipient) => { - network.direct_message(serialized_message, recipient).await + network + .direct_message(view_number.u64().into(), serialized_message, recipient) + .await }, TransmitType::Broadcast => { network - .broadcast_message(serialized_message, committee_topic, broadcast_delay) + .broadcast_message( + view_number.u64().into(), + serialized_message, + committee_topic, + broadcast_delay, + ) .await }, TransmitType::DaCommitteeBroadcast => { network .da_broadcast_message( + view_number.u64().into(), serialized_message, da_committee.iter().cloned().collect(), broadcast_delay, diff --git a/crates/hotshot/testing/src/byzantine/byzantine_behaviour.rs b/crates/hotshot/testing/src/byzantine/byzantine_behaviour.rs index f24555060a0..edcec4ceda0 100644 --- a/crates/hotshot/testing/src/byzantine/byzantine_behaviour.rs +++ b/crates/hotshot/testing/src/byzantine/byzantine_behaviour.rs @@ -549,7 +549,11 @@ impl, V: Versions> EventTransforme ); }; let transmit_result = network - .direct_message(serialized_message.clone(), node.clone()) + .direct_message( + view_number.u64().into(), + serialized_message.clone(), + node.clone(), + ) .await; match transmit_result { Ok(()) => tracing::info!( @@ -627,7 +631,11 @@ impl, V: Versions> EventTransforme ); }; let transmit_result = network - .direct_message(serialized_message.clone(), node.clone()) + .direct_message( + view_number.u64().into(), + serialized_message.clone(), + node.clone(), + ) .await; match transmit_result { Ok(()) => tracing::info!( @@ -684,7 +692,11 @@ impl, V: Versions> EventTransforme ); }; let transmit_result = network - .direct_message(serialized_message.clone(), node.clone()) + .direct_message( + view_number.u64().into(), + serialized_message.clone(), + node.clone(), + ) .await; match transmit_result { Ok(()) => tracing::info!( diff --git a/crates/hotshot/testing/tests/test_epoch_end.rs b/crates/hotshot/testing/tests/test_epoch_end.rs index ac326b93efa..22387f5eab8 100644 --- 
a/crates/hotshot/testing/tests/test_epoch_end.rs +++ b/crates/hotshot/testing/tests/test_epoch_end.rs @@ -5,14 +5,15 @@ // along with the HotShot repository. If not, see . use hotshot_example_types::node_types::{ - CombinedImpl, EpochsTestVersions, Libp2pImpl, PushCdnImpl, TestTwoStakeTablesTypes, TestTypes, + CliquenetImpl, CombinedImpl, EpochsTestVersions, Libp2pImpl, PushCdnImpl, + TestTwoStakeTablesTypes, TestTypes, }; use hotshot_macros::cross_tests; use hotshot_testing::{block_builder::SimpleBuilderImplementation, test_builder::TestDescription}; cross_tests!( TestName: test_epoch_end, - Impls: [CombinedImpl, Libp2pImpl, PushCdnImpl], + Impls: [CombinedImpl, Libp2pImpl, PushCdnImpl, CliquenetImpl], Types: [TestTypes, TestTwoStakeTablesTypes], Versions: [EpochsTestVersions], Ignore: false, diff --git a/crates/hotshot/testing/tests/test_epoch_success_catchup_types.rs b/crates/hotshot/testing/tests/test_epoch_success_catchup_types.rs index 84aab6582c6..37ecc23062b 100644 --- a/crates/hotshot/testing/tests/test_epoch_success_catchup_types.rs +++ b/crates/hotshot/testing/tests/test_epoch_success_catchup_types.rs @@ -7,7 +7,8 @@ use hotshot_example_types::{ membership::static_committee::StaticStakeTable, node_types::{ - EpochsTestVersions, Libp2pImpl, MemoryImpl, PushCdnImpl, TestTypesEpochCatchupTypes, + CliquenetImpl, EpochsTestVersions, Libp2pImpl, MemoryImpl, PushCdnImpl, + TestTypesEpochCatchupTypes, }, }; use hotshot_macros::cross_tests; @@ -16,7 +17,7 @@ use hotshot_types::signature_key::{BLSPubKey, SchnorrPubKey}; cross_tests!( TestName: test_epoch_success, - Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl], + Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl, CliquenetImpl], Types: [ TestTypesEpochCatchupTypes< StaticStakeTable< diff --git a/crates/hotshot/testing/tests/test_epoch_success_overlap_2_f.rs b/crates/hotshot/testing/tests/test_epoch_success_overlap_2_f.rs index ecb0b00adac..3096e526523 100644 --- 
a/crates/hotshot/testing/tests/test_epoch_success_overlap_2_f.rs +++ b/crates/hotshot/testing/tests/test_epoch_success_overlap_2_f.rs @@ -5,15 +5,15 @@ // along with the HotShot repository. If not, see . use hotshot_example_types::node_types::{ - EpochsTestVersions, Libp2pImpl, MemoryImpl, PushCdnImpl, StableQuorumFilterConfig, - TestTypesRandomizedCommitteeMembers, + CliquenetImpl, EpochsTestVersions, Libp2pImpl, MemoryImpl, PushCdnImpl, + StableQuorumFilterConfig, TestTypesRandomizedCommitteeMembers, }; use hotshot_macros::cross_tests; use hotshot_testing::{block_builder::SimpleBuilderImplementation, test_builder::TestDescription}; cross_tests!( TestName: test_epoch_success, - Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl], + Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl, CliquenetImpl], Types: [ TestTypesRandomizedCommitteeMembers, StableQuorumFilterConfig<123, 4>>, // Overlap = 2F ], diff --git a/crates/hotshot/testing/tests/test_epoch_success_overlap_2f_plus_1.rs b/crates/hotshot/testing/tests/test_epoch_success_overlap_2f_plus_1.rs index db231a2c130..d4ba5ba51a2 100644 --- a/crates/hotshot/testing/tests/test_epoch_success_overlap_2f_plus_1.rs +++ b/crates/hotshot/testing/tests/test_epoch_success_overlap_2f_plus_1.rs @@ -5,15 +5,15 @@ // along with the HotShot repository. If not, see . 
use hotshot_example_types::node_types::{ - EpochsTestVersions, Libp2pImpl, MemoryImpl, PushCdnImpl, StableQuorumFilterConfig, - TestTypesRandomizedCommitteeMembers, + CliquenetImpl, EpochsTestVersions, Libp2pImpl, MemoryImpl, PushCdnImpl, + StableQuorumFilterConfig, TestTypesRandomizedCommitteeMembers, }; use hotshot_macros::cross_tests; use hotshot_testing::{block_builder::SimpleBuilderImplementation, test_builder::TestDescription}; cross_tests!( TestName: test_epoch_success, - Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl], + Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl, CliquenetImpl], Types: [ TestTypesRandomizedCommitteeMembers, StableQuorumFilterConfig<123, 5>>, // Overlap = 2F+1 ], diff --git a/crates/hotshot/testing/tests/test_epoch_success_overlap_3f.rs b/crates/hotshot/testing/tests/test_epoch_success_overlap_3f.rs index a08d307762a..d3ebb7a5271 100644 --- a/crates/hotshot/testing/tests/test_epoch_success_overlap_3f.rs +++ b/crates/hotshot/testing/tests/test_epoch_success_overlap_3f.rs @@ -5,15 +5,15 @@ // along with the HotShot repository. If not, see . 
use hotshot_example_types::node_types::{ - EpochsTestVersions, Libp2pImpl, MemoryImpl, PushCdnImpl, StableQuorumFilterConfig, - TestTypesRandomizedCommitteeMembers, + CliquenetImpl, EpochsTestVersions, Libp2pImpl, MemoryImpl, PushCdnImpl, + StableQuorumFilterConfig, TestTypesRandomizedCommitteeMembers, }; use hotshot_macros::cross_tests; use hotshot_testing::{block_builder::SimpleBuilderImplementation, test_builder::TestDescription}; cross_tests!( TestName: test_epoch_success, - Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl], + Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl, CliquenetImpl], Types: [ TestTypesRandomizedCommitteeMembers, StableQuorumFilterConfig<123, 6>>, // Overlap = 3F ], diff --git a/crates/hotshot/testing/tests/test_epoch_success_overlap_dynamic.rs b/crates/hotshot/testing/tests/test_epoch_success_overlap_dynamic.rs index 29fbaea7847..090ec9e65fa 100644 --- a/crates/hotshot/testing/tests/test_epoch_success_overlap_dynamic.rs +++ b/crates/hotshot/testing/tests/test_epoch_success_overlap_dynamic.rs @@ -5,15 +5,15 @@ // along with the HotShot repository. If not, see . 
use hotshot_example_types::node_types::{ - EpochsTestVersions, Libp2pImpl, MemoryImpl, PushCdnImpl, RandomOverlapQuorumFilterConfig, - TestTypesRandomizedCommitteeMembers, + CliquenetImpl, EpochsTestVersions, Libp2pImpl, MemoryImpl, PushCdnImpl, + RandomOverlapQuorumFilterConfig, TestTypesRandomizedCommitteeMembers, }; use hotshot_macros::cross_tests; use hotshot_testing::{block_builder::SimpleBuilderImplementation, test_builder::TestDescription}; cross_tests!( TestName: test_epoch_success, - Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl], + Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl, CliquenetImpl], Types: [ TestTypesRandomizedCommitteeMembers, RandomOverlapQuorumFilterConfig<123, 4, 7, 0, 2>>, // Overlap = Dynamic ], diff --git a/crates/hotshot/testing/tests/test_epoch_success_overlap_f.rs b/crates/hotshot/testing/tests/test_epoch_success_overlap_f.rs index 4b50d7137e4..a966a4f35dc 100644 --- a/crates/hotshot/testing/tests/test_epoch_success_overlap_f.rs +++ b/crates/hotshot/testing/tests/test_epoch_success_overlap_f.rs @@ -5,15 +5,15 @@ // along with the HotShot repository. If not, see . 
use hotshot_example_types::node_types::{ - EpochsTestVersions, Libp2pImpl, MemoryImpl, PushCdnImpl, StableQuorumFilterConfig, - TestTypesRandomizedCommitteeMembers, + CliquenetImpl, EpochsTestVersions, Libp2pImpl, MemoryImpl, PushCdnImpl, + StableQuorumFilterConfig, TestTypesRandomizedCommitteeMembers, }; use hotshot_macros::cross_tests; use hotshot_testing::{block_builder::SimpleBuilderImplementation, test_builder::TestDescription}; cross_tests!( TestName: test_epoch_success, - Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl], + Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl, CliquenetImpl], Types: [ TestTypesRandomizedCommitteeMembers, StableQuorumFilterConfig<123, 2>>, // Overlap = F ], diff --git a/crates/hotshot/testing/tests/test_epoch_success_overlap_f_plus_1.rs b/crates/hotshot/testing/tests/test_epoch_success_overlap_f_plus_1.rs index 62589499e72..c527a405de2 100644 --- a/crates/hotshot/testing/tests/test_epoch_success_overlap_f_plus_1.rs +++ b/crates/hotshot/testing/tests/test_epoch_success_overlap_f_plus_1.rs @@ -5,15 +5,15 @@ // along with the HotShot repository. If not, see . 
use hotshot_example_types::node_types::{ - EpochsTestVersions, Libp2pImpl, MemoryImpl, PushCdnImpl, StableQuorumFilterConfig, - TestTypesRandomizedCommitteeMembers, + CliquenetImpl, EpochsTestVersions, Libp2pImpl, MemoryImpl, PushCdnImpl, + StableQuorumFilterConfig, TestTypesRandomizedCommitteeMembers, }; use hotshot_macros::cross_tests; use hotshot_testing::{block_builder::SimpleBuilderImplementation, test_builder::TestDescription}; cross_tests!( TestName: test_epoch_success, - Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl], + Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl, CliquenetImpl], Types: [ TestTypesRandomizedCommitteeMembers, StableQuorumFilterConfig<123, 3>>, // Overlap = F+1 ], diff --git a/crates/hotshot/testing/tests/test_epoch_success_types.rs b/crates/hotshot/testing/tests/test_epoch_success_types.rs index 362895bb708..2dc702084ad 100644 --- a/crates/hotshot/testing/tests/test_epoch_success_types.rs +++ b/crates/hotshot/testing/tests/test_epoch_success_types.rs @@ -5,14 +5,14 @@ // along with the HotShot repository. If not, see . use hotshot_example_types::node_types::{ - EpochsTestVersions, Libp2pImpl, MemoryImpl, PushCdnImpl, TestTypes, + CliquenetImpl, EpochsTestVersions, Libp2pImpl, MemoryImpl, PushCdnImpl, TestTypes, }; use hotshot_macros::cross_tests; use hotshot_testing::{block_builder::SimpleBuilderImplementation, test_builder::TestDescription}; cross_tests!( TestName: test_epoch_success, - Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl], + Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl, CliquenetImpl], Types: [ TestTypes, ], diff --git a/crates/hotshot/testing/tests/test_epoch_success_types_randomized_leader.rs b/crates/hotshot/testing/tests/test_epoch_success_types_randomized_leader.rs index 0a010c94213..1e12f3176de 100644 --- a/crates/hotshot/testing/tests/test_epoch_success_types_randomized_leader.rs +++ b/crates/hotshot/testing/tests/test_epoch_success_types_randomized_leader.rs @@ -5,14 +5,15 @@ // along with the HotShot repository. If not, see . 
use hotshot_example_types::node_types::{ - EpochsTestVersions, Libp2pImpl, MemoryImpl, PushCdnImpl, TestTypesRandomizedLeader, + CliquenetImpl, EpochsTestVersions, Libp2pImpl, MemoryImpl, PushCdnImpl, + TestTypesRandomizedLeader, }; use hotshot_macros::cross_tests; use hotshot_testing::{block_builder::SimpleBuilderImplementation, test_builder::TestDescription}; cross_tests!( TestName: test_epoch_success, - Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl], + Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl, CliquenetImpl], Types: [ TestTypesRandomizedLeader, ], diff --git a/crates/hotshot/testing/tests/test_epoch_unequal_stake.rs b/crates/hotshot/testing/tests/test_epoch_unequal_stake.rs index 69ff49fce22..fae6c88ccbb 100644 --- a/crates/hotshot/testing/tests/test_epoch_unequal_stake.rs +++ b/crates/hotshot/testing/tests/test_epoch_unequal_stake.rs @@ -8,7 +8,7 @@ use std::time::Duration; use alloy::primitives::U256; use hotshot_example_types::node_types::{ - EpochsTestVersions, Libp2pImpl, MemoryImpl, PushCdnImpl, TestTypes, + CliquenetImpl, EpochsTestVersions, Libp2pImpl, MemoryImpl, PushCdnImpl, TestTypes, }; use hotshot_macros::cross_tests; use hotshot_testing::{ @@ -23,7 +23,7 @@ use hotshot_testing::{ // and can tailor our view failure set against that. 
cross_tests!( TestName: test_unequal_stake_success_with_failing_majority_count, - Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl], + Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl, CliquenetImpl], Types: [TestTypes], Versions: [EpochsTestVersions], Ignore: false, diff --git a/crates/hotshot/testing/tests/test_epochs_failures.rs b/crates/hotshot/testing/tests/test_epochs_failures.rs index c200eb0f09a..c4ecaca1643 100644 --- a/crates/hotshot/testing/tests/test_epochs_failures.rs +++ b/crates/hotshot/testing/tests/test_epochs_failures.rs @@ -7,7 +7,7 @@ use std::time::Duration; use hotshot_example_types::node_types::{ - CombinedImpl, EpochsTestVersions, Libp2pImpl, MemoryImpl, PushCdnImpl, + CliquenetImpl, CombinedImpl, EpochsTestVersions, Libp2pImpl, MemoryImpl, PushCdnImpl, TestConsecutiveLeaderTypes, TestTwoStakeTablesTypes, TestTypes, }; use hotshot_macros::cross_tests; @@ -20,7 +20,7 @@ use hotshot_testing::{ cross_tests!( TestName: test_with_failures_2_with_epochs, - Impls: [Libp2pImpl, PushCdnImpl, CombinedImpl], + Impls: [Libp2pImpl, PushCdnImpl, CombinedImpl, CliquenetImpl], Types: [TestTwoStakeTablesTypes], Versions: [EpochsTestVersions], Ignore: false, @@ -53,7 +53,7 @@ cross_tests!( cross_tests!( TestName: test_with_double_leader_failures_with_epochs, - Impls: [Libp2pImpl, PushCdnImpl, CombinedImpl], + Impls: [Libp2pImpl, PushCdnImpl, CombinedImpl, CliquenetImpl], Types: [TestConsecutiveLeaderTypes], Versions: [EpochsTestVersions], Ignore: false, @@ -94,7 +94,7 @@ cross_tests!( cross_tests!( TestName: test_with_failures_half_f_epochs_1, - Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl], + Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl, CliquenetImpl], Types: [TestTypes], Versions: [EpochsTestVersions], Ignore: false, @@ -127,8 +127,8 @@ cross_tests!( ); cross_tests!( - TestName: test_with_failures_half_f_epochs_2, - Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl], + TestName: test_with_failures_half_f_epochs_2, + Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl, 
CliquenetImpl], Types: [TestTwoStakeTablesTypes], Versions: [EpochsTestVersions], Ignore: false, @@ -167,7 +167,7 @@ cross_tests!( cross_tests!( TestName: test_with_failures_f_epochs_1, - Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl], + Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl, CliquenetImpl], Types: [TestTypes], Versions: [EpochsTestVersions], Ignore: false, @@ -214,7 +214,7 @@ cross_tests!( cross_tests!( TestName: test_with_failures_f_epochs_2, - Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl], + Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl, CliquenetImpl], Types: [TestTwoStakeTablesTypes], Versions: [EpochsTestVersions], Ignore: false, diff --git a/crates/hotshot/testing/tests/test_shorter_decide.rs b/crates/hotshot/testing/tests/test_shorter_decide.rs index c9019000dd7..cb56d294955 100644 --- a/crates/hotshot/testing/tests/test_shorter_decide.rs +++ b/crates/hotshot/testing/tests/test_shorter_decide.rs @@ -5,7 +5,8 @@ // along with the HotShot repository. If not, see . use hotshot_example_types::node_types::{ - CombinedImpl, EpochsTestVersions, Libp2pImpl, PushCdnImpl, TestTwoStakeTablesTypes, TestTypes, + CliquenetImpl, CombinedImpl, EpochsTestVersions, Libp2pImpl, PushCdnImpl, + TestTwoStakeTablesTypes, TestTypes, }; use hotshot_macros::cross_tests; use hotshot_testing::{ @@ -17,7 +18,7 @@ use hotshot_testing::{ // This test fails with the old decide rule cross_tests!( TestName: test_shorter_decide, - Impls: [Libp2pImpl, PushCdnImpl, CombinedImpl], + Impls: [Libp2pImpl, PushCdnImpl, CombinedImpl, CliquenetImpl], Types: [TestTypes, TestTwoStakeTablesTypes], Versions: [EpochsTestVersions], Ignore: false, diff --git a/crates/hotshot/testing/tests/test_success_with_async_delay_2_with_epochs.rs b/crates/hotshot/testing/tests/test_success_with_async_delay_2_with_epochs.rs index 3a6162b526c..f61d48dfe6e 100644 --- a/crates/hotshot/testing/tests/test_success_with_async_delay_2_with_epochs.rs +++ 
b/crates/hotshot/testing/tests/test_success_with_async_delay_2_with_epochs.rs @@ -8,8 +8,8 @@ use std::{collections::HashMap, time::Duration}; use hotshot_example_types::{ node_types::{ - CombinedImpl, EpochsTestVersions, Libp2pImpl, PushCdnImpl, TestTwoStakeTablesTypes, - TestTypes, + CliquenetImpl, CombinedImpl, EpochsTestVersions, Libp2pImpl, PushCdnImpl, + TestTwoStakeTablesTypes, TestTypes, }, testable_delay::{DelayConfig, DelayOptions, DelaySettings, SupportedTraitTypesForAsyncDelay}, }; @@ -22,7 +22,7 @@ use hotshot_testing::{ cross_tests!( TestName: test_success_with_async_delay_2_with_epochs, - Impls: [Libp2pImpl, PushCdnImpl, CombinedImpl], + Impls: [Libp2pImpl, PushCdnImpl, CombinedImpl, CliquenetImpl], Types: [TestTypes, TestTwoStakeTablesTypes], Versions: [EpochsTestVersions], Ignore: false, diff --git a/crates/hotshot/testing/tests/test_success_with_async_delay_with_epochs.rs b/crates/hotshot/testing/tests/test_success_with_async_delay_with_epochs.rs index 4aaa41a4d30..850e7fd3865 100644 --- a/crates/hotshot/testing/tests/test_success_with_async_delay_with_epochs.rs +++ b/crates/hotshot/testing/tests/test_success_with_async_delay_with_epochs.rs @@ -8,8 +8,8 @@ use std::collections::HashMap; use hotshot_example_types::{ node_types::{ - CombinedImpl, EpochsTestVersions, Libp2pImpl, PushCdnImpl, TestTwoStakeTablesTypes, - TestTypes, + CliquenetImpl, CombinedImpl, EpochsTestVersions, Libp2pImpl, PushCdnImpl, + TestTwoStakeTablesTypes, TestTypes, }, testable_delay::{DelayConfig, DelayOptions, DelaySettings, SupportedTraitTypesForAsyncDelay}, }; @@ -18,7 +18,7 @@ use hotshot_testing::{block_builder::SimpleBuilderImplementation, test_builder:: cross_tests!( TestName: test_success_with_async_delay_with_epochs, - Impls: [Libp2pImpl, PushCdnImpl, CombinedImpl], + Impls: [Libp2pImpl, PushCdnImpl, CombinedImpl, CliquenetImpl], Types: [TestTypes, TestTwoStakeTablesTypes], Versions: [EpochsTestVersions], Ignore: false, diff --git 
a/crates/hotshot/testing/tests/test_success_with_epochs.rs b/crates/hotshot/testing/tests/test_success_with_epochs.rs index 63e09c2f61d..45b37ce504f 100644 --- a/crates/hotshot/testing/tests/test_success_with_epochs.rs +++ b/crates/hotshot/testing/tests/test_success_with_epochs.rs @@ -5,15 +5,15 @@ // along with the HotShot repository. If not, see . use hotshot_example_types::node_types::{ - CombinedImpl, EpochsTestVersions, Libp2pImpl, PushCdnImpl, TestTwoStakeTablesTypes, TestTypes, - TestTypesRandomizedLeader, + CliquenetImpl, CombinedImpl, EpochsTestVersions, Libp2pImpl, PushCdnImpl, + TestTwoStakeTablesTypes, TestTypes, TestTypesRandomizedLeader, }; use hotshot_macros::cross_tests; use hotshot_testing::{block_builder::SimpleBuilderImplementation, test_builder::TestDescription}; cross_tests!( TestName: test_success_with_epochs, - Impls: [Libp2pImpl, PushCdnImpl, CombinedImpl], + Impls: [Libp2pImpl, PushCdnImpl, CombinedImpl, CliquenetImpl], Types: [TestTypes, TestTypesRandomizedLeader, TestTwoStakeTablesTypes], Versions: [EpochsTestVersions], Ignore: false, diff --git a/crates/hotshot/testing/tests/test_vid2_success.rs b/crates/hotshot/testing/tests/test_vid2_success.rs index 949c3d64d12..f025def9455 100644 --- a/crates/hotshot/testing/tests/test_vid2_success.rs +++ b/crates/hotshot/testing/tests/test_vid2_success.rs @@ -5,14 +5,14 @@ // along with the HotShot repository. If not, see . 
use hotshot_example_types::node_types::{ - Libp2pImpl, MemoryImpl, PushCdnImpl, TestTypes, Vid2TestVersions, + CliquenetImpl, Libp2pImpl, MemoryImpl, PushCdnImpl, TestTypes, Vid2TestVersions, }; use hotshot_macros::cross_tests; use hotshot_testing::{block_builder::SimpleBuilderImplementation, test_builder::TestDescription}; cross_tests!( TestName: test_vid2_success, - Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl], + Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl, CliquenetImpl], Types: [ TestTypes, ], diff --git a/crates/hotshot/testing/tests/test_with_double_leader_no_failures_with_epochs.rs b/crates/hotshot/testing/tests/test_with_double_leader_no_failures_with_epochs.rs index 04015e14ec6..66126173f0d 100644 --- a/crates/hotshot/testing/tests/test_with_double_leader_no_failures_with_epochs.rs +++ b/crates/hotshot/testing/tests/test_with_double_leader_no_failures_with_epochs.rs @@ -5,8 +5,8 @@ // along with the HotShot repository. If not, see . use hotshot_example_types::node_types::{ - CombinedImpl, EpochsTestVersions, Libp2pImpl, PushCdnImpl, TestConsecutiveLeaderTypes, - TestTwoStakeTablesTypes, + CliquenetImpl, CombinedImpl, EpochsTestVersions, Libp2pImpl, PushCdnImpl, + TestConsecutiveLeaderTypes, TestTwoStakeTablesTypes, }; use hotshot_macros::cross_tests; use hotshot_testing::{ @@ -15,7 +15,7 @@ use hotshot_testing::{ }; cross_tests!( TestName: test_with_double_leader_no_failures_with_epochs, - Impls: [Libp2pImpl, PushCdnImpl, CombinedImpl], + Impls: [Libp2pImpl, PushCdnImpl, CombinedImpl, CliquenetImpl], Types: [TestConsecutiveLeaderTypes, TestTwoStakeTablesTypes], Versions: [EpochsTestVersions], Ignore: false, diff --git a/crates/hotshot/testing/tests/tests_1/test_success.rs b/crates/hotshot/testing/tests/tests_1/test_success.rs index 0f21fdf33d4..138cddf2862 100644 --- a/crates/hotshot/testing/tests/tests_1/test_success.rs +++ b/crates/hotshot/testing/tests/tests_1/test_success.rs @@ -8,8 +8,7 @@ use std::collections::HashMap; use hotshot_example_types::{ 
node_types::{ - Libp2pImpl, MemoryImpl, PushCdnImpl, TestConsecutiveLeaderTypes, TestTypes, - TestTypesRandomizedLeader, TestVersions, + CliquenetImpl, Libp2pImpl, MemoryImpl, PushCdnImpl, TestConsecutiveLeaderTypes, TestTypes, TestTypesRandomizedLeader, TestVersions }, testable_delay::{DelayConfig, DelayOptions, DelaySettings, SupportedTraitTypesForAsyncDelay}, }; @@ -22,7 +21,7 @@ use hotshot_testing::{ cross_tests!( TestName: test_success, - Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl], + Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl, CliquenetImpl], Types: [TestTypes, TestTypesRandomizedLeader], Versions: [TestVersions], Ignore: false, @@ -37,7 +36,7 @@ cross_tests!( cross_tests!( TestName: test_success_with_async_delay, - Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl], + Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl, CliquenetImpl], Types: [TestTypes], Versions: [TestVersions], Ignore: false, @@ -65,7 +64,7 @@ cross_tests!( cross_tests!( TestName: test_success_with_async_delay_2, - Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl], + Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl, CliquenetImpl], Types: [TestTypes], Versions: [TestVersions], Ignore: false, @@ -101,7 +100,7 @@ cross_tests!( cross_tests!( TestName: test_with_double_leader_no_failures, - Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl], + Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl, CliquenetImpl], Types: [TestConsecutiveLeaderTypes], Versions: [TestVersions], Ignore: false, diff --git a/crates/hotshot/testing/tests/tests_1/test_with_failures_2.rs b/crates/hotshot/testing/tests/tests_1/test_with_failures_2.rs index 4354e1cb7fc..6c952360bb9 100644 --- a/crates/hotshot/testing/tests/tests_1/test_with_failures_2.rs +++ b/crates/hotshot/testing/tests/tests_1/test_with_failures_2.rs @@ -10,7 +10,7 @@ use std::{collections::HashMap, time::Duration}; use hotshot_example_types::{ node_types::{ - CombinedImpl, EpochsTestVersions, Libp2pImpl, MemoryImpl, PushCdnImpl, + CombinedImpl, EpochsTestVersions, Libp2pImpl, 
MemoryImpl, PushCdnImpl, CliquenetImpl, TestConsecutiveLeaderTypes, TestTwoStakeTablesTypes, TestVersions, }, state_types::TestTypes, @@ -36,7 +36,7 @@ use hotshot_types::{ // Test that a good leader can succeed in the view directly after view sync cross_tests!( TestName: test_with_failures_2, - Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl], + Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl, CliquenetImpl], Types: [TestTypes], Versions: [TestVersions], Ignore: false, @@ -73,7 +73,7 @@ cross_tests!( cross_tests!( TestName: test_with_double_leader_failures, - Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl], + Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl, CliquenetImpl], Types: [TestConsecutiveLeaderTypes], Versions: [TestVersions], Ignore: false, diff --git a/crates/hotshot/testing/tests/tests_3/byzantine_tests.rs b/crates/hotshot/testing/tests/tests_3/byzantine_tests.rs index 8487e6bf723..9cad832f0c4 100644 --- a/crates/hotshot/testing/tests/tests_3/byzantine_tests.rs +++ b/crates/hotshot/testing/tests/tests_3/byzantine_tests.rs @@ -2,7 +2,7 @@ use std::{collections::HashSet, rc::Rc, sync::Arc, time::Duration}; use async_lock::RwLock; use hotshot_example_types::{ - node_types::{EpochsTestVersions, Libp2pImpl, MemoryImpl, PushCdnImpl, TestVersions}, + node_types::{EpochsTestVersions, Libp2pImpl, MemoryImpl, PushCdnImpl, CliquenetImpl, TestVersions}, state_types::TestTypes, }; use hotshot_macros::cross_tests; @@ -108,7 +108,7 @@ cross_tests!( cross_tests!( TestName: dishonest_da, - Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl], + Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl, CliquenetImpl], Types: [TestTypes], Versions: [TestVersions], Ignore: false, @@ -137,7 +137,7 @@ cross_tests!( cross_tests!( TestName: dishonest_voting, - Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl], + Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl, CliquenetImpl], Types: [TestTypes], Versions: [TestVersions], Ignore: false, diff --git a/crates/hotshot/testing/tests/tests_3/memory_network.rs 
b/crates/hotshot/testing/tests/tests_3/memory_network.rs index a7516001fec..ad68de259f6 100644 --- a/crates/hotshot/testing/tests/tests_3/memory_network.rs +++ b/crates/hotshot/testing/tests/tests_3/memory_network.rs @@ -22,7 +22,7 @@ use hotshot_types::{ traits::{ network::{BroadcastDelay, ConnectedNetwork, TestableNetworkingImplementation, Topic}, node_implementation::{ConsensusTime, NodeType}, - }, + }, vote::HasViewNumber, }; use rand::{rngs::StdRng, RngCore, SeedableRng}; use tokio::time::timeout; @@ -118,9 +118,10 @@ async fn memory_network_direct_queue() { // Test 1 -> 2 // Send messages for sent_message in first_messages { + let view = sent_message.view_number(); let serialized_message = upgrade_lock.serialize(&sent_message).await.unwrap(); network1 - .direct_message(serialized_message.clone(), pub_key_2) + .direct_message(view, serialized_message.clone(), pub_key_2) .await .expect("Failed to message node"); let recv_message = network2 @@ -140,9 +141,10 @@ async fn memory_network_direct_queue() { // Test 2 -> 1 // Send messages for sent_message in second_messages { + let view = sent_message.view_number(); let serialized_message = upgrade_lock.serialize(&sent_message).await.unwrap(); network2 - .direct_message(serialized_message.clone(), pub_key_1) + .direct_message(view, serialized_message.clone(), pub_key_1) .await .expect("Failed to message node"); let recv_message = network1 @@ -177,9 +179,10 @@ async fn memory_network_broadcast_queue() { // Test 1 -> 2 // Send messages for sent_message in first_messages { + let view = sent_message.view_number(); let serialized_message = upgrade_lock.serialize(&sent_message).await.unwrap(); network1 - .broadcast_message(serialized_message.clone(), Topic::Da, BroadcastDelay::None) + .broadcast_message(view, serialized_message.clone(), Topic::Da, BroadcastDelay::None) .await .expect("Failed to message node"); let recv_message = network2 @@ -199,9 +202,11 @@ async fn memory_network_broadcast_queue() { // Test 2 -> 1 // Send 
messages for sent_message in second_messages { + let view = sent_message.view_number(); let serialized_message = upgrade_lock.serialize(&sent_message).await.unwrap(); network2 .broadcast_message( + view, serialized_message.clone(), Topic::Global, BroadcastDelay::None, @@ -247,10 +252,11 @@ async fn memory_network_test_in_flight_message_count() { let upgrade_lock = UpgradeLock::::new(); for (count, message) in messages.iter().enumerate() { + let view = message.view_number(); let serialized_message = upgrade_lock.serialize(message).await.unwrap(); network1 - .direct_message(serialized_message.clone(), pub_key_2) + .direct_message(view, serialized_message.clone(), pub_key_2) .await .unwrap(); // network 2 has received `count` broadcast messages and `count + 1` direct messages @@ -261,6 +267,7 @@ async fn memory_network_test_in_flight_message_count() { network2 .broadcast_message( + view, serialized_message.clone(), Topic::Global, BroadcastDelay::None, diff --git a/crates/hotshot/testing/tests/tests_3/test_with_failures_half_f.rs b/crates/hotshot/testing/tests/tests_3/test_with_failures_half_f.rs index 95ba4bc69ae..4063b7611a4 100644 --- a/crates/hotshot/testing/tests/tests_3/test_with_failures_half_f.rs +++ b/crates/hotshot/testing/tests/tests_3/test_with_failures_half_f.rs @@ -7,7 +7,7 @@ use std::time::Duration; use hotshot_example_types::{ - node_types::{Libp2pImpl, MemoryImpl, PushCdnImpl, TestVersions}, + node_types::{Libp2pImpl, MemoryImpl, PushCdnImpl, CliquenetImpl, TestVersions}, state_types::TestTypes, }; use hotshot_macros::cross_tests; @@ -19,7 +19,7 @@ use hotshot_testing::{ // Test f/2 nodes leaving the network. 
cross_tests!( TestName: test_with_failures_half_f, - Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl], + Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl, CliquenetImpl], Types: [TestTypes], Versions: [TestVersions], Ignore: false, diff --git a/crates/hotshot/testing/tests/tests_4/test_with_failures_f.rs b/crates/hotshot/testing/tests/tests_4/test_with_failures_f.rs index dbfa66df00e..10feba34182 100644 --- a/crates/hotshot/testing/tests/tests_4/test_with_failures_f.rs +++ b/crates/hotshot/testing/tests/tests_4/test_with_failures_f.rs @@ -7,7 +7,7 @@ use std::time::Duration; use hotshot_example_types::{ - node_types::{Libp2pImpl, MemoryImpl, PushCdnImpl, TestVersions}, + node_types::{Libp2pImpl, MemoryImpl, PushCdnImpl, TestVersions, CliquenetImpl}, state_types::TestTypes, }; use hotshot_macros::cross_tests; @@ -19,7 +19,7 @@ use hotshot_testing::{ // Test f nodes leaving the network. cross_tests!( TestName: test_with_failures_f, - Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl], + Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl, CliquenetImpl], Types: [TestTypes], Versions: [TestVersions], Ignore: false, diff --git a/crates/hotshot/testing/tests/tests_5/test_with_failures.rs b/crates/hotshot/testing/tests/tests_5/test_with_failures.rs index 1fb3f1f6369..1af82103576 100644 --- a/crates/hotshot/testing/tests/tests_5/test_with_failures.rs +++ b/crates/hotshot/testing/tests/tests_5/test_with_failures.rs @@ -7,7 +7,7 @@ use std::time::Duration; use hotshot_example_types::{ - node_types::{Libp2pImpl, MemoryImpl, PushCdnImpl, TestVersions}, + node_types::{Libp2pImpl, MemoryImpl, PushCdnImpl, TestVersions, CliquenetImpl}, state_types::TestTypes, }; use hotshot_macros::cross_tests; @@ -20,7 +20,7 @@ use hotshot_testing::{ // Test one node leaving the network. 
cross_tests!( TestName: test_with_failures_one, - Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl], + Impls: [MemoryImpl, Libp2pImpl, PushCdnImpl, CliquenetImpl], Types: [TestTypes], Versions: [TestVersions], Ignore: false, diff --git a/crates/hotshot/types/src/data.rs b/crates/hotshot/types/src/data.rs index a46fff247b2..578b5ce0974 100644 --- a/crates/hotshot/types/src/data.rs +++ b/crates/hotshot/types/src/data.rs @@ -79,6 +79,18 @@ macro_rules! impl_u64_wrapper { } } + impl From for $t { + fn from(n: u64) -> Self { + Self(n) + } + } + + impl From<$t> for u64 { + fn from(n: $t) -> Self { + n.0 + } + } + impl Display for $t { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", self.0) @@ -927,7 +939,7 @@ pub struct QuorumProposal2 { /// Legacy version of `QuorumProposal2` corresponding to consensus protocol version V3. /// /// `QuorumProposal2` state_cert field was updated to use new -/// `LightClientStateUpdateCertificateV2`. +/// `LightClientStateUpdateCertificateV2`. /// This legacy version uses the older `LightClientStateUpdateCertificateV1` /// format for backward compatibility. 
/// @@ -1004,7 +1016,7 @@ impl From> for QuorumProposal2Legacy: Clone + Send + Sync + 'st /// blocking async fn broadcast_message( &self, + view: ViewNumber, message: Vec, topic: Topic, broadcast_delay: BroadcastDelay, @@ -216,6 +219,7 @@ pub trait ConnectedNetwork: Clone + Send + Sync + 'st /// blocking async fn da_broadcast_message( &self, + view: ViewNumber, message: Vec, recipients: Vec, broadcast_delay: BroadcastDelay, @@ -225,11 +229,11 @@ pub trait ConnectedNetwork: Clone + Send + Sync + 'st /// blocking async fn vid_broadcast_message( &self, - messages: HashMap>, + messages: HashMap)>, ) -> Result<(), NetworkError> { let future_results = messages .into_iter() - .map(|(recipient_key, message)| self.direct_message(message, recipient_key)); + .map(|(recipient_key, (v, m))| self.direct_message(v, m, recipient_key)); let results = join_all(future_results).await; let errors: Vec<_> = results.into_iter().filter_map(|r| r.err()).collect(); @@ -243,7 +247,12 @@ pub trait ConnectedNetwork: Clone + Send + Sync + 'st /// Sends a direct message to a specific node /// blocking - async fn direct_message(&self, message: Vec, recipient: K) -> Result<(), NetworkError>; + async fn direct_message( + &self, + view: ViewNumber, + message: Vec, + recipient: K, + ) -> Result<(), NetworkError>; /// Receive one or many messages from the underlying network. /// @@ -257,21 +266,21 @@ pub trait ConnectedNetwork: Clone + Send + Sync + 'st /// Does not error. fn queue_node_lookup( &self, - _view_number: ViewNumber, - _pk: K, + _: ViewNumber, + _: K, ) -> Result<(), TrySendError>> { Ok(()) } /// Update view can be used for any reason, but mostly it's for canceling tasks, /// and looking up the address of the leader of a future view. 
- async fn update_view<'a, TYPES>( - &'a self, - _view: u64, - _epoch: Option, - _membership_coordinator: EpochMembershipCoordinator, + async fn update_view( + &self, + _: ViewNumber, + _: Option, + _: EpochMembershipCoordinator, ) where - TYPES: NodeType + 'a, + TYPES: NodeType, { } diff --git a/node-metrics/src/api/node_validator/v0/mod.rs b/node-metrics/src/api/node_validator/v0/mod.rs index 1d053990474..613ed4050cd 100644 --- a/node-metrics/src/api/node_validator/v0/mod.rs +++ b/node-metrics/src/api/node_validator/v0/mod.rs @@ -616,7 +616,6 @@ where // Next, do we already have a connection? if let Some(connection_mut) = &mut self_mut.connection { - // Alright, then we'll want to retrieve the next entry pin_mut!(connection_mut); return match connection_mut.poll_next(cx) { diff --git a/request-response/src/network.rs b/request-response/src/network.rs index 5bfccecf872..f6a1bea7056 100644 --- a/request-response/src/network.rs +++ b/request-response/src/network.rs @@ -7,14 +7,11 @@ //! what to do with them (as opposed to having some sort of filtering mechanism). So for //! [`Receiver`] I've done a blanket implementation for channels that send [`Vec`]s. 
-use std::{ops::Deref, sync::Arc}; +use std::sync::Arc; -use anyhow::{Context, Result}; +use anyhow::Result; use async_trait::async_trait; -use hotshot_types::traits::{ - network::{BroadcastDelay, ConnectedNetwork, Topic}, - signature_key::SignatureKey, -}; +use hotshot_types::traits::signature_key::SignatureKey; use tokio::sync::mpsc; /// A type alias for a shareable byte array @@ -38,28 +35,6 @@ pub trait Receiver: Send + Sync + 'static { async fn receive_message(&mut self) -> Result; } -/// A blanket implementation of the [`Sender`] trait for all types that dereference to [`ConnectedNetwork`] -#[async_trait] -impl Sender for T -where - T: Deref> + Send + Sync + 'static + Clone, - K: SignatureKey + 'static, -{ - async fn send_direct_message(&self, message: &Bytes, recipient: K) -> Result<()> { - // Send the message to the specified recipient - self.direct_message(message.to_vec(), recipient) - .await - .with_context(|| "failed to send message") - } - - async fn send_broadcast_message(&self, message: &Bytes) -> Result<()> { - // Send the message to all recipients - self.broadcast_message(message.to_vec(), Topic::Global, BroadcastDelay::None) - .await - .with_context(|| "failed to send message") - } -} - /// An implementation of the [`Receiver`] trait for the [`mpsc::Receiver`] type. Allows us to send messages /// to a channel and have the protocol receive them. 
#[async_trait] diff --git a/sequencer/src/external_event_handler.rs b/sequencer/src/external_event_handler.rs index 6deb3ac670f..4c15c5e71c0 100644 --- a/sequencer/src/external_event_handler.rs +++ b/sequencer/src/external_event_handler.rs @@ -8,7 +8,7 @@ use hotshot::types::Message; use hotshot_types::{ message::MessageKind, traits::{ - network::{BroadcastDelay, ConnectedNetwork, Topic}, + network::{BroadcastDelay, ConnectedNetwork, Topic, ViewMessage}, node_implementation::Versions, }, }; @@ -95,6 +95,7 @@ impl ExternalEventHandler { // Match the message type match message { OutboundMessage::Direct(message, recipient) => { + let view = message.view_number(); // Wrap it in the real message type let message_inner = Message { sender: public_key, @@ -112,12 +113,13 @@ impl ExternalEventHandler { }; // Send the message to the recipient - if let Err(err) = network.direct_message(message_bytes, recipient).await { + if let Err(err) = network.direct_message(view, message_bytes, recipient).await { tracing::warn!("Failed to send message: {:?}", err); }; }, OutboundMessage::Broadcast(message) => { + let view = message.view_number(); // Wrap it in the real message type let message_inner = Message { sender: public_key, @@ -136,7 +138,7 @@ impl ExternalEventHandler { // Broadcast the message to the global topic if let Err(err) = network - .broadcast_message(message_bytes, Topic::Global, BroadcastDelay::None) + .broadcast_message(view, message_bytes, Topic::Global, BroadcastDelay::None) .await { tracing::error!("Failed to broadcast message: {:?}", err);